From 9f71fc09f5c47e3c68e204a0fc84f95c08d3a892 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Thu, 6 Dec 2012 01:46:46 +0000 Subject: [PATCH] "rio" work --- forester/archive/RIO/others/hmmer/00README | 57 - forester/archive/RIO/others/hmmer/COPYRIGHT | 36 - forester/archive/RIO/others/hmmer/INSTALL | 45 - forester/archive/RIO/others/hmmer/LICENSE | 339 -- forester/archive/RIO/others/hmmer/Makefile.in | 284 -- forester/archive/RIO/others/hmmer/NOTES | 197 - forester/archive/RIO/others/hmmer/Userguide.pdf | Bin 299122 -> 0 bytes forester/archive/RIO/others/hmmer/config.guess | 951 ---- forester/archive/RIO/others/hmmer/config.sub | 955 ----- forester/archive/RIO/others/hmmer/configure | 2509 ----------- .../others/hmmer/documentation/man/hmmalign.man | 154 - .../others/hmmer/documentation/man/hmmbuild.man | 476 -- .../hmmer/documentation/man/hmmcalibrate.man | 172 - .../others/hmmer/documentation/man/hmmconvert.man | 124 - .../RIO/others/hmmer/documentation/man/hmmemit.man | 130 - .../RIO/others/hmmer/documentation/man/hmmer.man | 168 - .../others/hmmer/documentation/man/hmmfetch.man | 83 - .../others/hmmer/documentation/man/hmmindex.man | 73 - .../RIO/others/hmmer/documentation/man/hmmpfam.man | 320 -- .../others/hmmer/documentation/man/hmmsearch.man | 289 -- forester/archive/RIO/others/hmmer/install-sh | 251 -- forester/archive/RIO/others/hmmer/squid/00README | 39 - .../RIO/others/hmmer/squid/Docs/abstract.tex | 7 - .../RIO/others/hmmer/squid/Docs/formats.tex | 517 --- .../RIO/others/hmmer/squid/Docs/gsi-format.tex | 87 - .../archive/RIO/others/hmmer/squid/Docs/intro.tex | 2 - .../archive/RIO/others/hmmer/squid/Docs/main.tex | 35 - .../archive/RIO/others/hmmer/squid/Docs/selex.tex | 153 - .../RIO/others/hmmer/squid/Docs/squid-format.tex | 80 - .../RIO/others/hmmer/squid/Docs/ssi-format.tex | 641 --- .../archive/RIO/others/hmmer/squid/Formats/a2m | 200 - .../archive/RIO/others/hmmer/squid/Formats/clustal | 47 - 
.../archive/RIO/others/hmmer/squid/Formats/embl | 155 - .../archive/RIO/others/hmmer/squid/Formats/fasta | 26 - .../RIO/others/hmmer/squid/Formats/formattest.pl | 97 - .../archive/RIO/others/hmmer/squid/Formats/gcg | 397 -- .../RIO/others/hmmer/squid/Formats/gcgdata.1 | 60 - .../RIO/others/hmmer/squid/Formats/gcgdata.2 | Bin 2192 -> 0 bytes .../archive/RIO/others/hmmer/squid/Formats/genbank | 310 -- .../archive/RIO/others/hmmer/squid/Formats/msf | 266 -- .../archive/RIO/others/hmmer/squid/Formats/phylip | 204 - .../archive/RIO/others/hmmer/squid/Formats/pir | 249 -- .../RIO/others/hmmer/squid/Formats/regression.dat | 20 - .../archive/RIO/others/hmmer/squid/Formats/selex.1 | 37 - .../archive/RIO/others/hmmer/squid/Formats/selex.2 | 11 - .../RIO/others/hmmer/squid/Formats/stockholm.1 | 94 - .../RIO/others/hmmer/squid/Formats/stockholm.2 | 366 -- .../RIO/others/hmmer/squid/Formats/swissprot | 317 -- forester/archive/RIO/others/hmmer/squid/INSTALL | 31 - .../archive/RIO/others/hmmer/squid/Makefile.in | 292 -- .../archive/RIO/others/hmmer/squid/Man/afetch.man | 98 - .../archive/RIO/others/hmmer/squid/Man/alistat.man | 138 - .../archive/RIO/others/hmmer/squid/Man/seqstat.man | 98 - .../archive/RIO/others/hmmer/squid/Man/sfetch.man | 226 - .../archive/RIO/others/hmmer/squid/Man/shuffle.man | 204 - .../RIO/others/hmmer/squid/Man/sreformat.man | 249 -- forester/archive/RIO/others/hmmer/squid/a2m.c | 113 - .../archive/RIO/others/hmmer/squid/afetch_main.c | 182 - .../archive/RIO/others/hmmer/squid/aligneval.c | 513 --- forester/archive/RIO/others/hmmer/squid/alignio.c | 643 --- .../archive/RIO/others/hmmer/squid/alistat_main.c | 273 -- forester/archive/RIO/others/hmmer/squid/clustal.c | 179 - forester/archive/RIO/others/hmmer/squid/cluster.c | 544 --- .../RIO/others/hmmer/squid/compalign_main.c | 221 - .../RIO/others/hmmer/squid/compstruct_main.c | 321 -- forester/archive/RIO/others/hmmer/squid/configure | 2241 ---------- forester/archive/RIO/others/hmmer/squid/dayhoff.c | 
171 - forester/archive/RIO/others/hmmer/squid/eps.c | 115 - forester/archive/RIO/others/hmmer/squid/file.c | 231 - forester/archive/RIO/others/hmmer/squid/getopt.c | 251 -- forester/archive/RIO/others/hmmer/squid/gki.c | 390 -- forester/archive/RIO/others/hmmer/squid/gki.h | 51 - forester/archive/RIO/others/hmmer/squid/gsi.c | 385 -- forester/archive/RIO/others/hmmer/squid/gsi.h | 85 - forester/archive/RIO/others/hmmer/squid/gsi64.c | 395 -- forester/archive/RIO/others/hmmer/squid/gsi64.h | 101 - forester/archive/RIO/others/hmmer/squid/hsregex.c | 1314 ------ forester/archive/RIO/others/hmmer/squid/install-sh | 251 -- forester/archive/RIO/others/hmmer/squid/iupac.c | 220 - forester/archive/RIO/others/hmmer/squid/msa.c | 1394 ------ forester/archive/RIO/others/hmmer/squid/msa.h | 286 -- forester/archive/RIO/others/hmmer/squid/msf.c | 389 -- forester/archive/RIO/others/hmmer/squid/phylip.c | 174 - forester/archive/RIO/others/hmmer/squid/revcomp.c | 62 - .../archive/RIO/others/hmmer/squid/revcomp_main.c | 93 - forester/archive/RIO/others/hmmer/squid/rk.c | 134 - forester/archive/RIO/others/hmmer/squid/rk.h | 40 - forester/archive/RIO/others/hmmer/squid/selex.c | 814 ---- .../archive/RIO/others/hmmer/squid/seqencode.c | 177 - .../archive/RIO/others/hmmer/squid/seqsplit_main.c | 163 - .../archive/RIO/others/hmmer/squid/seqstat_main.c | 229 - .../archive/RIO/others/hmmer/squid/sfetch_main.c | 444 -- forester/archive/RIO/others/hmmer/squid/shuffle.c | 550 --- .../archive/RIO/others/hmmer/squid/shuffle_main.c | 281 -- .../archive/RIO/others/hmmer/squid/sindex_main.c | 185 - forester/archive/RIO/others/hmmer/squid/sqerror.c | 95 - forester/archive/RIO/others/hmmer/squid/sqfuncs.h | 293 -- forester/archive/RIO/others/hmmer/squid/sqio.c | 1901 -------- forester/archive/RIO/others/hmmer/squid/squid.h.in | 473 -- .../archive/RIO/others/hmmer/squid/squidconf.h.in | 76 - .../archive/RIO/others/hmmer/squid/squidcore.c | 53 - .../archive/RIO/others/hmmer/squid/sre_ctype.c | 39 - 
forester/archive/RIO/others/hmmer/squid/sre_math.c | 787 ---- .../archive/RIO/others/hmmer/squid/sre_string.c | 524 --- .../RIO/others/hmmer/squid/sreformat_main.c | 251 -- forester/archive/RIO/others/hmmer/squid/ssi.c | 1504 ------- forester/archive/RIO/others/hmmer/squid/ssi.h | 193 - forester/archive/RIO/others/hmmer/squid/stack.c | 103 - .../archive/RIO/others/hmmer/squid/stockholm.c | 607 --- .../archive/RIO/others/hmmer/squid/stockholm.h | 51 - .../archive/RIO/others/hmmer/squid/stopwatch.c | 307 -- .../archive/RIO/others/hmmer/squid/stopwatch.h | 59 - .../archive/RIO/others/hmmer/squid/test_main.c | 25 - .../archive/RIO/others/hmmer/squid/translate.c | 87 - .../RIO/others/hmmer/squid/translate_main.c | 226 - forester/archive/RIO/others/hmmer/squid/types.c | 228 - forester/archive/RIO/others/hmmer/squid/weight.c | 748 ---- .../archive/RIO/others/hmmer/squid/weight_main.c | 187 - forester/archive/RIO/others/hmmer/src/Makefile.in | 128 - forester/archive/RIO/others/hmmer/src/alphabet.c | 426 -- forester/archive/RIO/others/hmmer/src/camJul97.c | 747 ---- forester/archive/RIO/others/hmmer/src/config.h | 52 - .../archive/RIO/others/hmmer/src/core_algorithms.c | 2445 ----------- forester/archive/RIO/others/hmmer/src/debug.c | 368 -- forester/archive/RIO/others/hmmer/src/display.c | 447 -- forester/archive/RIO/others/hmmer/src/emit.c | 457 -- forester/archive/RIO/others/hmmer/src/emulation.c | 242 -- forester/archive/RIO/others/hmmer/src/funcs.h | 350 -- forester/archive/RIO/others/hmmer/src/globals.h | 24 - forester/archive/RIO/others/hmmer/src/histogram.c | 1369 ------ forester/archive/RIO/others/hmmer/src/hmmalign.c | 325 -- forester/archive/RIO/others/hmmer/src/hmmbuild.c | 1129 ----- .../RIO/others/hmmer/src/hmmcalibrate-pvm.c | 209 - .../archive/RIO/others/hmmer/src/hmmcalibrate.c | 957 ----- forester/archive/RIO/others/hmmer/src/hmmconvert.c | 209 - forester/archive/RIO/others/hmmer/src/hmmemit.c | 267 -- forester/archive/RIO/others/hmmer/src/hmmfetch.c | 130 
- forester/archive/RIO/others/hmmer/src/hmmindex.c | 166 - forester/archive/RIO/others/hmmer/src/hmmio.c | 1744 -------- .../archive/RIO/others/hmmer/src/hmmpfam-pvm.c | 229 - forester/archive/RIO/others/hmmer/src/hmmpfam.c | 1094 ----- forester/archive/RIO/others/hmmer/src/hmmpostal.c | 1108 ----- .../archive/RIO/others/hmmer/src/hmmsearch-pvm.c | 180 - forester/archive/RIO/others/hmmer/src/hmmsearch.c | 1101 ----- forester/archive/RIO/others/hmmer/src/masks.c | 367 -- .../archive/RIO/others/hmmer/src/mathsupport.c | 362 -- forester/archive/RIO/others/hmmer/src/misc.c | 140 - .../archive/RIO/others/hmmer/src/modelmakers.c | 940 ---- forester/archive/RIO/others/hmmer/src/plan7.c | 1036 ----- forester/archive/RIO/others/hmmer/src/plan9.c | 141 - forester/archive/RIO/others/hmmer/src/postprob.c | 709 --- forester/archive/RIO/others/hmmer/src/postprob.h | 55 - forester/archive/RIO/others/hmmer/src/prior.c | 725 ---- forester/archive/RIO/others/hmmer/src/pvm.c | 453 -- forester/archive/RIO/others/hmmer/src/states.c | 444 -- forester/archive/RIO/others/hmmer/src/structs.h | 564 --- forester/archive/RIO/others/hmmer/src/threads.c | 90 - forester/archive/RIO/others/hmmer/src/tophits.c | 376 -- forester/archive/RIO/others/hmmer/src/trace.c | 1203 ------ forester/archive/RIO/others/hmmer/src/weetest.c | 55 - .../RIO/others/hmmer/testsuite/Exercises.sh | 17 - .../archive/RIO/others/hmmer/testsuite/Makefile.in | 83 - .../RIO/others/hmmer/testsuite/Optiontests.fa | 16 - .../RIO/others/hmmer/testsuite/Optiontests.nfa | 12 - .../RIO/others/hmmer/testsuite/Optiontests.nslx | 9 - .../RIO/others/hmmer/testsuite/Optiontests.pam | 31 - .../RIO/others/hmmer/testsuite/Optiontests.pl | 116 - .../RIO/others/hmmer/testsuite/Optiontests.pri | 59 - .../RIO/others/hmmer/testsuite/Optiontests.slx | 15 - forester/archive/RIO/others/hmmer/testsuite/README | 82 - .../RIO/others/hmmer/testsuite/alignalign_test.c | 206 - .../archive/RIO/others/hmmer/testsuite/evd_test.c | 295 -- 
.../RIO/others/hmmer/testsuite/fitting_test.c | 71 - .../archive/RIO/others/hmmer/testsuite/fn3-bin | Bin 16930 -> 0 bytes .../RIO/others/hmmer/testsuite/fn3-bin-swap | Bin 16930 -> 0 bytes .../archive/RIO/others/hmmer/testsuite/fn3.hmm | 270 -- .../archive/RIO/others/hmmer/testsuite/fn3.seed | 332 -- .../RIO/others/hmmer/testsuite/masks_test.c | 149 - .../others/hmmer/testsuite/parsingviterbi_test.c | 167 - .../archive/RIO/others/hmmer/testsuite/titin.fa | 386 -- .../RIO/others/hmmer/testsuite/tophits_test.c | 170 - .../RIO/others/hmmer/testsuite/trace_test.c | 146 - .../RIO/others/hmmer/testsuite/trace_test.hmm | 47 - .../RIO/others/hmmer/testsuite/trace_test.seq | 25 - .../RIO/others/hmmer/testsuite/trace_test.slx | 7 - .../RIO/others/hmmer/testsuite/viterbi_exercise.c | 166 - .../RIO/others/hmmer/testsuite/weeviterbi_test.c | 150 - .../RIO/others/hmmer/testsuite/weeviterbi_test.hmm | 233 - .../RIO/others/hmmer/testsuite/weeviterbi_test.seq | 10 - .../archive/RIO/others/hmmer/tutorial/7LES_DROME | 138 - .../archive/RIO/others/hmmer/tutorial/Artemia.fa | 48 - .../archive/RIO/others/hmmer/tutorial/RU1A_HUMAN | 98 - .../archive/RIO/others/hmmer/tutorial/amino.null | 30 - .../archive/RIO/others/hmmer/tutorial/amino.pri | 70 - forester/archive/RIO/others/hmmer/tutorial/fn3.slx | 351 -- .../RIO/others/hmmer/tutorial/globins50.msf | 427 -- .../RIO/others/hmmer/tutorial/globins630.fa | 2520 ----------- .../archive/RIO/others/hmmer/tutorial/nucleic.null | 15 - .../archive/RIO/others/hmmer/tutorial/nucleic.pri | 27 - .../archive/RIO/others/hmmer/tutorial/pkinase.slx | 712 --- forester/archive/RIO/others/hmmer/tutorial/rrm.hmm | 237 - forester/archive/RIO/others/hmmer/tutorial/rrm.slx | 167 - .../archive/RIO/others/phylip_mod/IMPORTANT_NOTICE | 48 - forester/archive/RIO/others/phylip_mod/src/CHANGES | 29 - .../archive/RIO/others/phylip_mod/src/Makefile | 451 -- forester/archive/RIO/others/phylip_mod/src/cons.c | 1457 ------- 
forester/archive/RIO/others/phylip_mod/src/cons.h | 58 - .../archive/RIO/others/phylip_mod/src/consense.c | 414 -- forester/archive/RIO/others/phylip_mod/src/dist.c | 526 --- forester/archive/RIO/others/phylip_mod/src/dist.h | 35 - forester/archive/RIO/others/phylip_mod/src/fitch.c | 1176 ----- .../archive/RIO/others/phylip_mod/src/neighbor.c | 602 --- .../archive/RIO/others/phylip_mod/src/phylip.c | 2750 ------------ .../archive/RIO/others/phylip_mod/src/phylip.h | 607 --- forester/archive/RIO/others/phylip_mod/src/proml.c | 3202 -------------- .../archive/RIO/others/phylip_mod/src/promlk.c | 3176 -------------- .../archive/RIO/others/phylip_mod/src/protdist.c | 1973 --------- .../archive/RIO/others/phylip_mod/src/protpars.c | 1925 --------- forester/archive/RIO/others/phylip_mod/src/seq.c | 4178 ------------------ forester/archive/RIO/others/phylip_mod/src/seq.h | 216 - .../archive/RIO/others/phylip_mod/src/seqboot.c | 1419 ------ .../RIO/others/phylip_mod/src/test_infile_fitch | 8 - .../RIO/others/phylip_mod/src/test_infile_protdist | 6 - .../RIO/others/phylip_mod/src/test_infile_protml | 6 - .../RIO/others/phylip_mod/src/test_infile_protmlk | 6 - .../RIO/others/phylip_mod/src/test_infile_protpars | 6 - .../RIO/others/phylip_mod/src/test_infile_seqbboot | 6 - forester/archive/RIO/others/puzzle_dqo/AUTHORS | 45 - forester/archive/RIO/others/puzzle_dqo/COPYING | 340 -- forester/archive/RIO/others/puzzle_dqo/ChangeLog | 347 -- .../archive/RIO/others/puzzle_dqo/IMPORTANT_NOTICE | 48 - forester/archive/RIO/others/puzzle_dqo/INSTALL | 182 - forester/archive/RIO/others/puzzle_dqo/Makefile | 327 -- forester/archive/RIO/others/puzzle_dqo/Makefile.am | 2 - forester/archive/RIO/others/puzzle_dqo/Makefile.in | 327 -- forester/archive/RIO/others/puzzle_dqo/aclocal.m4 | 104 - .../archive/RIO/others/puzzle_dqo/config.status | 179 - forester/archive/RIO/others/puzzle_dqo/configure | 2265 ---------- .../archive/RIO/others/puzzle_dqo/configure.in | 117 - 
.../archive/RIO/others/puzzle_dqo/data/Makefile | 177 - .../archive/RIO/others/puzzle_dqo/data/Makefile.am | 1 - .../archive/RIO/others/puzzle_dqo/data/Makefile.in | 177 - .../archive/RIO/others/puzzle_dqo/doc/Makefile | 177 - .../archive/RIO/others/puzzle_dqo/doc/Makefile.am | 1 - .../archive/RIO/others/puzzle_dqo/doc/Makefile.in | 177 - forester/archive/RIO/others/puzzle_dqo/install-sh | 251 -- forester/archive/RIO/others/puzzle_dqo/missing | 190 - .../archive/RIO/others/puzzle_dqo/mkinstalldirs | 40 - .../archive/RIO/others/puzzle_dqo/src/Makefile | 317 -- .../archive/RIO/others/puzzle_dqo/src/Makefile.am | 49 - .../archive/RIO/others/puzzle_dqo/src/Makefile.in | 317 -- forester/archive/RIO/others/puzzle_dqo/src/README | 1 - forester/archive/RIO/others/puzzle_dqo/src/gamma.c | 346 -- forester/archive/RIO/others/puzzle_dqo/src/gamma.h | 30 - forester/archive/RIO/others/puzzle_dqo/src/ml.h | 279 -- forester/archive/RIO/others/puzzle_dqo/src/ml1.c | 1743 -------- forester/archive/RIO/others/puzzle_dqo/src/ml2.c | 1637 ------- forester/archive/RIO/others/puzzle_dqo/src/ml3.c | 350 -- .../archive/RIO/others/puzzle_dqo/src/model1.c | 326 -- .../archive/RIO/others/puzzle_dqo/src/model2.c | 1125 ----- forester/archive/RIO/others/puzzle_dqo/src/outdist | 4 - .../archive/RIO/others/puzzle_dqo/src/ppuzzle.h | 274 -- .../archive/RIO/others/puzzle_dqo/src/puzzle.h | 493 --- .../archive/RIO/others/puzzle_dqo/src/puzzle1.c | 2864 ------------- .../archive/RIO/others/puzzle_dqo/src/puzzle2.c | 2651 ------------ forester/archive/RIO/others/puzzle_dqo/src/sched.c | 423 -- forester/archive/RIO/others/puzzle_dqo/src/sched.h | 53 - forester/archive/RIO/others/puzzle_dqo/src/test | 19 - forester/archive/RIO/others/puzzle_dqo/src/test.in | 19 - forester/archive/RIO/others/puzzle_dqo/src/util.c | 751 ---- forester/archive/RIO/others/puzzle_dqo/src/util.h | 96 - forester/archive/RIO/others/puzzle_mod/AUTHORS | 45 - forester/archive/RIO/others/puzzle_mod/COPYING | 340 -- 
forester/archive/RIO/others/puzzle_mod/ChangeLog | 347 -- .../archive/RIO/others/puzzle_mod/IMPORTANT_NOTICE | 48 - forester/archive/RIO/others/puzzle_mod/Makefile | 327 -- forester/archive/RIO/others/puzzle_mod/Makefile.am | 2 - forester/archive/RIO/others/puzzle_mod/Makefile.in | 327 -- forester/archive/RIO/others/puzzle_mod/aclocal.m4 | 104 - .../archive/RIO/others/puzzle_mod/config.status | 179 - forester/archive/RIO/others/puzzle_mod/configure | 2265 ---------- .../archive/RIO/others/puzzle_mod/configure.in | 117 - .../archive/RIO/others/puzzle_mod/data/Makefile | 177 - .../archive/RIO/others/puzzle_mod/data/Makefile.am | 1 - .../archive/RIO/others/puzzle_mod/data/Makefile.in | 177 - .../archive/RIO/others/puzzle_mod/doc/Makefile | 177 - .../archive/RIO/others/puzzle_mod/doc/Makefile.am | 1 - .../archive/RIO/others/puzzle_mod/doc/Makefile.in | 177 - forester/archive/RIO/others/puzzle_mod/install-sh | 251 -- forester/archive/RIO/others/puzzle_mod/missing | 190 - .../archive/RIO/others/puzzle_mod/mkinstalldirs | 40 - .../archive/RIO/others/puzzle_mod/src/00README | 97 - .../archive/RIO/others/puzzle_mod/src/Makefile | 356 -- .../archive/RIO/others/puzzle_mod/src/Makefile.am | 77 - .../archive/RIO/others/puzzle_mod/src/Makefile.in | 356 -- forester/archive/RIO/others/puzzle_mod/src/README | 1 - forester/archive/RIO/others/puzzle_mod/src/gamma.c | 346 -- forester/archive/RIO/others/puzzle_mod/src/gamma.h | 30 - forester/archive/RIO/others/puzzle_mod/src/ml.h | 279 -- forester/archive/RIO/others/puzzle_mod/src/ml1.c | 1734 -------- forester/archive/RIO/others/puzzle_mod/src/ml2.c | 1871 -------- forester/archive/RIO/others/puzzle_mod/src/ml3.c | 350 -- .../archive/RIO/others/puzzle_mod/src/model1.c | 326 -- .../archive/RIO/others/puzzle_mod/src/model2.c | 1125 ----- .../archive/RIO/others/puzzle_mod/src/ppuzzle.c | 2418 ----------- .../archive/RIO/others/puzzle_mod/src/ppuzzle.h | 274 -- .../archive/RIO/others/puzzle_mod/src/puzzle.h | 493 --- 
.../archive/RIO/others/puzzle_mod/src/puzzle1.c | 4527 -------------------- .../archive/RIO/others/puzzle_mod/src/puzzle2.c | 2701 ------------ forester/archive/RIO/others/puzzle_mod/src/sched.c | 423 -- forester/archive/RIO/others/puzzle_mod/src/sched.h | 53 - forester/archive/RIO/others/puzzle_mod/src/test | 19 - forester/archive/RIO/others/puzzle_mod/src/test.in | 19 - forester/archive/RIO/others/puzzle_mod/src/util.c | 748 ---- forester/archive/RIO/others/puzzle_mod/src/util.h | 96 - 315 files changed, 136393 deletions(-) delete mode 100644 forester/archive/RIO/others/hmmer/00README delete mode 100644 forester/archive/RIO/others/hmmer/COPYRIGHT delete mode 100644 forester/archive/RIO/others/hmmer/INSTALL delete mode 100644 forester/archive/RIO/others/hmmer/LICENSE delete mode 100644 forester/archive/RIO/others/hmmer/Makefile.in delete mode 100644 forester/archive/RIO/others/hmmer/NOTES delete mode 100644 forester/archive/RIO/others/hmmer/Userguide.pdf delete mode 100755 forester/archive/RIO/others/hmmer/config.guess delete mode 100755 forester/archive/RIO/others/hmmer/config.sub delete mode 100755 forester/archive/RIO/others/hmmer/configure delete mode 100644 forester/archive/RIO/others/hmmer/documentation/man/hmmalign.man delete mode 100644 forester/archive/RIO/others/hmmer/documentation/man/hmmbuild.man delete mode 100644 forester/archive/RIO/others/hmmer/documentation/man/hmmcalibrate.man delete mode 100644 forester/archive/RIO/others/hmmer/documentation/man/hmmconvert.man delete mode 100644 forester/archive/RIO/others/hmmer/documentation/man/hmmemit.man delete mode 100644 forester/archive/RIO/others/hmmer/documentation/man/hmmer.man delete mode 100644 forester/archive/RIO/others/hmmer/documentation/man/hmmfetch.man delete mode 100644 forester/archive/RIO/others/hmmer/documentation/man/hmmindex.man delete mode 100644 forester/archive/RIO/others/hmmer/documentation/man/hmmpfam.man delete mode 100644 
forester/archive/RIO/others/hmmer/documentation/man/hmmsearch.man delete mode 100755 forester/archive/RIO/others/hmmer/install-sh delete mode 100644 forester/archive/RIO/others/hmmer/squid/00README delete mode 100644 forester/archive/RIO/others/hmmer/squid/Docs/abstract.tex delete mode 100644 forester/archive/RIO/others/hmmer/squid/Docs/formats.tex delete mode 100644 forester/archive/RIO/others/hmmer/squid/Docs/gsi-format.tex delete mode 100644 forester/archive/RIO/others/hmmer/squid/Docs/intro.tex delete mode 100644 forester/archive/RIO/others/hmmer/squid/Docs/main.tex delete mode 100644 forester/archive/RIO/others/hmmer/squid/Docs/selex.tex delete mode 100644 forester/archive/RIO/others/hmmer/squid/Docs/squid-format.tex delete mode 100644 forester/archive/RIO/others/hmmer/squid/Docs/ssi-format.tex delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/a2m delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/clustal delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/embl delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/fasta delete mode 100755 forester/archive/RIO/others/hmmer/squid/Formats/formattest.pl delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/gcg delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/gcgdata.1 delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/gcgdata.2 delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/genbank delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/msf delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/phylip delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/pir delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/regression.dat delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/selex.1 delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/selex.2 delete mode 100644 
forester/archive/RIO/others/hmmer/squid/Formats/stockholm.1 delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/stockholm.2 delete mode 100644 forester/archive/RIO/others/hmmer/squid/Formats/swissprot delete mode 100644 forester/archive/RIO/others/hmmer/squid/INSTALL delete mode 100644 forester/archive/RIO/others/hmmer/squid/Makefile.in delete mode 100644 forester/archive/RIO/others/hmmer/squid/Man/afetch.man delete mode 100644 forester/archive/RIO/others/hmmer/squid/Man/alistat.man delete mode 100644 forester/archive/RIO/others/hmmer/squid/Man/seqstat.man delete mode 100644 forester/archive/RIO/others/hmmer/squid/Man/sfetch.man delete mode 100644 forester/archive/RIO/others/hmmer/squid/Man/shuffle.man delete mode 100644 forester/archive/RIO/others/hmmer/squid/Man/sreformat.man delete mode 100644 forester/archive/RIO/others/hmmer/squid/a2m.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/afetch_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/aligneval.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/alignio.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/alistat_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/clustal.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/cluster.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/compalign_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/compstruct_main.c delete mode 100755 forester/archive/RIO/others/hmmer/squid/configure delete mode 100644 forester/archive/RIO/others/hmmer/squid/dayhoff.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/eps.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/file.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/getopt.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/gki.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/gki.h delete mode 100644 forester/archive/RIO/others/hmmer/squid/gsi.c delete mode 100644 
forester/archive/RIO/others/hmmer/squid/gsi.h delete mode 100644 forester/archive/RIO/others/hmmer/squid/gsi64.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/gsi64.h delete mode 100644 forester/archive/RIO/others/hmmer/squid/hsregex.c delete mode 100755 forester/archive/RIO/others/hmmer/squid/install-sh delete mode 100644 forester/archive/RIO/others/hmmer/squid/iupac.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/msa.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/msa.h delete mode 100644 forester/archive/RIO/others/hmmer/squid/msf.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/phylip.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/revcomp.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/revcomp_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/rk.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/rk.h delete mode 100644 forester/archive/RIO/others/hmmer/squid/selex.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/seqencode.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/seqsplit_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/seqstat_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/sfetch_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/shuffle.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/shuffle_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/sindex_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/sqerror.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/sqfuncs.h delete mode 100644 forester/archive/RIO/others/hmmer/squid/sqio.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/squid.h.in delete mode 100644 forester/archive/RIO/others/hmmer/squid/squidconf.h.in delete mode 100644 forester/archive/RIO/others/hmmer/squid/squidcore.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/sre_ctype.c delete mode 
100644 forester/archive/RIO/others/hmmer/squid/sre_math.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/sre_string.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/sreformat_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/ssi.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/ssi.h delete mode 100644 forester/archive/RIO/others/hmmer/squid/stack.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/stockholm.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/stockholm.h delete mode 100644 forester/archive/RIO/others/hmmer/squid/stopwatch.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/stopwatch.h delete mode 100644 forester/archive/RIO/others/hmmer/squid/test_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/translate.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/translate_main.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/types.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/weight.c delete mode 100644 forester/archive/RIO/others/hmmer/squid/weight_main.c delete mode 100644 forester/archive/RIO/others/hmmer/src/Makefile.in delete mode 100644 forester/archive/RIO/others/hmmer/src/alphabet.c delete mode 100644 forester/archive/RIO/others/hmmer/src/camJul97.c delete mode 100644 forester/archive/RIO/others/hmmer/src/config.h delete mode 100644 forester/archive/RIO/others/hmmer/src/core_algorithms.c delete mode 100644 forester/archive/RIO/others/hmmer/src/debug.c delete mode 100644 forester/archive/RIO/others/hmmer/src/display.c delete mode 100644 forester/archive/RIO/others/hmmer/src/emit.c delete mode 100644 forester/archive/RIO/others/hmmer/src/emulation.c delete mode 100644 forester/archive/RIO/others/hmmer/src/funcs.h delete mode 100644 forester/archive/RIO/others/hmmer/src/globals.h delete mode 100644 forester/archive/RIO/others/hmmer/src/histogram.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmalign.c delete 
mode 100644 forester/archive/RIO/others/hmmer/src/hmmbuild.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmcalibrate-pvm.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmcalibrate.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmconvert.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmemit.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmfetch.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmindex.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmio.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmpfam-pvm.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmpfam.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmpostal.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmsearch-pvm.c delete mode 100644 forester/archive/RIO/others/hmmer/src/hmmsearch.c delete mode 100644 forester/archive/RIO/others/hmmer/src/masks.c delete mode 100644 forester/archive/RIO/others/hmmer/src/mathsupport.c delete mode 100644 forester/archive/RIO/others/hmmer/src/misc.c delete mode 100644 forester/archive/RIO/others/hmmer/src/modelmakers.c delete mode 100644 forester/archive/RIO/others/hmmer/src/plan7.c delete mode 100644 forester/archive/RIO/others/hmmer/src/plan9.c delete mode 100644 forester/archive/RIO/others/hmmer/src/postprob.c delete mode 100644 forester/archive/RIO/others/hmmer/src/postprob.h delete mode 100644 forester/archive/RIO/others/hmmer/src/prior.c delete mode 100644 forester/archive/RIO/others/hmmer/src/pvm.c delete mode 100644 forester/archive/RIO/others/hmmer/src/states.c delete mode 100644 forester/archive/RIO/others/hmmer/src/structs.h delete mode 100644 forester/archive/RIO/others/hmmer/src/threads.c delete mode 100644 forester/archive/RIO/others/hmmer/src/tophits.c delete mode 100644 forester/archive/RIO/others/hmmer/src/trace.c delete mode 100644 forester/archive/RIO/others/hmmer/src/weetest.c delete mode 100644 
forester/archive/RIO/others/hmmer/testsuite/Exercises.sh delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/Makefile.in delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/Optiontests.fa delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/Optiontests.nfa delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/Optiontests.nslx delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/Optiontests.pam delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/Optiontests.pl delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/Optiontests.pri delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/Optiontests.slx delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/README delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/alignalign_test.c delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/evd_test.c delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/fitting_test.c delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/fn3-bin delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/fn3-bin-swap delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/fn3.hmm delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/fn3.seed delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/masks_test.c delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/parsingviterbi_test.c delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/titin.fa delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/tophits_test.c delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/trace_test.c delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/trace_test.hmm delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/trace_test.seq delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/trace_test.slx delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/viterbi_exercise.c delete mode 100644 
forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.c delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.hmm delete mode 100644 forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.seq delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/7LES_DROME delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/Artemia.fa delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/RU1A_HUMAN delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/amino.null delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/amino.pri delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/fn3.slx delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/globins50.msf delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/globins630.fa delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/nucleic.null delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/nucleic.pri delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/pkinase.slx delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/rrm.hmm delete mode 100644 forester/archive/RIO/others/hmmer/tutorial/rrm.slx delete mode 100644 forester/archive/RIO/others/phylip_mod/IMPORTANT_NOTICE delete mode 100644 forester/archive/RIO/others/phylip_mod/src/CHANGES delete mode 100644 forester/archive/RIO/others/phylip_mod/src/Makefile delete mode 100644 forester/archive/RIO/others/phylip_mod/src/cons.c delete mode 100644 forester/archive/RIO/others/phylip_mod/src/cons.h delete mode 100644 forester/archive/RIO/others/phylip_mod/src/consense.c delete mode 100644 forester/archive/RIO/others/phylip_mod/src/dist.c delete mode 100644 forester/archive/RIO/others/phylip_mod/src/dist.h delete mode 100644 forester/archive/RIO/others/phylip_mod/src/fitch.c delete mode 100644 forester/archive/RIO/others/phylip_mod/src/neighbor.c delete mode 100644 forester/archive/RIO/others/phylip_mod/src/phylip.c delete mode 100644 
forester/archive/RIO/others/phylip_mod/src/phylip.h delete mode 100644 forester/archive/RIO/others/phylip_mod/src/proml.c delete mode 100644 forester/archive/RIO/others/phylip_mod/src/promlk.c delete mode 100644 forester/archive/RIO/others/phylip_mod/src/protdist.c delete mode 100644 forester/archive/RIO/others/phylip_mod/src/protpars.c delete mode 100644 forester/archive/RIO/others/phylip_mod/src/seq.c delete mode 100644 forester/archive/RIO/others/phylip_mod/src/seq.h delete mode 100644 forester/archive/RIO/others/phylip_mod/src/seqboot.c delete mode 100644 forester/archive/RIO/others/phylip_mod/src/test_infile_fitch delete mode 100644 forester/archive/RIO/others/phylip_mod/src/test_infile_protdist delete mode 100644 forester/archive/RIO/others/phylip_mod/src/test_infile_protml delete mode 100644 forester/archive/RIO/others/phylip_mod/src/test_infile_protmlk delete mode 100644 forester/archive/RIO/others/phylip_mod/src/test_infile_protpars delete mode 100644 forester/archive/RIO/others/phylip_mod/src/test_infile_seqbboot delete mode 100644 forester/archive/RIO/others/puzzle_dqo/AUTHORS delete mode 100644 forester/archive/RIO/others/puzzle_dqo/COPYING delete mode 100644 forester/archive/RIO/others/puzzle_dqo/ChangeLog delete mode 100644 forester/archive/RIO/others/puzzle_dqo/IMPORTANT_NOTICE delete mode 100644 forester/archive/RIO/others/puzzle_dqo/INSTALL delete mode 100644 forester/archive/RIO/others/puzzle_dqo/Makefile delete mode 100644 forester/archive/RIO/others/puzzle_dqo/Makefile.am delete mode 100644 forester/archive/RIO/others/puzzle_dqo/Makefile.in delete mode 100644 forester/archive/RIO/others/puzzle_dqo/aclocal.m4 delete mode 100755 forester/archive/RIO/others/puzzle_dqo/config.status delete mode 100755 forester/archive/RIO/others/puzzle_dqo/configure delete mode 100644 forester/archive/RIO/others/puzzle_dqo/configure.in delete mode 100644 forester/archive/RIO/others/puzzle_dqo/data/Makefile delete mode 100644 
forester/archive/RIO/others/puzzle_dqo/data/Makefile.am delete mode 100644 forester/archive/RIO/others/puzzle_dqo/data/Makefile.in delete mode 100644 forester/archive/RIO/others/puzzle_dqo/doc/Makefile delete mode 100644 forester/archive/RIO/others/puzzle_dqo/doc/Makefile.am delete mode 100644 forester/archive/RIO/others/puzzle_dqo/doc/Makefile.in delete mode 100755 forester/archive/RIO/others/puzzle_dqo/install-sh delete mode 100755 forester/archive/RIO/others/puzzle_dqo/missing delete mode 100755 forester/archive/RIO/others/puzzle_dqo/mkinstalldirs delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/Makefile delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/Makefile.am delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/Makefile.in delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/README delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/gamma.c delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/gamma.h delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/ml.h delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/ml1.c delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/ml2.c delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/ml3.c delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/model1.c delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/model2.c delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/outdist delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/ppuzzle.h delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/puzzle.h delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/puzzle1.c delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/puzzle2.c delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/sched.c delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/sched.h delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/test delete mode 100644 
forester/archive/RIO/others/puzzle_dqo/src/test.in delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/util.c delete mode 100644 forester/archive/RIO/others/puzzle_dqo/src/util.h delete mode 100644 forester/archive/RIO/others/puzzle_mod/AUTHORS delete mode 100644 forester/archive/RIO/others/puzzle_mod/COPYING delete mode 100644 forester/archive/RIO/others/puzzle_mod/ChangeLog delete mode 100644 forester/archive/RIO/others/puzzle_mod/IMPORTANT_NOTICE delete mode 100644 forester/archive/RIO/others/puzzle_mod/Makefile delete mode 100644 forester/archive/RIO/others/puzzle_mod/Makefile.am delete mode 100644 forester/archive/RIO/others/puzzle_mod/Makefile.in delete mode 100644 forester/archive/RIO/others/puzzle_mod/aclocal.m4 delete mode 100755 forester/archive/RIO/others/puzzle_mod/config.status delete mode 100755 forester/archive/RIO/others/puzzle_mod/configure delete mode 100644 forester/archive/RIO/others/puzzle_mod/configure.in delete mode 100644 forester/archive/RIO/others/puzzle_mod/data/Makefile delete mode 100644 forester/archive/RIO/others/puzzle_mod/data/Makefile.am delete mode 100644 forester/archive/RIO/others/puzzle_mod/data/Makefile.in delete mode 100644 forester/archive/RIO/others/puzzle_mod/doc/Makefile delete mode 100644 forester/archive/RIO/others/puzzle_mod/doc/Makefile.am delete mode 100644 forester/archive/RIO/others/puzzle_mod/doc/Makefile.in delete mode 100755 forester/archive/RIO/others/puzzle_mod/install-sh delete mode 100755 forester/archive/RIO/others/puzzle_mod/missing delete mode 100755 forester/archive/RIO/others/puzzle_mod/mkinstalldirs delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/00README delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/Makefile delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/Makefile.am delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/Makefile.in delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/README delete mode 100644 
forester/archive/RIO/others/puzzle_mod/src/gamma.c delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/gamma.h delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/ml.h delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/ml1.c delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/ml2.c delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/ml3.c delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/model1.c delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/model2.c delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/ppuzzle.c delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/ppuzzle.h delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/puzzle.h delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/puzzle1.c delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/puzzle2.c delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/sched.c delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/sched.h delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/test delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/test.in delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/util.c delete mode 100644 forester/archive/RIO/others/puzzle_mod/src/util.h diff --git a/forester/archive/RIO/others/hmmer/00README b/forester/archive/RIO/others/hmmer/00README deleted file mode 100644 index 71bff7e..0000000 --- a/forester/archive/RIO/others/hmmer/00README +++ /dev/null @@ -1,57 +0,0 @@ -HMMER - profile hidden Markov models for biological sequence analysis -Version 2.2 (August 2001) -Copyright (C) 1992-2001 Washington University School of Medicine ------------------------------------------------------------------- - -o About this software... - HMMER is an implementation of profile HMM methods for - sensitive database searches using multiple sequence alignments as queries. 
- - Basically, you give HMMER a multiple sequence alignment as input; - it builds a statistical model called a "hidden Markov model" - which you can then use as a query into a sequence database - to find (and/or align) additional homologues of the sequence family. - -o Getting HMMER - WWW home: http://hmmer.wustl.edu/ - Distribution: ftp://ftp.genetics.wustl.edu/pub/eddy/hmmer/ - -o Installing HMMER - See the file INSTALL for brief instructions. - See the chapter Installation in the HMMER User's Guide for more - detailed instructions. - - You should also read the following files: - COPYING -- copyright notice, and information on the free software license - LICENSE -- Full text of the GNU Public License, version 2 (see COPYING) - - If you have obtained HMMER from Washington University under - a non-GPL license as part of a special licensing agreement, - COPYING and LICENSE will refer to the terms of that agreement. - -o Getting started with HMMER - The User's Guide is in Userguide/Userguide.pdf [Adobe PDF format]. - It is also available online as hypertext: - http://hmmer.wustl.edu/hmmer-html/ - - A quick tutorial intro is given as the first chapter of - the User's Guide. - -o Registering HMMER - Email eddy@genetics.wustl.edu to register and get on my - infrequent mailing list of HMMER news, patches, and updates. - -o Reporting bugs - These programs are under active development. Though this - release has been tested and appears to be stable, bugs may crop up. If - you use these programs, please help me out and e-mail me with - suggestions, comments, and bug reports. (eddy@genetics.wustl.edu) - - -Sean Eddy -Howard Hughes Medical Institute and Dept. of Genetics -Washington University School of Medicine, St. 
Louis, Missouri, USA -------------------------------------------------------------------- - - - diff --git a/forester/archive/RIO/others/hmmer/COPYRIGHT b/forester/archive/RIO/others/hmmer/COPYRIGHT deleted file mode 100644 index f50acd7..0000000 --- a/forester/archive/RIO/others/hmmer/COPYRIGHT +++ /dev/null @@ -1,36 +0,0 @@ -HMMER - Biological sequence analysis with profile hidden Markov models -Copyright (C) 1992-2001 Washington University School of Medicine - -This suite of programs is free software. You can redistribute it -and/or modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. - -In other words, you are free to modify, copy, or redistribute this -source code and its documentation in any way you like, but you must -distribute all derivative versions as free software under the same -terms that I've provided my code to you (i.e. the GNU General Public -License). This precludes any use of the code in proprietary or -commercial software unless your source code is made freely available. - -If you wish to use HMMER code under a different Open Source license -that's not compatible with the GPL (like the Artistic License, BSD -license, or the Netscape Public License), please contact me -(eddy@genetics.wustl.edu) for permission. - -Incorporation into commercial software under non-GPL terms is possible -by obtaining a specially licensed version from Washington University -School of Medicine. Contact Jack Pincus (jhpincus@cris.com) to arrange -licensing terms. - -This software is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this HMMER release, in the file LICENSE; if not, write to -the Free Software Foundation, Inc., 675 Mass. Ave, Cambridge, MA 02139 -USA. - - diff --git a/forester/archive/RIO/others/hmmer/INSTALL b/forester/archive/RIO/others/hmmer/INSTALL deleted file mode 100644 index 268dd24..0000000 --- a/forester/archive/RIO/others/hmmer/INSTALL +++ /dev/null @@ -1,45 +0,0 @@ -Brief installation instructions for HMMER 2.2 -SRE, Sun Aug 5 16:22:52 2001 ------------------------------------------- - -For a source distribution (hmmer-2.2.tar.gz): - - uncompress hmmer-2.2.tar.gz Uncompresses the archive. - tar xf hmmer-2.2.tar Unpacks the archive. (makes a new directory, hmmer-2.2) - cd hmmer-2.2 Moves into the distribution toplevel directory. - ./configure Configures the software for your system. - make Builds the binaries. - make check Runs the test suite to test the new binaries. - make install Installs the software. (You may need to be root.) - - It should build cleanly on just about any UNIX machine. - -For a binary distribution (hmmer-2.2.bin.-.tar.gz), -for example, the sun-solaris distro: - - uncompress hmmer-2.2.bin.sun-solaris.tar.Z - tar xf hmmer-2.2.bin.sun-solaris.tar - cd hmmer-2.2 - ./configure - make install - - Note that "make" and "make check" aren't necessary, since you - have a precompiled distribution. - - If your machine doesn't have a C compiler, the ./configure will - fail, and you won't be able to do a "make install". This is - an artifact of the way I have the configure script built. - If this happens, just copy the man pages and binaries whereever - you want them, for instance: - - cp Man/* /usr/local/man/man1/ - cp binaries/* /usr/local/bin/ - - HMMER doesn't depend on any external data files, so the installation is - simple - the binaries are free standing, and you can install - them anywhere you like. 
- -For more detail, see the Installation chapter in the HMMER User's -Guide. - - diff --git a/forester/archive/RIO/others/hmmer/LICENSE b/forester/archive/RIO/others/hmmer/LICENSE deleted file mode 100644 index a43ea21..0000000 --- a/forester/archive/RIO/others/hmmer/LICENSE +++ /dev/null @@ -1,339 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 675 Mass Ave, Cambridge, MA 02139, USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. 
- - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) 
Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. 
- - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. 
You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. 
- -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. 
If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - Appendix: How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) 19yy - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19yy name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. 
diff --git a/forester/archive/RIO/others/hmmer/Makefile.in b/forester/archive/RIO/others/hmmer/Makefile.in deleted file mode 100644 index a8e4cb4..0000000 --- a/forester/archive/RIO/others/hmmer/Makefile.in +++ /dev/null @@ -1,284 +0,0 @@ -################################################################# -# Makefile for HMMER: Main project directory: installation, documentation -# CVS $Id: Makefile.in,v 1.1.1.1 2005/03/22 08:33:51 cmzmasek Exp $ -########### -# HMMER - Biological sequence analysis with profile HMMs -# Copyright (C) 1992-1999 Washington University School of Medicine -# All Rights Reserved -# -# This source code is distributed under the terms of the -# GNU General Public License. See the files COPYING and LICENSE -# for details. -########### - -# On most Unices, you can build the package without modifying anything -# by just typing "./configure; make". -# -# You may want to modify the following make variables: -# BINDIR - where the executables will be installed by a 'make install' -# MANDIR - where the man pages will be installed by a 'make install' -# CC - which compiler to use -# CFLAGS - compiler flags to use - -# where you want things installed -# Sort of uses GNU coding standards. ${prefix} might be /usr/local. -# ${exec_prefix} gives you some flexibility for installing architecture -# dependent files (e.g. the programs): an example ${exec_prefix} might be -# /nfs/share/irix64/ -# -prefix = @prefix@ -exec_prefix = @exec_prefix@ -BINDIR = @bindir@ -MANDIR = @mandir@ - -# how to install the man pages; -# cp is generally fine, unless you preformat your pages. -# -INSTMAN = cp -MANSUFFIX = 1 - -# your compiler and compiler flags -# -CC = @CC@ -CFLAGS = @CFLAGS@ - -####### -## You should not need to modify below this line. -## Much of it is concerned with maintenance of the development version -## and building the release (indeed, several commands will only work in -## St. 
Louis) -####### -SHELL = /bin/sh -BASENAME = "hmmer" -PACKAGE = "HMMER" -RELEASE = "2.2g" -RELCODE = "hmmer2_2g" -RELEASEDATE = "August 2001" -COPYRIGHT = "Copyright \(C\) 1992-2001 HHMI/Washington University School of Medicine" -LICENSE = "Freely distributed under the GNU General Public License \(GPL\)" -LICENSETAG = gnu -COMPRESS = gzip - -# The program lists below for HMMER and SQUID are not necessarily -# a complete manifest. They are the list of stable programs that the -# package will install. There must be a man page for each one of them -# in the appropriate places (documentation/man for HMMER, squid/Man -# for the SQUID library). -# -PROGS = hmmalign\ - hmmbuild\ - hmmcalibrate\ - hmmconvert\ - hmmemit\ - hmmfetch\ - hmmindex\ - hmmpfam\ - hmmsearch - -PVMPROGS = @PVMPROGS@ - -SQUIDPROGS = afetch\ - alistat\ - seqstat\ - sfetch\ - sindex\ - shuffle\ - sreformat - -# all: Compile everything (except the testsuite), -# and stick the supported programs in binaries/ -# -all: version.h - @if test -d binaries; then\ - echo "You appear to already have a compiled HMMER distribution.";\ - echo "You don't need to make.";\ - echo "";\ - echo "If I'm wrong, it's because I'm only looking to see if you have";\ - echo "a 'binaries' directory; do 'make distclean' to revert HMMER";\ - echo "to a pristine source distribution.";\ - else\ - (cd squid; make CC="$(CC)" CFLAGS="$(CFLAGS)"; make module);\ - (cd src; make CC="$(CC)" CFLAGS="$(CFLAGS)"; make module);\ - mkdir binaries;\ - for prog in $(PROGS) $(PVMPROGS); do\ - mv src/$$prog binaries/;\ - done;\ - for prog in $(SQUIDPROGS); do\ - mv squid/$$prog binaries/;\ - done;\ - fi - -# version.h: -# create the version.h file that will define stamps used by -# squidcore.c's Banner(), which is called by all executables to -# print a standard package/copyright/license banner; -# then puts copies of version.h in all directories that are -# going to need it -- HMMER src/ and any modules of mine that -# also produce installed 
executables (squid/) -# -version.h: - echo "/* version.h -- automatically generated by a Makefile. DO NOT EDIT. */" > version.h - echo "#define PACKAGE \"$(PACKAGE)\"" >> version.h - echo "#define RELEASE \"$(RELEASE)\"" >> version.h - echo "#define RELEASEDATE \"$(RELEASEDATE)\"" >> version.h - echo "#define COPYRIGHT \"$(COPYRIGHT)\"" >> version.h - echo "#define LICENSE \"$(LICENSE)\"" >> version.h - cp version.h squid/ - cp version.h src/ - -# check: compiles and runs test suite in testsuite/ -# These are public tests, distributed with the package. -# -check: squid/libsquid.a src/libhmmer.a - (cd testsuite; make CC="$(CC)" CFLAGS="$(CFLAGS)") - (cd testsuite; make check) - -squid/libsquid.a: version.h - (cd squid; make CC="$(CC)" CFLAGS="$(CFLAGS)"; make module) - -src/libhmmer.a: version.h - (cd src; make CC="$(CC)" CFLAGS="$(CFLAGS)"; make module) - - -# install: installs the binaries in BINDIR/ -# installs man pages in MANDIR/man1/ (e.g. if MANSUFFIX is 1) -install: - for file in $(PROGS) $(SQUIDPROGS) $(PVMPROGS); do\ - cp binaries/$$file $(BINDIR)/;\ - done - for file in hmmer $(PROGS) $(SQUIDPROGS); do\ - $(INSTMAN) documentation/man/$$file.man $(MANDIR)/man$(MANSUFFIX)/$$file.$(MANSUFFIX);\ - done - for file in $(SQUIDPROGS); do\ - $(INSTMAN) squid/Man/$$file.man $(MANDIR)/man$(MANSUFFIX)/$$file.$(MANSUFFIX);\ - done - -# "make clean" removes almost everything except configuration files -# and binaries. -clean: - -rm -f *.o *~ Makefile.bak core TAGS gmon.out - (cd src; make clean) - (cd squid; make clean) - (cd testsuite; make clean) - -# "make distclean" leaves a pristine source distribution. 
-# -distclean: - -rm -rf binaries - -rm config.cache config.log config.status - -rm version.h - make clean - (cd src; make distclean) - (cd squid; make distclean) - (cd testsuite; make distclean) - -rm Makefile - -# verify: consistency checks on the package -# These are private tests, not distributed with HMMER -verify: - @echo Checking options for consistency and documentation... - @for prog in $(PROGS) $(SQUIDPROGS); do\ - ./checkoptions.pl $$prog;\ - done - -# doc: build the Userguide and on-line manual -# -doc: - (cd Userguide; make) - -# dist: build a new distribution directory in hmmer-$RELEASE -# Exports from the CVS repository. -# tags RCS files with $(RELCODE) for later reconstruction -# squid RCS files are tagged with hmmer$(RELCODE). -# Adds a license statement to each file that has a @ LICENSE @ line. -# Virtually identical to squid's make dist -- keep them in sync! -dist: - # Delete old versions of the same release - # - @if test -d ${BASENAME}-$(RELEASE); then rm -rf ${BASENAME}-$(RELEASE); fi - @if test -e ${BASENAME}-$(RELEASE).tar; then rm -f ${BASENAME}-$(RELEASE).tar; fi - @if test -e ${BASENAME}-$(RELEASE).tar.Z; then rm -f ${BASENAME}-$(RELEASE).tar.Z; fi - @if test -e ${BASENAME}-$(RELEASE).tar.gz; then rm -f ${BASENAME}-$(RELEASE).tar.gz; fi - # - # CVS tag and extract. -c: make sure we committed; - # -F: allow more than one "make dist" per rel - # prep: must have done "cvs commit", and CVSROOT must be set - # We also need the squid library, so tag and export it too. 
- # - cvs tag -F $(RELCODE) - cvs export -r $(RELCODE) -d ${BASENAME}-${RELEASE} ${BASENAME} - cvs rtag -F ${RELCODE} squid - (cd ${BASENAME}-${RELEASE}; cvs export -r ${RELCODE} -d squid squid) - # - # Make the configure scripts from configure.in - # - (cd ${BASENAME}-${RELEASE}; autoconf) - (cd ${BASENAME}-${RELEASE}/squid; autoconf) - # - # Include the appropriate license files and release notes - # - cp Licenses/LICENSE.$(LICENSETAG) ${BASENAME}-$(RELEASE)/LICENSE - cp Licenses/COPYING.$(LICENSETAG) ${BASENAME}-$(RELEASE)/COPYRIGHT - -cp Release-Notes/RELEASE-$(RELEASE) ${BASENAME}-$(RELEASE)/NOTES - # - # Attach license stamps on files that need 'em (replace LICENSE keyword) - # licenseadd.pl is basically harmless, so we can overannotate here by - # trying to licenseadd everything. - # - find ${BASENAME}-${RELEASE} -type f -exec licenseadd.pl Licenses/$(LICENSETAG) {} \; - # - # Compilation of the documentation. - # Documentation is not provided in source form. - # - (cd documentation/userguide; make clean) - (cd documentation/userguide; make pdf) - cp -f documentation/userguide/Userguide.pdf build/hmmer-$(RELEASE)/ - # - # Remove CVS-controlled files/directories that don't belong in - # the distro - # - -rm -rf ${BASENAME}-${RELEASE}/Bugs - -rm -rf ${BASENAME}-${RELEASE}/CHECKLIST - -rm -rf ${BASENAME}-${RELEASE}/Internal-Notes - -rm -rf ${BASENAME}-${RELEASE}/Licenses - -rm -rf ${BASENAME}-${RELEASE}/MAILING_LIST - -rm -rf ${BASENAME}-${RELEASE}/Release-Notes - -rm -rf ${BASENAME}-${RELEASE}/configure.in - -rm -rf ${BASENAME}-${RELEASE}/checkoptions.pl - -rm -rf ${BASENAME}-${RELEASE}/licenseadd.pl - -rm -rf ${BASENAME}-${RELEASE}/nodebuild - -rm -rf ${BASENAME}-${RELEASE}/rootbuild - -rm -rf ${BASENAME}-${RELEASE}/buildall - -rm -rf ${BASENAME}-${RELEASE}/documentation/userguide - -rm -rf ${BASENAME}-${RELEASE}/squid/Licenses - -rm -rf ${BASENAME}-${RELEASE}/squid/LOG - -rm -rf ${BASENAME}-${RELEASE}/squid/configure.in - # - # Packaging commands - # - tar 
cvf ${BASENAME}-${RELEASE}.tar ${BASENAME}-${RELEASE} - $(COMPRESS) ${BASENAME}-$(RELEASE).tar - - -# make ftpdist: install FTP distribution. Assumes a "make dist" -# has already succeeded. Doesn't do the symlink; -# doesn't install the on-line manual for the Web site. -# -ftpdist: - cp -f $(READMES) $(FTPDIR) - cp -f build/hmmer-$(RELEASE)/COPYING $(FTPDIR) - cp -f build/hmmer-$(RELEASE)/LICENSETAG $(FTPDIR) - cp -f build/hmmer-$(RELEASE)/NOTES $(FTPDIR) - cp -f build/hmmer-$(RELEASE).tar.Z $(FTPDIR) - cp -f build/hmmer-$(RELEASE)-*.tar.Z $(FTPDIR) - cp -f Userguide/Userguide.ps $(FTPDIR) - cp -f Userguide/Userguide.pdf $(FTPDIR) - cp -f Userguide/hmmer-html.tar.Z $(FTPDIR) - - -# make stable: Set up the FTP site symlink to the current stable HMMER release. -# -stable: - ln -sf $(FTPDIR)/hmmer-$(RELEASE).tar.Z $(FTPDIR)/hmmer.tar.Z - - diff --git a/forester/archive/RIO/others/hmmer/NOTES b/forester/archive/RIO/others/hmmer/NOTES deleted file mode 100644 index 12c069b..0000000 --- a/forester/archive/RIO/others/hmmer/NOTES +++ /dev/null @@ -1,197 +0,0 @@ -HMMER 2.2 release notes -http://hmmer.wustl.edu/ -SRE, Fri May 4 13:00:33 2001 ---------------------------------------------------------------- - -As it has been more than 2 years since the last HMMER release, this is -unlikely to be a comprehensive list of changes. - -HMMER is now maintained under CVS. Anonymous read-only access to the -development code is permitted. To download the current snapshot: - > setenv CVSROOT :pserver:anonymous@skynet.wustl.edu:/repository/sre - > cvs login - [password is "anonymous"] - > cvs checkout hmmer - > cd hmmer - > cvs checkout squid - > cvs logout - -The following programs were added to the distribution: - - - The program "afetch" can fetch an alignment from - a Stockholm format multiple alignment database (e.g. Pfam). - "afetch --index" creates the index files for such - a database. - - - The program "shuffle" makes "randomized" sequences. 
- It supports a variety of sequence randomization methods, - including an implementation of Altschul/Erickson's - shuffling-while-preserving-digram-composition algorithm. - - - The program "sindex" creates SSI indices from sequence - files, that "sfetch" can use to rapidly retrieve sequences - from databases. Previously, index files were constructed - with Perl scripts that were not supported as part of the - HMMER distribution. - -The following features were added: - - - hmmsearch and hmmpfam can now use Pfam GA, TC, NC cutoffs, - if these have been picked up in the HMM file (by hmmbuild). - See the --cut_ga, --cut_tc, and --cut_nc options. - - - "Stockholm format" alignments are supported, and have replaced - SELEX format as the default alignment format. Stockholm format - is the alignment format agreed upon by the Pfam Consortium, - providing extensible markup and annotation capabilities. HMMER - writes Stockholm format alignments by default. The program - sreformat can reformat alignments to other formats, including - Clustal and GCG MSF formats. - - - To improve robustness, particularly in high-throughput annotation - pipelines, all programs now accept an option --informat , - where is the name of a sequence file format (FASTA, for - example). The format autodetection code that is used by default - is almost always right, and is very helpful in interactive use - (HMMER reads almost anything without you worrying much about - format issues). --informat bypasses the autodetector, asserts - a particular format, and decreases the likelihood that HMMER - misparses a sequence file. - - - new options: - hmmpfam --acc reports HMM accession numbers instead of - HMM names in output files. [Pfam infrastructure] - - sreformat --nogap, when reformatting an alignment, - removes all columns containing any gap symbols; useful - as a prefilter for phylogenetic analysis. 
- - - The real software version of HMMER is logged into - the HMMER2.0 line of ASCII save files, for better - version control (e.g. bug tracking, but there are - no bugs in HMMER). - - - GCG MSF format reading/writing is now much more robust, - thanks to assistance from Steve Smith at GCG. - - - The PVM implementation of hmmcalibrate is now - parallelized in a finer grained fashion; single models - can be accelerated. (The previous version parallelized - by assigning models to processors, so could not - accelerate a single model calibration.) - - - hmmemit can now take HMM libraries as input, not just - a single HMM at a time - useful for instance for producing - "consensus sequences" for every model in Pfam with one - command. - -The following changes may affect HMMER-compatible software: - - - The name of the sequence retrieval program "getseq" was - changed to "sfetch" in this release. The name "getseq" - clashes with a Genetics Computer Group package program - of similar functionality. - - - The output format for the headers of hmmsearch and hmmpfam - were changed. The accessions and descriptions of query - HMMs or sequences, respectively, are reported on separate - lines. An option ("--compat") is provided for reverting - to the previous format, if you don't want to rewrite your - parser(s) right away. - - - hmmpfam now calculates E-values based on the actual - number of HMMs in the database that is searched, unless - overridden with the -Z option from the command line. - It used to use Z=59021 semi-arbitrarily to make results - jibe with a typical hmmsearch, but this just confused - people more than it helped. hmmpfam E-values will therefore - become more significant in this release by about 37x, - for a typical Pfam search (59021/1600 = 37). - -The following major bugs were fixed: - [none] - -The following minor bugs were fixed: - - more argument casting to silence compiler warnings - [M. 
Regelson, Paracel ] - - - a potential reentrancy problem with setting the - alphabet type in the threads version was - fixed, but this problem is unlikely to have ever affected - anyone. [M. Sievers, Paracel]. - - - fixed a bug where hmmbuild on Solaris machines would crash - when presented with an alignment with an #=ID line. - Same bug caused a crash when building a model from a single - sequence FASTA file [A. Bateman, Sanger] - - - The configure script was modified to deal better with - different vendor's implementations of pthreads, in response - to a DEC Digital UNIX compilation problem [W. Pearson, - U. Virginia] - - - Automatic sequence file format detection was slightly - improved, fixing a bug in detecting GCG-reformatted - Swissprot files [reported by J. Holzwarth] - - - hmmpfam-pvm and hmmindex had a bad interaction if an HMM file had - accession numbers as well as names (e.g., Pfam). The phenotype was - that hmmpfam-pvm would search each model twice: once for its name, - and once for its accession. hmmindex now uses a new - indexing scheme (SSI, replacing GSI). [multiple reports; - often manifested as a failure of the StL Pfam server to - install, because of an hmmindex --one2one option in the Makefile; this was - a local hack, never distributed in HMMER]. - - - a rare floating exception bug in ExtremeValueP() was fixed; - range-checking protections in the function were in error, and - a range error in a log() calculation appeared on - Digital Unix platforms for a *very* tiny set of scores - for any given mu, lambda. - - - The default null2 score correction was applied in - a way that was justifiable, but differed between per-seq - and per-domain scores; thus per-domain scores did not - necessarily add up to per-seq scores. In certain cases - this produced counterintuitive results. null2 is now - applied in a way that is still justifiable, and also - consistent; per-domain scores add up to the per-seq score. 
- [first reported by David Kerk] - - - --domE and --domT did not work correctly in hmmpfam, because - the code assumed that E-values are monotonic with score. - In some cases, this could cause HMMER to fail to report some - significant domains. [Christiane VanSchlun, GCG] - -The following obscure bugs were fixed (i.e., there were no reports of -anyone but me detecting these bugs): - - - sreformat no longer core dumps when reformatting a - single sequence to an alignment format. - - - Banner() was printing a line to stdout instead of its - file handle... but Banner is always called w/ stdout as - its filehandle in the current implementation. - [M. Regelson, Paracel] - - - .gz file reading is only supported on POSIX OS's. A compile - time define, SRE_STRICT_ANSI, may be defined to allow compiling - on ANSI compliant but non-POSIX operating systems. - - - Several problems with robustness w.r.t. unexpected - combinations of command line options were detected by - GCG quality control testing. [Christiane VanSchlun] - -(At least) the following projects remain incomplete: - - - Ian Holmes' posterior probability routines (POSTAL) are - partially assimilated; see postprob.c, display.c - - - CPU times can now be reported for serial, threaded, - and PVM executions; this is only supported by hmmcalibrate - right now. - - - Mixture Dirichlet priors now include some ongoing work - in collaboration with Michael Asman and Erik Sonnhammer - in Stockholm; also #=GC X-PRM, X-PRT, X-PRI support in - hmmbuild/Stockholm annotation. 
diff --git a/forester/archive/RIO/others/hmmer/Userguide.pdf b/forester/archive/RIO/others/hmmer/Userguide.pdf deleted file mode 100644 index 74eab166cac3474ed789a8c8f98e9e7f32200623..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 299122 zcmbrG1#DeevaZd{7&Bwc%*@Qp_A$lG%*+ro#EuzaW@ct)W{4d#=Oww_(>?0Gnx1zh zOJ{3quf2|?RrP=MSJ`9=B4V_RbWAYJgbakXMwWy;JTUY!02?zWb3&FMx0GP$#Vo9x z01kxoVpfJu01<$(tqA~zj}OMt$pK(!4db?Ats-l?$^ZxMGFCZdjZsDURAus^wXVCy zrQbnw=Cu>=Y-WwWt$D!OM83Tmj-6IgCLT$KpixR z5tOY<3;l&$J5g9#4mU^m)KP@%ZJ7whc;wyfpjmJ=h^vNyN+MQRUKeP_rr zR8(8%&d7sVgAM0nrLqjM+i5*@P&l|+aS3}1T=zHj)#mq}tO66vH#=WW<1Al@iNe^S97FOcL4*{7MEwZjkP=F|EtyeZ zjg&pV%_q!OLn8}pUSA56K5wmio^tVVR za61oZ?~BKbXHZ|?Er-I3WjC?7qe-}@Z$y>BO@IrCkG)r=g&`eU6>!Qarm*GkmJ|We0+qz z?e}x_@3A5FV~-y}@*^CWm^a>8P#>xOELhT=4 zRz!@DUIpOhM5y~Ckp8vFpLh8!0vUh)rvD%W3)B>CR~V3zpN1;87~{o~Q<=h^J`$U! zQ4&I>Pc=X$zB6JCCmFOD^?AkhLLieYl0`+Sp8U4%y6Nh=Ud>s`>HO6JF`o<}ha(@Of?F-<|o(U)W2q;H<)=J3c| zyBf&ZQ#?g0B;)hfL-y1X#cTm3W&xk1r$o-}UjQcFWsdg?eEGM5rm8SCq)j1)MD9HnG=Q4(F)7QYbZPwVTRjwyU;8 z4AM`go>XxMFRsmZXBH_5K4p`Cs2^JvY)g)Ne~*Qb{ViT+Hd=dT7HVP<%XeYeAeRcX z{(kGM3p!lhIz5QB9|dCu$C`E~?(+TxeqvcA6IN17n4QB@!^%)lsi)qr%!puC;LBmc z7Rw^;2fPu?D+!XNPnI@COsK_M5WcfDY^h+h(vbzOBND}Jqm@z@gHeAOW3H1CD=wa|+I5)#A%pTVJTwnNDB?bS%`b#GNr#G6!<)~7$C7Rxk*dRUW} z<*Np##{2E0zRl!A(nvzlTl;Av$DL{!4R?;=O>O6d!;X?iIOZ@fI;c5q_HoU)HLr|g zAo^NvJzDW-T9MyEKvj`ntB&YnO5V>vdiYM|oqOw!BTnoY$W(YX*3aL7RqTk;klcb5 zPhH;HO@GnZ+f(T4!f%4$G=1B3I4z5AT(XGAeT-?;^Mv&^YZ+>4Me5y~7jsRF^l{E- z88-4@^{~q8;ZET8?x7Pz zakC>i_nEYZtW9fn+RuN<7r$cWXTo4-WBom0F#Zhp-y`^MGX~>t8RCzW!TCR-_y3tP zey;v~!#|}A_Wvp){7f&uzvTCf@GH6eXHh;+AGO(KLksbzyGGN~g0=6_;_7t|FGmKb#NP3O(ggadkxAd`UwIv(iV z^)NB_v%AmTHcBKkv7bF#8hwW1_0i<9d;Frkp8Ce(VfWF0^!8R5SJGmAs7Y}a**TxX#v6< z+#n+6-HmM4$2dbmk)@q{PZmJ>TGEw!DljB)KXa5uwnUq9?1}5TjW5>HHX*8reH+F15qOeHdLUP#<*Zy%iZl}vn@M_mf0c4Z5$*CR@X z|ISK0=**h>dZ3~>cmAFmsu-F_eI(iPcDbNVR*T}psqJdhn+zo;h{7E7{&*YFdl=xs z8&&Zuyq0$A4o2>fI4;GCII08wL+4{bK3Zau>x_VGCM3IzrY-*nK847TiMt$Y 
zNq$G;Pb=UwL!R{9`YqwYw|pxL&$Ud$`LdOfNn;mZ^fZ%259(xYsKCCLl8zi}gY`oQ zE~EG)OC{PYg`2G|3TNJPl2&EH#$(lh0 zy+P16=#Ye8C=WAojv&JI_K-#e2W9CzI`Jv{GSWl`D1zlMkBJuY9swlNWZL*F(2#&} z0i#{!T!~~QG-_vAsZP{TGMBt28F6VX^3A;BS_59(b6H&Rmp7DcV0-G>*uWEw`IJ=9 zrV0C}6xxRBTlq)<)5iebqaLN?@U{;dh|p2u3D2Y@z{7AuA5`5_#9P=U3#nzIHL#!i zc4%Qf1ci=ByaT~CDvIbUjA%=E9I#BOp97cdj1t)^0FXl16>rZ%?00pjD1yuR^2*G& zFMDiUTCU3Yakt;-JM8&M{jR<3Q|_pr40;FG_|_FVg|2YbuT&A~YPF)kTXh4U+Q+$WvAF!iM>+3v;z;isO)*kE=XZb-Ml;v1iaC$aaNOyw-3 zZ`5IHt;|FGPBg5-&A0XVDohmJjb#Ecr+AFItdYjjGYFvO1hJnC9nW_na-zK$;GHFC zW}G~Vl9|^l5=oqD*ERT4OF}^#tv7w5ZVgrU^pll7`F3_JP+Sws%a)Ddrkt=z1Jw9U z(CpqGAHtB*i5(ah(T?f*Gm2>RlS+9~I-h*c^4bmw@L!yq@(C#;?F1Drf~Z`;MWa6r z0r8X7vloi$4C9O2?-_@*#&}q4qAOAD5^JVjQ&*^HmgGx1M;&uIz*ZlaentLh5=cim z%%r4{Alj6ZA1nIdA~PJpUMYK>ea|%-&7R;yIY_t_1K~yFriKxuna#7)SrU|F1pWd&^6D_@*&wFrKjg2B!g>6*rD@ItPObE`2 zhi>!T;1eR0O49h=ZkK>8Of&vz2GOeu!2VnQq^*t5QQ`r2MxVD%iUh(znZfjN;zz** zt+O<^ggI)ZrF#>9Yu~sCV8NF3!Q-yd;R8#v>@)#ygkcu?r%|eJ*5x%dX^u!OzQqw$ zcp|tX#2DO!XT6-zN>DP9cDXcHa;ZkK-Uw? zLT|?-B=EEWV-7_f_0iKvZbuq+-NMQ70iiTK;E3(02OZ<=PB5gEP4AoBAR10n zBK;*d{R+#Uxru{~<@eOY^dA}ZSAhR}S^Iy3RL1{6ssDWn`Ws?p`p3j78=J@W&adE( zt&>=ZUXZ%n+A0u{SEezoqBKhl+o4$HRv+Cy{_$>##`TM{)h_` zYQZi9k)B74G*DH>FuXF3OeX(gC*{WqBRL7_n??-N^{E@$(iEdeO;}gw#@!?*9g!}~ zDI$29M(EVK;F2bHzixb`%|IW=t8w%Yao`@El01=d{spGwry4!n?pzNf#+2r#{yFV! 
z!kQP8n7L?8+S>euLHp)u?vmmN4g9-tO-I!4O^1M<+iCXZ6@RG5@2vd-$}G`*h3~xS z`YR?>aEPFUSFf|>8~L}seA2@$L7$7V`;yZ^SBDQ9;pEek=p)rE(_ri&NrhfX>RSR91|&ttHBfO72M$N}L$8 z@^w~ac+Y}UrC^3s(d1r?&tderhiOZhapPt-Ql4#L8UM~a{B;FF5&0=lnkArtnuy`!Ou=TjIsRg&3~x<9J7eSD=R z6k+fp!;L>M$F_!Nh1AUu8jqTzwnS*^!#??-;X;t!6|b!Ww>mjb*JFv(;Af9&Fb}V; zX%@GD^M(54&f5URjnG6)GAK%7wEvwO@QJwHS@*&6N%%!Yr8@h7R&^j(BHq|M9G+$9 z#3k9uMGZ-9lq=81WoZT!DOY;!R8I!6l=#lQ^Vodd6p4Woa|`}u1zOFBThc#itkf0R4W%QpA)07TWPfJ1e+FlN zPPM<)^2|Ro{y%W{&Qk%s|is96n|z#%!1F+Q^{IR#2q~WceDX1T;prwNMFH zlHPBF*6YPEU40i>PcxQ9)PQu4usu^wE4BsJN1>FKWsuZwEMh=R--X3%Bs$?Pd4{u5 zlYmuF1i)`r%0bhC210XYJTav6M(jWxBZ$8Z?8_d!^>6f+!?Z!Kr&?92s|$x9U;(0PqdY6tr!6H$*Jvt ztl}DxScDjfNFscgn^GL27u2e?aU^H{kB@o-GpOZ5$Po^8-Qiuf$(tYnyMzPeu6|)| zU!I8W(1P#Ti^1_}`uySjNrf$vJ+PET(vH5FbtiaD1vA~ov^(fE#72h`PGTp<&0Af_ z_>4K^sh8TEO@2ju{7%|V8N0`RJtT~;kJ&ij0=SviPP;9JZu>^k;=JBAVJ~DvB$kk# z4@S1JAe4^W=WbW|wr0Zh=>-IN6=CEr5c2Ez{fs+Sjz1veXT9-z4E$|<#Pl0-{9%;+ z2$26%*!^7o`<8!%-QU(k|0vX$|6FkW9%}!vm({O6?y$;+ls!BxpQ}BL6-o3!65q~R zlGwt`$)bjs)c_e6MAJu_n~;ObEB@5A-cLt0Y+9P15`&-v6#2SoOH>WDO@VQ0ULBE0 zi1GN@szT4YLa#$oEs_#Wt<1c7nX5wfw)NhF;$F@9jY&#a{=wJA$LEoJ0#gyCyq1-b zE|F(Q^>?Ds~NWP&>sY9Y&zGiu`vnpc1 ze8If=1c_E<*DOnT8+YIWsX_C}oU2c|2k*uZk0j>#JEO+W40DLmt*1jKYodqZSu(6O z6_N<%Z#hz{wLsR7Gx8T3>urc(9qSVujtrm=>A^+Sp-Jn8JPXZ5+M{x5ci59m9EmPB zz?xy0bho7})}l_s&Jb3`e3R|tLoQAWx+`h92h!m0&#Ch0rM{2{!g*5}3dC&>@c`gl zWVv^DHBeM;D#DZ~{8s%+q05*c1~$F?&p@-1YREv!(kQPmFM#`rm)5h@>NB-u2zgWe>oMG)!9C)}5G$u}*(+{+i?#F0>5uAKT-4eb}W`J_RRxaU6tu`B` zPF6(AH)1M&=cKF4bd+RFJWgR5w&qgE$&X`M20S&vYk=Vd&*-qwM}u(1V}`1p7fz-_ z%@wqEXnGYL=a1VW%xCb;uuDYFzlT2GX013(VO-A@+r(IPOROL#2Zs{Aqa{TFD*^_8 z5ek|H^b{sMSDgkPHE=UzezsJ-=DjQ7rF$n^VFt+D-4mL8r9~%4)VgL0sMD%sL=gym zZjA+oY=0GGm=kCi8-Zd)d8nc$bV_sT8`SKjDJSDi6-KRvCdp&D0}v#Es+n0r&kIjf zcLJ%&A{SVLNRyOgj4Bv6y-qvnl5@q7Vc87JwTd_tw6^vv>&(tlq`}TnLpdaVr+99G zMGoy_&r*$mn3K+tnoDR}RNUEVb$8PSKjz4|S`>X6DAYH0TNXNc<6`kC@8_rUAi6Cz z{>^COur1!}{5jRULy(a%v?-juG*Czd^&z1}C`g^f+E+R%#*yrj 
zQzF5<7WXBN9&Y5Mx|+E57OQ16j&-$@S+Y?Z|KZyq-x<&JDk|dVFIIG9XA`+y;90n6 zy(H+=)4+qAC$o?V8+a4%4HkCL$JSI|Sd@Y!nc0}1qsAQt-eg@G5BnQ54v3+$EI4p2 zY~-_Gcq3W6E6?o(aY+FZWDf5Rv);b=5*=0>1lcf zFB+_D=h<#5kVt zG|!rRSMuaHOocf*^^helACBB|bA4G2`sV0M0u_|L8wqwuVzieU*8_K_FZCL}?U$t= zc|>^loynulfrxmF{YEPjeA(k1t>SX;#MJQ?QD7e{;je1*dmopPne|_N8Rq}o$^DJ# z{*W8y-#Rt_kn))Sr~Le*&-!cq-{=k7A0FJFy}3W*ANwDVuy7DE{{ql|=FR5jjUxi%VJIb|5O7B;V54#6qM5lk4ZK5acps+w^Y#ii zDmOWK^2s=*YA|p>`beEce0UZw!sNXkc2V)AF#5Vl@01Thk4-eQeSAGVT!<2&?=??~ z5?AYy+jU_IT=cSg=AVD0=u~xehmNj4&MmtlpzI61( zmX)U>*Fw)5JXNBK0fzRSq(Pn5V$0YHGKbn)$@mDbXF2HN0%)^phgfiBl3LQbzl$To zwg!ic2n6}PadP3Y3Bkw>FkZzuuuyY*_Z0>q}{Y0(No)#aaK$S?U zx9vKt!L4e=Jc{0`wnE#z5^l8$R9R%LnM*83q>x!J5ly@YzmEQ=O^z&V zE2IzHru(1|3?G)S;iFw-dkK}9a#gyH$HT_C;}#a&Z+EvgQs?_5iBPVQuU($D&Swg1GYdDIdB#RoRv^MQkIX(+(@!6`i$J>nKS;F0*@MDZhm(Yz^&l#=^2eu_}p zCq?HoKLklNd&nAF)qSDkLX!hhhHg;ps=ZZoQ9H@|KAr>ag(R(zl zwUt|u#`G4WtKUt0y-uSYWb890gh)Qtxg)Y=XG1@WVB-%m+jLQ&;kbscKZRM>j-~_Z zQMB3;*`kAZ2L0mfG$>;oA2Qvs5&ytcX1;F-BigcX&qC4|)eK1|{(jUW62;Slmf5S< z$|APWd996Gs6vR}M5t-WJw!)R(nrhzCnr&YkXYif^3u!IW`U)320yy5+nh3rEYgH$ zs-{4WQWdR!T2^m;dItFvIes>d0o#L>^<~fn3B-0mz=leu8O^@2K98pgIKO~9D=FG@ zyDF`4n(YJo<_^O*nTb&K(iU&o_Xjerl1LS3c;HQzTFs-Ly0Y|31te~{^j#`$BsZ{` z+u1bj4CoM_SR8@8rJ*t~;&PFmVTnJd35dx6A)J^6otp`*PL8P-7V&&b&TpY;EkplU(J!H9 z*K^q*4X<5m=si=mM}I4LQ4(W!6N}OeDGz$aY94Q?y2S&Waut$1rERy-#ab)|4i%n^ z>!Wjaxy#&pXkS$_+ERSsGV)bi+?BdYMmm+4 zcb!Mkq_kT2YGHN{C4uB*f#ehH?KjahgZ1@J0dJR)uNVI?Z#3pos7;-XVN>tb>Uaj- zp(wUjjSa_co{XizAZsxui5dWumI|fg2!rmuy0Q#NzvS@OagU&telo+3339SmbgNv=q@1OpfL9VAnDlx%Lx*_`@$ZOWFkCn)baA%6d zG78k#V2ojn+>)vsZhAWDj&L$GxT*{DkXl)$4*DT<6B>)GORSGacQdq+diFRSsQH#O z5fV|g`fZl%^l;|3?MuTq|N0eDQ-@`caYQe=HtYh-aQPm_O@>@p9o?-pdYko}Du>9&m{(Oe>zoI6V zziB71{KIy_{ts%}Z9su|YU(8J60BxJdXGZ3tn~cRhP{<4omVi+$bRZ8DD5?GTyr^FouES8Xmt$@qGr?OxZ4!s}W65FYv8Gzb!@<}7{`nbx z>2@cq>~-t&!q%6flqvPjVaPJt(1U}EK!_sv69hR*1p@YSz>?1%{SwP5Y zAzfP0j%}{1Y6&&-ce{JMwMHlq`skFqjYf9%O5Yso9#ocx<=EFzo5lJNOfP;QEe4}l z+UK&T%cZ0B4Yhpb~MStz@CN>zpr722~$vSw&FU& 
zdrVEsGL*1Q;MAcmZ|D2&>JwxeT!%L1quD$mywudS+fT7AoOHVxiTjjWdX55Hi7TZ` z%DNkoTtz*1bR(iOh>EvNy3|s(yG8sr%{zr5!D4I`Bp3^i*mijlL8qn`q=y{^3Y8mh^=1YWf6?NQ zmu~N1wd{JXo^O68-*ftru`IHpJdU=@9QJijrQ9}2QMRNEAI`E>6$$)4g6@NOSIGkg za(CY~52i!X&bItcZ;;)^pxY_>&V!;ZXt=IfO_vBkZn^wKcBwR-o+lcjVeq3cjzZoC ziRejgq>(v09W~b^Lugsq0S=a1F&n4cXt8Y)d&}UGDdt2WwHlQ}E62ocB7cHR7%8=L z=Cjav$iQlLlMIni>F>Tt9@GzKfzP+QBoWueqj~~23Ji`3v*;Yd zc$91wt+7(`;`V&HfAJr@kYJYTiwVPUK91{EYn<57eD9O${fOD-yu+F zw{Y~f?DT^cko9&$YzT$`ZP>K}Snaruz`p5K!+~*WdbEGyU`rmruY1%zZVYad24@UH zk=XQm2ILXp21R5;AuTHI(NV9O4pBpb(GbQNP?F3L1bM{+Iy=rD(dnJrxUnb z2Xsm0E0v0?<}R-&{(CW_su_Y86Fe(Xh@(mIEreP`(>?Z9?HCkr=2D&darMos0!ATn zJ>4PdQNh|1D|Iit3u!xG-X*ZDj9>FmLpAum!B&`?&vjOEP#OVYLmn)UBe|VF_V6}_ zK{AxH(`gfK1FvY9%Ev5$RtKd@g`_$}S%S>LHsavx#?^vBwuw4N(Ux&d4Jn?0!VpF} zrr!beN$RbxnKa^17*G2ywL?&ec~99% z!!`dV8L5FrWr{e(%2CwaQ0HewDe4uyL^9^f`uC}xqu zx2(t~qkYdyCTxq5&gmc_JFp!I40GBKOpi-Sz=~7kDGjMBcvUhfn{h$%ck z1u!G`&uCSFwF8DB$9b-hfeeHAOVAoD1jwny>?R1jm7W~EsN^IvO-XHJm$;z~6XRsgSkdd#W!J6S| z!QN6t8i83iA~b{|@{gZhZ#}%9LAlO4A1-3lhNC{_4P=GnoY};dDj4UV0l0H3OIlk!L~ka2UgF-v|Y8{LORW?IO#seYzJVCg>=i` zS|^6%@h^ePrk0er0D8Xk`S$d+|2uGH&gki13F=>CU7VcEzY`S8f9w|iLWzGPD3<@O zTlg1(`nCSAJ`&rHS+4&X+WL2b`ZG2CFQoLt4f~Ch{!yQU?JonwCZ;VB^v z4Zgtmmcffe6o&##5p&+p3{E-q?|cSr@yQ`uL7@)M79kt&+{BBmLldWBvwE#TXY3)l z$lYxpHU)mE*2JEe@p~5RGt=~bnti%?;WTFcdJSOFwuPVO`p)p({+s>l_2YFAA|=M7 zMN{3w=L%-UqzpCl=F4P3hD}vVWi~#-FlQ{d^6dTFr5SmRmoTu&JZGm&J3e1`XVJqf zG;1{t4Om^5mNBjuY|4GcgKJX8&;Gkt>Yrg>fK`|y#KoazqWUaD|BfNfOYydNA@Z z7d{Kov7f9mAD-Ib;T&v_(*VdP$hDjJf=QAia6_nezoP{tX~L9HM_N8w4Tcm`96=ho z9uuEhgW=2*?3pG2}wxpzaxVLbz*<(EcHX!E@^_aA(u9Lc0D+VZzj)3^XR5HT= zO(S&@JfJIfAPV&2Fq8oht3(Ic87RYwL!!k3aZA;9MiRn$D;g~C$Z_^NWjkc^{J{9? zb3#26&Dfc4A!Jwz5p3fg?1wS@I#T3_XfprPPsl24bzh5lmz7T!TE(sBl3ELShKD++ zLL>5~Pm$$wdrgf&^Ir`7oSe&P38zuPRI-SnZ1f^z0V+DQg0UD}9Jy6nJ5mV1gs2)J zp6|=wJ3ivMHxZO=@Ax(ed~wipHc8Q1)66;JGJDaoI1Lk;FI8&A-%S^zAHvFz)K*3? 
zNQV(3xzl=^9wo2MY@RYA;cD$AvglTG#QkXJt1To<-`=9|1p`iC2MNSav93 zc;p{|h80re76_#il^j8+Kb`bRwu~qUI(BH>%0!c6Bb^dBhw<1^TBLU!df62cLJ;j) zEsJA%>xh}FJxRVwLnX<$7HzL_XkL;u1beYrHF;VI>L$!R6DHAig_C`#b{& zguvSv%;F5r(p>Kb9~fB%u1t1r7>&vH8H_eV%H$JGG2gC=vskNGbCmyh)(J%(%`MUB zFi~^*Nqc-}ROTpCX~e^MG?UtZhawbg($g?8u;U68nyUf)rXEfBtDWCBVp882Y{hvNlFSh|U;P1fYGkLb@X zS2E>c`5mdAD>9DlPISkt%V~W1?s3*D3nCbe30}>wc+1BZv;uw~_vT$1TM;1ykq8BV zz=-UVx;mSv_f+GLI24~OJv|=p4)#y=ulA${xzyF!J;j}BpHW_Sty)^=x#Sa$77+ zVZID|!FLdD!WQFs)GEe8GIMR#+rl!4+GqFriMp7LFW-yPYYlr!nZZ(Lt=H!T#(RB= z<*Q0vmGr+8N4}5mYOK4_Kz!}7fx#%Y2?W@|&^Q{XJZI(E z0q9Lc*JyWPjP&O-Evd_+ne*-I1(&xQK?Kim-gP!p1O3api(%u;tELFkQr7w@MOJ4^0Cnpd;hEmi? zucA^X1(3`l#=TT_zP_G6$C^MLT;9z-$6i3ayqp%@6ZnJzh=UbNuQhsdfL2)4Olb*G zk)+$VnuufTM+gSuWnd4HppvFxXV0Jxg`gM$DJ;uSg^xyROL9-tK1U&hG(V7dqaSGz zudNnVAN6j+M+$Cce_I(xfHh5TVfAqyyb5zeLsLd_!=n%sXcjt9RL+foF+m)hU_hBa zhjwZXFLbXdQv-340(FrDn-*r2hgQxGoqAW`; z>PhHENF`NN@#M3X!8CEN#8%x;6&QtL*ad=)FCSxs*I?6OV#GE1;WB$3s-H9Vl{qpu z)g~gH6Fwob1pOGhHkU~qI`-tnh1RbRN;?uoh1U^fVa0vF0nK45q%iK7N^Q{@a36W& z+?i9ti`aPRkMLT}oZ`;5f#}Y+fyl9Ka`2dU9~eP)DMfBt;OxZR)iY=V35*$;y@^0Q zI^0op*3;2!`YEJDTDbKbQn)t^9zGxa!qmn*)j)VxY)2YWGpAoCTzF3H&2#_vh(V0fc&0;uOgFSJM+0hCqo-5g60jl zjNpVih0(e}Rh#-MLYB3;0^*pzkU!hkHnipiPn}M7Jg93ug#oRTG)yno3b)0<)Uv8< zzo{<`;w>A2izGY;36kCd&0z0=XEMn>pWs(LG!*P4Kk9xIVsLR4WRoV8$9UJNt{Z!5 z1`d&NiCps_{1as~*_8Ziw^panMnwpp&wCL)?99Qb&NB={^J2rRwCqFxO`a zG)oX}`|R=EU~rX4NtLz*I15dKd7uOlC6hj$826b)B4XO+x9=m`R0Z!D6!OEQLCvl| zN??g_*;KH_Yw>CuWy_OF#ijC)DT0SnL(d~&2u&sx?*xz?$PH4EB~wm+L6=PPXq8Ih z(>FyiF)c2bk{_PSlS!kdS3W*GO#@UTB##T2R$p3X4$A}rw!M3k#}juRQ2?2fqzCdA zX^`Qf@l_(UBWBVv*FXNSkQ1X%UJ{Lx(Rlz)XIZdy$-+m8!i&{i$N~j=o5@SR2ZeS) z{o1Xk`PCY(7=Oa5Ndo}nHkRDjV593C4_l+~brT*)Mb_I~exM%BuA@EmQ#+U=qo-Lu zb<0MpQFO|UPkfn3)z~nqtw7)6i#B6r-g#BS8e@a?cAb|jy%!nn=L>PrvDRQ$Q0}1g zl6c7qEtMNkZA(>0c@4TI;LWr*jEPSW75IV2T~Ax!;I_ZjU z8ifAvEob814A^owA<>PIJXokoNAuxEft2|JCN#Lrq;g6fPGD@X@Vd|p2hs@m@JG{? 
zH_9vXK-(y0W})Xge#0n+9z2uoFJG`S>|78&V8*2B0dwJ!YCXNbBT=2wcwUIeUdH?; zC?E7qGRPJI3b!zX#-OF4Cnc36w+nPjVB_c=8z%7pFUf-5*pu~0d|K;Dooj)(u#1nx zaveR#=*>gRNl&ZE3a-|W(_&5*?~wzey8YtOmoF3yp% zckZp!G#txW7CI!<*U`mBnRwi;6BpjsQ8_F3Q%&k@e^jqPXt{t z^^`P18;EBBt#INHYzj$>3l-=-b+jux=Q<=P2pqi0#!^Y=Pgf!#9bsvGg9ok2p?gP- zV3+0a?&I3=^|P}o1mU0%2sw{~d&;F-Y!aOt=;FG8oO6AZyy}1*B+rA7KS)*du@^}{ zB8b-b7$@5XnYl}8$oaHr+?KYi8#)j&F?5_DURUBd?i0T>H7W6ga%IneRT;C+H4;~4 zG5hw$Lzk)9-5hw{eS2orl!97H;>5^-4tp!Oq9*p?inf1()jAxeWmBapf-$tAcFGjc ztj2S+RRI$+EL9;UbXL7IZNp*E6?{+gr<9m(_mmcYkQb`q`6KxyqsK#-uLV=ZpHG-r zimG^p#%b@n$?N=eEJkj>tj|BUM%KBmE@Q&?=G!MnQENoBjOs-{2h2Wy=9)GLFv=}P zM>_t}>L<#WY>6;lUvataxQ!Z;>N45C#0xp4yD89=JJ7tK6jn$+c3GPi382zSBAF_!}9G|}fKUVM15{$P4)M_PL??8}F2 z*OB$flo0X2VeiG+a>v}?4y70ID+iVIk}ixqKVAx8xLq&PELH_r7 zD<(G9e@)f1{>OQ%Uv&F#AkX@LoVWV5{vZ2fe-GwA_@0T7^}m?6`q6;=%e>W(=How_ zw<>t|!FIO^`DpMJ{n4S=E5_MFYv!}3Ut@LFoK#$2NTZ-OT9oK&fXVRt;Sq?V&ZXYh z4FpbM%@xW}5Lv!BY=pztt)(`Gt+T^3Yua7V+4&CDozWSoj*=ao-n^#1dXP2WFK>6FV|a~}X(74K+2S`E z*cUaRlhkbqpGtR2dbIK0vh%bzQt6HZCj6~Dw|$YD~SNPDYi^W!i|v$RGR$-h^^ski&7m69hrsj zr~^?StPkO*1irb5(L=i$9e5CFoC14dv~8zy8IpO{y0j?@`b(rQlL&(!eMKCjqay_n z2iQD5m@Ky(uP7CU6N7mThUD&R>j>s4HV;Q~tip@gAjP#5kW!kb7UIY0Nzb~_T$AXj zm?C>tN;5Z!D6JCnBlTUOaqGzTI)=w>P1v7mw>XEdQqg^a(!J*_rU5Vo=km6aEKla? 
zpL%OCO`s+(NNRSTC8Semj=S@kWd^rN%(nQnAF3*~O~ee#B!E1F6-s^_NSHw?hLgk% zk31fSB`gp!OZdDCuHamRgb9KX@Ir$Znxz1~AIU)Y=ID30k07_<< zEWD@21i36xcQ}E|@L4yZd^Vzh8(3zBa>kig)`bQjsd06qh2n7AbXg(F=#5tx*#N%DCWbSVTsRzoy= z8A}36`=ms&4BO}c4*RL%LsN(L+NRLn*g>RolMh_)@wzI9uUjT9fNn3t3WaQbG(|XQVf_>_9=a zv?P^o5gA2qiR-DB^z?g~wTq^qz{c9ND~1=%>~Oiv5<6*(#(gQ`Zjk2HWKC#9sMTG+ z^PazGYQ%x$x^F2rU3K$cV#Rx>#4HnW7m5B!HN0xE|+>}pBy$c3PQv_d2y?>{ss#R z9Jg~8AAG$-$WEyQwRuwW3 z45tV>-|7s=xq#3M2f{mzQt>%i*+y!ayJx)B zL)qZDYf!{;Wd@a5(^mzI)k-1#rdwqoL?1=v+sw&y27um^;*PB*%h4%Owu~UutFz~JQ#F&( zChd9@gFbSzIi$uAQpgC_Doo5P(=qH&6J1%ZjX&tnZCzVC`r#<}%G2MJ6SXhxFpM*{ z>x~6ht0$R$up=t>Yf@;Z-4PesrgENPp!Wd#F<9TH`kk8zgRQ|{gUn7>!nZH~uyJP2 zG1WL+p1zhoFYJ9iw+G0E0gYF70hLdXw}jPKqwm=dy3mJ`fUTA3(U5?J=LXLl3l>~5 zJ2azJ>}E|u(YqC<_@qI-Uv7kz9{eKf%*kzlL&VhU{(!cST`KI_6C-aeyiG(I`30SJ zG7^HNcgaN5r-%q1tQ_Q=zik*ER6E9`qNa`vUU)gD7*NSFjUU7|VU{qLarwt2QgyY~ zix9ES2mBA3l%MbDaviWC_36vwYl^kl|qS_@b_Q z^KS0TQ()PI6@6c+lT-<81lFjMQvPA8{ZKnOyYWJ}bFcU=srYVt`j!q&_@I(eZ)+OSn*z6^o@I zpJ|2pU3u?a7|8;ZCI=*sDZDvUjapVJqd9HUJ@8Z+MD&+A+hQQnNSqXZ(d zc84JGXmg5+Y@Q=BDreVdb%$YA;k~0|u0ks791k6~Io4NKW$yxZ)mCQYa)3m7#~g!p zw6aG+h1iiW6zxKQX#ebdSRhYrRu@g7L&~Z|o-|~|?A~wOFX91ZP}nmX+8_P|%bja) zsKHnW_Fk!Q5IaYkMhSCnKh`$FzKq@w$o{y~^1FEbu2-12-{*M0-8Bo8g!!-h_NTiC z#mMm2e8n$0^DnIVxAm+)z~`U&?a#o}|1~fFi^%@^&cFIuzoo-|XSUyU*uVTtI#GsS zvuJ;KlfP16|4YG@_1_4ttpBCpTA*$fv)c>{_OSaA{;UeVWO}!=NF$#Uu_{v4!L|lt zOMVgFE0NgW)R?6CFm~Wl&)1w!&f&bXo|*eIL>^g0a!)9{wt4po$;BT!`SAJcAoQS$ zU6Ykgmxrgg|&4Y(JdH;Ct-il}*)@-H#y8?N}$9Pzk(Kc&)ayUwF#q%Ju z(fmyAMn)l5I|~31`@Ni$ekN~fC0S37n;X|G5gK&3uH)X04@WKX#m`4PLohpiP&#@{ z3xaPYAmGH*+d2a^{PrR?a6S8apmD>!tHL2ujz!|9FIs zUOAmIYT^MiUjF=+L>$+|tlojwp1g5tdN<&#%d*nxXGfYhE_q{mg~jD$tYm${L}Ml+ zca1_pr402MPGYxh0LBOzGhwW^7YD+ERU?Mh^<#e6(_bzgoCocvVU1fWUuzH zUs%5~Q(ehh2@=B`<^7cU5pdN->c6TdzeKe#pmNyh+n`4I#u^;)l|9ZKZhgblh zu|I*RvA}}xjfg)uYRc56weT#!we6Ns)C1K$1fpWy5Jr(IPS`|$HDN>5s3X7C+y#?_ z&yQ=#yuuax-HT*v${62|#*~l*dEa*`;*+lMhP!mQ16)R^?33ll`BV&PmY$Ofi62`E z<_*dGT1mwM@VP(rs8-DP0Q83uCG+N7 
znI;it1og~WH>&I@zXzb4?@ukLPWaj0Bi}=VPcpdi9YO$2rqq`kH_NnGvG1oLE0(3n zfGLBI=NlU_SM5!cNez@OO|hgMlIUa9IdTjXdPo?%A{UXKZ6WZ`0+nL=7w&uQuNRYW zF~YLBzJ2gP%wgRklEw-wg{|q2mEHW?=u$mu<4Jc-Wt<)7KU$}+*s^S;gN~I8XsSUO zjRHY&$g*Ql+#|=1CzBz3$Tf;J!qH;MGD&&Q-{*u7`)hpDN-DBbEw{|0M$6Q`6m$>r z5G<0wL^?&NY?qu!uO2lP4U+|$FRRF}Fo#(U574^Td>5ue1&u)3zuu0%8gC0CAA%ee zxfp<`J!O~qKGr`5`QybFg-4B+fqkAJ_t)Ijr`0hObG5Zx_aT{{V;~&5yjrUsM^Zv9 zhXU!h>WBJxL|oxYMBG9_-zRQWUYt5XhR>?}^2t<;h4qQi?=wRk7O*ZzB?LJL-KI7}ijM6RN zn|qYqO2_o*?$soOA&~Vy!`!F0qxOm0RE!0FF*}^G&5brpvp?r@H_dTP9-qab)jL;!FpqA9 z4s{$rE4el6*WaJbKdZ9k!u7MhQT6HYd5f0ao|~U1m=nqNoYOc#xbKh~B+e#BHXBop zU*8HOp4DKw6e*I)zdUf0T;~pv1r2yBxm2Z$aooMoHB^J;)yWaBE_eZh%der0W)s3p zlgueD0pZ~ig2Ce_zV`Mu-Ew{GDz|qm4+d)}IK@bbRT6;z#`pCOn7OzCOhX+47gMv1 zr`5>OVYbltT-B!%$Cva&wEVjF5r7*%@-+gwsF*vekNcir>KMxsM*)D)2h$y#ABZG| zHK}C0r;5VQt1+nw#LfL^WN>C}V#;UV9n_QpZyG#UIFo9VdQE;(?A#6$Esv_t0!Tft z$Tur%j$oVx+q!pAW6=R1?*=i^XTA_%{cU8skFbE{721BH|l2gO5<3&FT= z+q6M)l9r?S*JXpHjMGWG$Hlia`}dR#AAo3;rV}2>oi;cqTW$ILt0eGHpHJl!s|URJzOyR#BkN$=`f7O^`$}W)G_VDP z3TOEozrM^nI8B%`BLf{8=KiS-XUwc5bzzmWO?}CO56}`~p2XNX9?XCW`kO3@B;VH> z^NtLv_3qSyB5W1Z1Rc=jd}pBjAP6egX+yi>*jSG9B*_;=`FXCY+oQ)v4t^m65{yOzKLp5J5u}H@rR2WJXgl$Ewk)PjzCk_3H z=mGpb_OVN`=raX}-fniTGPht=6k7Sh-|VrapfPRDoJ!wIGhgdX4mo3z*zE0}p)ztj zk2yZ+o9WaH7PKGB3Y``yhJYS@H5<*}gP*a=G9L z1;4;K$;)mlY+Q_3i+MWqN{!9TWmzYzIk6UYw92#-wky!&ah*+r0yHnuaF?IHW1%c! 
z3n62(*J9i6oe*KGG{PXm;wUJ!F)kHQaAj1p{h&R;Wj1~*bcIuJnv9e<5J#D27VjUsY-|J?C7O&5Cj1hNoZJP^h6XVyRNvy-S z-4JZEJjFeehXJoeAn%fpCcCHSbja%iAN6YZ5=$Ms*h z?+>#1n*q-}Y$#`22sb+x?bl`=31e zzqs#L6X3Uoz+e3MU*+Cd|BbfC`d?~$zq*uFtE`A%H?H16o0Nbxs`=s)XF#ActUXx+ zq%`rjiuJGft2Z_9U)$rghyGRyxM0IR*}-N>siW{mf(_Y&Iow?{t%16hE02VVlrIww z^h2FC*IM`VaJh{^W7P075UgumT}^Un%kH>LD;+t>>{1oPygVCPf!5UB{p7t`jDzT) z{oJIeYHI01{Sk(i@^-o}#io@x(z(QbCXq z)8p|n{m_NY0_QvN*9w)K9n*qdVtP)EAOXV}mGr6ywq6Z%aLwa+O<_NUW78Z&XfWJT za|g;1oMTtG0OZvM=hG46^rR2X5YoGgQDdP1^6)5^VXmA6(lp^DoY5A=250{xZp~DS zhEsN!j{6UXIWA7d?nBAfkGPdfJS)$2U#h-DVqQhJNA=k%$80e?1jZ>w|AZs{P&kg^ zX2xJXfmQI`Ez}Rpo+37@xXWyR*&wyEFf@;pgZ=~)yoszH-Sgs+4V3wk9f6d^uhVFI zGhL|fL8+-)%Xs#r_PVB*>j)M&dCP4!Ut4+PPQEH^GIdy$8l*m2(}bv)z>#&#;`;~E zX-9?vBMo>mycopgIOXKc3Dk@}F31Kl?r4d`xkM;Q5}`v7o^Wq6%(K0DDRK>Vj$csq zI}Y{f3gWa%6?{`geQd}a3`7@ZHdG2-77$-Jkj{vxt+yR>T4x#yEYNc1ylJwagdQ|M zr%rp_@1ufBN|M;5C1dXk-6}s~Cv1S{6GnAo3>7_m4`i3i*OD?NoynC*TdK(MI8b#Y zJf+5GbvsmFkP4l>3t3Jsq;Xlm5#F)##Ble7rwT;7$}xB5Kul4QgxB&lBrnHUgfKdP z=+=1AzKhnc5T52Zl0palf~j8WFFt@ReI+hjP&*@>Qw#=>%ockJJaz7OiAc!!kyBrf zMq0#aaCkYq_*ym~MwRcT*vis)246G!VwnKjbg$!CRRE>PJIzF$g1~Hrb!wJlg)Tv{ z%t{~Z4qAKkWbyt*YNXI@G{b9_@fuYE>cg?p?MQIWs>)4ku&+ni1Az6O0#?a}sTR^Z z7@{w-$wf~+!UXFaC0Nac4KSIqjYX!TcBwv9B@L4mUK)KVjw}~Q$<}*u&MW;|v~NPJ zEIO1Qi+sFfmm+5YO-9Cql>!U=;#pojhG53Zh{@6)U=hct9*DY=EiHXQ`d}h*W&%$v zC04(E-&aWtWT^EPX_v)%#p_jQT0;*hrE2ec6~ED2YAi;TPPg%)Rt}D2{$9w+b!!4k z68E;1$Sa@OxqE`L(srly>8)F%tGNc5LE=xtn*`LN zz!)rhQP3WF-zDSw>LLs5vhg}zyhV=XiJGq&dHqn|A{wD710WLC1Xrp9Lm8Us>YYd- zN{+NYeHbDEH8m86xN;$vqC>Qq_#Ht@4}#1B=fZ3kkkklCQ;9H;Vce1 zZ#|ra@oHXyYk!n+C|btjVjaQFqu&W?dbN`+FosTf+;Z9z20fW-K&G1*=hRx6H;cjt z5?A??dF`Ltmd=KIeyMD1W4HUF_!)SI>pjnea{Q^)QEG|7@W|VuW(MZlZY0nBAIMmF1v-Y8) zN{^w{YFg-da7NjXUODx5h6}CCM=oh4Uq`2DS~@zu#?luiDa;{wTPy71+q`(jvSjXi9@RB<@a`hb zNuF;-a^Behn5A%jyNH>p(?)LfqknzjK7d-wU2qu!LD7=oE3BYWzY)s(}#63S>X=`5{RwL)#Af zg%(|;mp#^fOuKfUw2@gkZpNGDRJXhO>OD)|W8pmn7KgjC} zcqKtNal94W66E;Q=S*X@!SjgSA+9%|JzX(qpExV&QDhBovJY*7v-|b^Jp8evviiv- 
z{Di$buj(jp4^4rFg7Dt9?_L-A+C4i1>x5n5-cR%wP+V=53?AG_)>^j%0-2kXdhq%r zDnYP~ho$QU!@`D~0+Jl|@t9_aArxOoFj0hSAPF%iuF0PG0FVEbe*Ug{F>o;bW3-j^ zKdN4TAi+P=&;L&K`d9k-kE+-2H1jL0_>VEy-y?_r6AAtQpdYs1QSi?t3AR704FBn+ zj&=2Q`_)#2EQ!k-5ydnFjx^v;=Q@aXzqCE>FAS&Wr43HulO>8mMK8^ce+w#hel zd(KuYHfmk2YR`vZTUl9e?yrYxmq61>aTG{#N~;&nQ4=To$#-B%L19*LJ26PQ!D~#b z(-=sDea-3iT@%hkNMMlA0Sop{^sfOwG*aee+2+QiUaanfgE!uV1G6gYa#3hybdq4x zpP(|@f(F!|BU%}NVcFDf$8-CNyN>V;&B#|(PAv;g<3y+?3^Ca&7W3h+6A}yJdM~}< z2dNZ}b>&d@w!=nuxBog61h^rxhf#FM7RbfO|=A=(+ z)|X(B&&zG+2~GLx&X4_^(~xZ(ADMMALE$(aEg)28U$dO@vtd+8k>SvT7VBiEPPTk1 z;C7E$xLK_4UIGYd<36pKon6PYy?0g-$P7r*5Mhh{Q8_uGmM4JhMg(x>91)$85O9?Y zId^<84~8M#Pml=cLAwytuHCM=*o~}1M1e$rJPv_?*d5f2q1_Qi#$>n}8BiE0=iG%W zv&gG!?6;s>5XAH(22~T1l95!3tsXg>B|b5bWK>@;Yo%g?GkwMpyIX!Zt5#a>5x^tt z`US$uj}3IOfs$si2j%-0(;{8Wzqr7o#cS>fZr4)kmj5)0HxYwPZ{^>I#=%5(zV5+| zWciLEUO_pu1O35hH@5ZN+?F^$Ei7Ein10MDcGz|2PHlby!&ktiwRi-!n|qlFC5#Cw zI2?$5p8=Fe+@p`;$Rd*6$`sihev@9Fw)5&fh*lL{C)IV;lCWj;ReJjgN1l(ltBlvkQ!pg#5PmB7C#Qy+#j5@n|*s*=cL07EfIs!N)ueNsXm_zSF*Pw z@tB@){he4Rem=UZ{CtDGP#+7`TOvLckM!FS2dzU<0~YL1x|f_7Sgga>3dil7El<6# z_}J9R$xh;+R_yJrwtz-3bL|7#Ll@Qntr3P!1s|S>uCfz5cKV&WnpwTI34JyG>`()Tv0BFC3#_Jo;J5 z_?+fMp6WGG%#^MBd8BDG@wLpz_BK70=@c9g8c>M-)ed#E6KgV11tXtF9S2eC>5jPu z1-smyp?Q$mI(yy~35&-Y*lm{*gQpmXA-K9+ z+6GLyr%r5n(EhnDO3P@<9ds472svwm#9r?kT{i$V*kVXGkd|0=HCiPVV<$K?f?Z+s z@{vR}JhfsUr*S!btRkbH%sVv4=t<#z?diiFdyW^6{TMjfl2v|34*WMCnIe?*!Cxc9 zUSc&hW@`#IN4cwU=OR7_pqEbAqxMbQNBumo^A=hT=gga%!X81u9!xqS^zlMe^4I-M zO`M69&fmtokS0tKr$ThSo%{htzM0U7ZozLG_?p2FDI8f&@E@9N z1PAF~Mmoa78?6TmId`M&vIhOD09J+euGW;C5#Ex+VUDT@QRpvF4CNPMX^vaTyijK-4O z=TQb1#~n!PuT+nnA>om)?hy?u0DsbA<$%W`&yW;8;9 zXSKnP3qXukQh~C=x?8Q&BIIu_14XhYL%l9fx`I3z^f)-Ins(wR1c;s20h@U%@@fx> z0tVB{skJ7%)6L5-nB5+=f6W0K^xC>Iw_N!{r{Yn?s`y<64YWbk(Ol|epCpOi>WN=j z)h(bQJ3Q&pTLur$_VbL}d7uD;TF^1(JGmp$MvKiTP;p2l8DLR^ShXS3j?IDk9DUZW zNa}J+Mff~kX9;2`x^bHq!vi{%ps!Vu_bsNBgu7%{MiijnN)gLgrYwI5KkVuEi{%DO zJmc4xmac6`RdUFtCK~oSAlXyd6l40s$J0n)(wNDg(FGsxQ=I^4F?^oD3K06wTDqM> 
z#hHrmWx_BUo|7y4Y<*3#O6Okf;;3|S0z5sB__ysB34)I&oEh}?Nkgrb&dhlQGKLY} zempI1*!L0Ga`r;&98Y;3J|{J#eponVJ*D24pJsz;mc}E!Yw7bHX3|REbDx*a9)qn$ z#(CM&%W zj6tV3*E3pc0pqRF!~M25kgzZs3Fsw(L!CnnKS)J*Vbe=YRT9-OYG zM=ZY{4-`Gmzm4cpqs~H?WArg!`wK^BU_o-QNGno*YQ#>}02m+Fu5VzW8?UHrG@Qp>e%1at$(Yi{Q+A4#FT7*mMi~DC-q;L@{iB|{X74yllnVT{-uij z-fsD?6f(Bo;q~u~_-7yIKQZDy`kxh*SE@JFHmzC0lF9nn)N!N5VJEhA!^}0q6_Ia^9P*T_Rj?>d-xMX~Kg6`}!5# zb`9+@YeAdDYU0Gafk8#u1s_blLrNyOkP3!E&a^R08@$UVQ7;y`{gmvgtebP^i39Na ztG44RG9QgMh_=FvI%0^ki_u}`yY?FU-B~~YTwrA!I=c0}^Zoq=ZEqG|FbRLHX5Zt{ zjAXjQl-mF!o_$;sf*23P+||>NuFm(np*D-5FXfV#i^o5GN4PNNXZ9Q z4BNQhsiZAey^Ui8r23qN|9Zq&=SiYINvL(--ZGWVc$P^fSE|3Ww}FQR z*zW`maNFB28B+EP>p8JgY2a9n?$%6DDJ@XtMYQ*(8fGMi`w|w#wFA}j$>>8&kc{Kb zRM=~y!3tP7<-5AL918ZA%<#w-rWk4B#xd4@z;lDRsMq`Z{zR5BGUEW8&=QfVV!X%l z4aBYrU(4q#TI!L3ig{q(?om%N6HRF2)J-v$DSsHyG(w?4#=^vNFhL;XNDNZoENXD7 z0S83ER^;#Q0v9UK5OrK(AHXq_oXwfNbF5gci&tz>ZLtHakZNEI;^ECP>m>H#&aANSmp?;@X04uA~F1NaYI$aWa-Q zBS&58WL03YbMSTQP31b$OO?8H@2ZhyHbK9T_#j4G)?VlX2bXgrAlwPYRube8#$f)< z%bq?Igyw31Vnwr-6$DsNFzHqgVF*`mavg&MvZ$D9QG)4#`yIg>Ux}Z|O1PrbVg7N3 zK$11!3D?ESdE=liDwv84(0w@tmLqr2SaC3veuv4i{vy95%&IT|#Osm?x%Vk>_8iT@ zlH~UdmA3o>U@>7rM^8#4Zu!bD2`= zDKKl>JF@3t@5v%>k7?C2lqd|ZUcdp=<9e)`y!0{0sf82{M4a>u`$K`E;e+g){g_K! 
zf)k1NDzUm>330(#Hvx|mGN&+v@q>c)UzzE3;U{J}^RH0@N5E!71ecf>*=`sqx zc7c=d*9wWbY`pmIOc4StFD39rT=y>K!Z6lzHmtS1L2InUyJdGNY(W)`ww+*9Juf3DZ zI?E71ufh4F^_m`yIrhSkU>3=wWv8(%DkKevnwW2)MSIW?#Oe71jJd1bH&#Y!U2$7G zY&<{Uh4v~apKK<7AjLZXf3uf7U(QA{@em-FRY=}oZWI7Xn78q{36f6(~i(mm6EE{ z!l&w9a3k43mbSU+z56tqW$+OIsPb0D@S0sV$0EEGh(M$Ny(3FHuh<80Lgvx-gpO+KVL9YBoIcvgM1#^TeI;V0O)ns+wzYwMk% z2Gw<|%K?3(X&|TT)<8p!yCJxC1;cxtLy_~W@_cSm-jg&CZPT*-8B6;=6XKtxjX$2J`il^M z57qrHFaDOn`<)#BC)M$HmGYO({J+_+W&a!c{{6@JlPKB$G?@PrQAS~`Mz1xZ*lxa| z+#17fsW^7w!}*<)jc;5=Cp2d?>hZ{a#iI?H7bb()TvbgbFUdD4n#lQ36 zeLRiBnRQ6jR+mfaxTeg1Ln*3%^S-;~tpr-7jbd}&gOzBB$g7SvUyd#MCOZ1r7_&=W z4INrFo5NCB^U4nAJ@9!XpF{b9anCy(AQvfJcfS%D$^Jf?J0!~V&j>Va6eF;kRld68m>dw_435nEmaVpZ%TsA=gsG)CwQ zPXQO_K?0fDzTXs79^B-0;PeB;LO-WRNFK%Ac)Y1ynidKN$x^0x3>TDr2qpg|}c6`dD=Yxb2iSEJ!OBh(nttXGlAc#fuDk9Dl5 z5xZ!)CC3jHh*o-#?L7`^W$XHtr{^}!{dob7TA_H=p2BJ3abW-eHL`p+J?vj^DQ09m zs;z~Ghjh!=m5+Cyr%Lc%)@S5>e}0g&;pMG{DVkq%l>RXE98zWp_lva@MF2Bad~db7 z)#V;DXY1^Rzh}cOn`9%9llDh57zN-#0kj30gl}6P3~$QAo|Q9I22tj-n9o9Y!>kZy ztoh=6y$PhP()=;cE5>G!rKKufE{An+PtOGbc@wObcPV!Y>T%Pj#oM}0M?vKrP>W0j zw0&zDHLE+tKrly#jpS-QlPC;;J;52W9!s7Wu$$^qB5-BLG0b8voLD2L$#cCi-^#y+ z7-7i~>Ke`0lGDVdbMK#^_2ruoT^ZXW0A?4g?=m*m5W@{qL1eMKyi(wyAz7u~0E}n6%kxvr=9~+-kWC z^)OG-g0MLO599=s0kO`5BI-p0Nre-S(g~F8=92)gHuiB~q0$$yT~Ig1VkM~XUmRf! zqYsu{HN0iF@WY_KC^m4Oq;ey{GqCxKPN<3*i9!_b4s@wh3RN%#T6v62xPV8e z`4oi7Kxf>|%LEsh&xVMp!%;N|YgU>53@L~|!zKl=9^~I^WQcW$yQ|$P-!mLnZ4OQv zD^nx!i2#x9#2^K0W52)wIE5J?<%cc6P4tu>pS4)7t6cqZ<@{2b+G=EJLYe%%kAfB8 zNEV3~j*`Ua5<11Po!=31N+HG*+PCNaqW~*rJ9Pbv2GY+m76fiBq-rXQotj)BWj|!r z5-sfVinH5w%amPBS63sOeP)Q1!xpm!?dZX~SwfxN0Sk7gukM2fJcryPFmoSj>-*ye zDDx_Pss)BrL-?2x1SC|i6(t~%FJMn7T)@JMftwm^?l-D?ud|k!7$P4D7>gG<(R8iA z+;V*OAwcx=f*_jf-&J)-!7Uyqd4e2pT^J}H5D(A#owAGhE+fcYpTgoMzBD!sfu@Ht z)8zvJ1MB#jz>jGl8~xl+`_7ER)PrJnSunaa<1pT*5`UV39dNwLX^!P5;fCbh#QGv? 
z(jLi-8=I2C;Yb23O8#P_0`eGLGSi);#v>iJFKGv^i<%FXKdjVga7I@<8A`ls#FDo= zkT|``DF6k_Us{Cv<)IfJgvJ+wcrm8SkK zz&N+?ed}a4>9UiLkB>n|62Ul3ALELF%`)1nQEV>j{j9AD1{>I_kVBR#YMa;ol`*}vsz zkH&rw>#n9?bHOteKlhVeX}6Wfo3>-E?_{~CU{`Ug>#zx!-KAro0wHSy&sa&#qS8J= zrZq&r#B)CJhy|FX-sfpz=Py+_0A@<+2JRl<#JHCi$zT^1l0Tht={zF1qYdP{+}_{? zk3j@jw=ru5`jK8*vkc~c6(={8@Rz@IG!XvL@*u711%WoUbz}F{#1uksI`wR?BomPX z#&z8>?>R`*xS!9d5Fu_6127d_Mh9@Jmjf|JEK*h5+y94bAi4%<0$6C*HLmy$>I;W- z#*(X819xP2a_>|*A_uI5C#Ik%Ie5VQ_FX%t9WoGq>$5Qv_OvS3I;Gtr)-zZ_+CqhW z*`2ehGL!4W4An&p5&B-vsahiJEQB?Wbm{dGd(J2@ay6&}g8kB$YQHml8^J+VEqo*- zHUy{5&6a_wCdd^Mm@nA)($2jW^!H3d`)J0nD=tl*qx!XyyVBGcCX88iRRFDCYi{&(#^_7a~`Z(CpeNA2{o z_iu?9^oqK*AH70Uog(`SL$gB2QHVFa?m3Q8DxTi>NWVmlXj zl$#7L-{ov5=Svr}+)P~JbtR0&i*we389GTDbGd`tP?R??stHOx^=d8~ci`dcd zeMVQbUueoEUwi)LnaFq40ouwD`pSr}$c0V~>Gv1cM$|swp$XMLk<`f z?3<08o|^z#zNTK3R?`;eN%oafUfZkGJ4M!w4Zgil>q;TUX248Lk>W+^b|C-+q_N@O zCJHif0k^P2$1fa!mt=q2qCb3>XTrM8MaZvVD$0WaM>|3wpzwAZ7kUnf!P}gz51xHh z9RV=AJ3~>C`*&RdxduZ;`w_X51kYU}tGJ~o5M%HMs z1iTl!$FD$B(&0(&dFJXpM0?|LJD*2kE_#a2S*`K)>-yJAO3GH~>xc_`hm;T__`&_J z>z(f}*XJplX>YVp0gTj_xNwp8&7X&b=inY)EF$&C;BKtW*hb8<^LvBC^D0EX9XO{w znPiJD;WK!99GQudQ536Dw-L{hxaVuq7i;v^%^2rC#;fzKwdQLz7FKMakE_IyY%%y4 z1?eWKnPMVbk9m2(mY1LJPiorwSOWrSH(v+qD%R6qys^aab#{%}MX;08f!CvaRZd%V zLK;?2@49-K@6gw^*&4I4E1_)jo7Z*d-w#>`KG&=GRwtZ>SWsY|bh_jkS4Xi`mDgW5 zC59jE^!D*rQ`?aN_d)Zg)cyVC>{6|8FLxc8eht~6OF^+8j=;)Ay9FmfTkI!YcwEix z(W^k0yalmmSap7EZ!Zo(9|$o;-J7ZQI^Pfji^{PnXax6Detkdp%CZdQ$FRY*)(#i{ zuq^wwXCYEhJ@DQ3Hh`;1Ma&RJfd)!$-q79!KOzh8d2vCei0;!)E}Ur@5E@j6Wyrf* znd+$?EANE*5|kY^*n8V(DAXCb28q?t8u^Z@;Pqyexve(eBK4dmu#?P~=^|;qKm%SZ zE)CZVhRPT4R|{0W0TmomqCCj&rA?s-tazH9XB0~-k$;m}RpI1>dpy9$;k*T}P_iFq zR~+71%S}X*b-ps23e82 z0Fe7qw8tUS*Ubb|+Y>0^x)iFUzl1Ff+Ix@96?L^|?BvLQ8G2`Sc%BRpr;N!;4qD(6 zjyj56yscKLA0RU1LE0Jbs+1ghguPAkjG%InKt%(gB9;BJ6de8Gxk0?sH3^r{cI2&mhoEgLftr`K4oBeJ}LDG z1c({elS5;=-AqMH8t_nmAYnc{Id3Ai5zo`;qT}GuA#dYdxp0}?jZLv$tag_$8T?FYv7?RRa#3DV%U%ZMw@iVt-D7<)GxvqBEV5Y=$U8S 
z<)Hy|MYss3_v83$m1L(8gc~H$V}m>7%`(Z>p&CX&O)y{Ms||IY-j?=|eO!5z<>k%c zzI6iA!zP`c>G+2HumRuiqcM!NwTK3KlN z-)O%So+N)VB3GfliFsZTf&%WmAk-1n$=#i2bY~e}If2je$S2J|K_^mD_^A8jjC0y#C;S$KzrD zG~u0LSl^_ykE!1_taK@1e>rSF+>6W z4-pc7CQU>V08+Ac+Xu9sh;>r(iQid`LPnN(`Mtkc&#?0Oej^Tvffr;7pvK+!FhvnP zl^kpufh)?g+xDc33mOHR3K6R#zAIOxyiZaYcc@csr~Emv-jb{{zBqn}AZq@*^k}*$ zvfW*CtI%akYcqN~i!~cQ@ru^Mr~=CJ6ohQI+Bq)Ww#D zK72ug&*fZd6jP9X!d|);i;8U`)FDKwt><^@QLt{W&F~ER=?8I_`LFJWFLlKVROlHy z_g`H~SCZ#hrm0aJ(h7k5sacBnZyEsVC1c3U|6C9g?qD+kLm>s}jx z%n|ce9xl)@qTj!{Beq>hh^cgmeYsvv6sq8w&GA2s@D#B07zXs4V$WyDsHicm)fP!e zvcKcL>w$<}yyNJc;zyEk69}CGf*qA>=2(g`uzdYC>5u>B&98os)iuI$CkU5GY=Y=T z9wzwHs-n>Hq{B@Z(`;*rqAOV&NRJg&lLXEXAvBH2thHS?oVA^!pjCFRs`!ZXc%F3F z2x-C!LKhhBvbtmcW_^D}jh8PP$RIaqGREe~Gcvbaoy)7ir|yBa=12H<*u*V!YUboy z(R5w!+?OjqV0&#-vv4;Ms$wxKl+HS*0Zv?pRALJi+-;Hp`X8)-&aExq8GB0h?igir z@o7gaFxU(x8_|U*R_e$-^cRwCSn-Nx^{6N#_#pY~aCN>vMQ#x9eV!U`P(?|R1ffx$ zh+;^_MF@oWJ3LYwoC99Ty8{Ty2}pt*RyPHCXg z=?^$N<3rhH`2Dcqtjei5A?T*OV83UPC$$e#8s9WuWzOyRo^}Aq4IiJsHam4WUpDMX z$0V=G1|i-LyHtV+T3e^;;)+>(GTpGXc1Zn*mfwwo0NGHiqbc$}YmS93yr%gYCyr*RLY=Tm0iJg0(iXGX&g2 zk4DAjR%)Q!o(6X|j2JbmkPylDB)8V%w)d>ZS&lUtt$VBb7Nimi%wZv!eqI2Pc*M>7 zcaQbfgd$dVd4;jIO-r5%IMhzcPWpk)n2hd<6P@yehYS)Z_tW*3&f}O-(4T=7qTgEA z*lw$D%Q%X*=IG>=*BYj*5jN%~$!vEd0IV%+$Yoxg?7Zs zAp8Ah>pH!XNQHYY*>XWx=_TZedHLa;GxCcGRm~H^clB@*!~OKoAg3aW0aH7Y%$$Y7 z;6n1%1GqSivHk2^>PWtg?wOxr`s?kFF0TR#>T4p6b5A43ow1P*Mx{N3lfB1Vd60%s z;qIBq?-k=pgNPcoza18y++dxB%fJJ|>u|D36qge9nHS6H3D8J<5eN;sOGG-itK*KK zv*4RiWgO#M&$%w$po+(jTmaRzuiK)UDO>8QWHW`Mcr3rGZ7gKg>uE3v4&aBuHxpTo z@Q=nGVt=U=AOi@Y#J{y7SZ z!S`6VhkIXgNrIZxIj&ajv2e#JI~kSBok>iOf(E5%$a#TM5mszy_9QH$r;uejV5YPu zfL>IJSk$=Akcw5Ih19Q6?%JO~y;rXewbTr=Q`_8{g;bmCODZWV$DU*i_I4r==TjgF?7eS}ExxoKUIEl!ddrpjd$Q6?50JI^Hh+rrQvbYl z8niz;?C}ugh%x4I#AT7cB&=GW|Mg~~aX&eOs#0kUt|to1gh{EUb>`P(n&8ppJOpsD zN!1f6Ar2VC6oJ7kTrtup{!}=c<<>x^R8|1`E5)tAfhThaB)U>+e$J&SaT22pYHk?a z1#@Lc5-iAkTb!9lv@I1ZK!+4jKskqIVAuO)YN_NK>cWdV!8aC-#7gqeN|z 
zDl|~H*NbbK=ycLh>A^LT>L3U~NR1S54;NRg<3r(Ie^6qPc2C(cgXR*((FI*3pWHVQ z!)IUpHHppCR|4Qbzc*!;15heD*wouC_xYyQk_^h=%S+RDn3VP9#!Kz<2r7q^-X2tK z_EdbSAKy66SiUyREr3VcC1qe+B7|pnXM$Uf4>rr1)NfkU_ZE2?#tY8}p6JgvklI${ z?Gn=$LfE|4$z|%C$mEByCY0?7fo*%;F)~fp;-pZLYPZIgX@^tWr#U(izl$Rot(dw@B*=xExW4ZKiRg!bz8? zdGr99Hrs+Vq`pdYHFcI(#9e6^#c&yNYQj)b)~{66h;F2ZtbX&f;BBcuFQo11;Pp+^ z7+de)q4T0gTP3&kJ`w08!TdzFeC(ba1%yu@6bsS(wLn%vc0P0BT3!3_5Npo)Af_3H z23t50wP@p&UwNGkZzu^0Nh!jCNQ>h`Uk z0pVe!vdX%QMG@oMbN+Ri53#--Vgun$oBpnrj;W{0w4s_f;ku=DRkQ|G1Zb zx2k?+v;J`n|NSujuOd$DziR=1Uc(%J=Cl6OHC&;-8vSeF3-rORv)6_S*dY2MrG2~r ze724&L8KKAh}bR1s#T_(nj^8DCv$U2q?4^kJ)hdCY;m^{q z$3!yrem&ayV8gUArXRN#mf#^HxgBdf$|e)9XqePyuBnmQQ=lRKc=3H_)mOcZCeJ-@ zo^iroH~7`}*D$Fc#V6qFm%vAys;mxmo$%=f=hDqHy022~Jlo=WNqZc&$TaYV2tzd} z?V9iULdx0jJs5jNgc4Fk$j-X&RxOev^ro7vhdgwVoQ;K-t~J)Lzv963=9jAH&Zkf7 zg)Ae}dsd`Ra2SA2hJospH>H>kYitYa!j>vktQ}FRPvp`rQ6FFIyPXvGdjxtF9&Sq^ zMD5I6U%e-l5$-$&%Hsy~v?APuxKs_c-N`&Uqy>wJ^|wq~j>e`2klCYQuC%UDzBMUFpr9d`?cQ(m-q z8u5c0o*0UP%?R$(Z@JxSGBWJvX#86A|55f9V0rDz!Z7acQYh~3?pmO@ySux)7A@{j zio3ge@!}4}-Cg>>bhrELbI-lsz5nwNvR2k)W|FLxcQP}X%-5%d62R!BNF&|PEBf-> zFweWxc%2~bDM?DHK9uzf~ec3o}=?m-m#e7pG52)^Y%l504_B2N4QVLxTqZ1pbOjjk#mPH_p z{ID+;C|$S*4xdK&uD$k3ccNW1K%K_==8My2Ff35ELayQ|G@fX>DY^ciT2Ws5s^Nx@5V|u0_3A7n-+nedSLn zl7SxIW*{r4N^9}nY2ZlSc zS58E8rSil`p8sRC`GN5`#%>3(Uy24)NH8?DxuW;kK z>S)R#ux^1MNylPjbYSRhUs?MKlRmQA_o6y!N(i|%1XPUs&p%lTdzX@^C)v_3?o@AQ zY?5rbQ13&Rg3Ke7GSSa8ODc)4%NWD9z8|LkTFHKZVXE~xg4pe^fv+*^7nv(Q*RH$v_E4FEi^Lq$<6z^mV|CF$D!ga{ zVqoi4Sflxz{fEbHn`zu&rBuc+fb+gmo`F?P*S$U$R+Q4>n0bCK=<`p%^J!dPPLm`? 
zn1`?>l6|0HBbV&@XgI@_PGBK1S_AcpZL&!|$d9NNlJfymaOZ2@%-yx*pap}~%*VWK zerKuXL^3V@E;ZD9T1F86=|&Y?Em_WZj?c_v<<=d#VrJJiH3L#zPA_uecfwagT39C} zTL_NrlXb$lM@!lh`V7%mIUfEa;uR=)83kAgj+)_lRaLgaDHJ6c@I6)?ePUP;{Ijpv za_55-?eZj4mra?lTW}L8hgy-6PS0^iR8H-=wUY)m4kXTMBEzE2dt^Jfv8|8hGSNr% zBgz(}OAR-8AV;ku4^h)k%o=%TPQ*u>!-j+i-)FxK4qi$ze-jO6U*Sj!oONhti)5np z1|NBT$IXWWao3x1NKXQJ(XxB)>YI9yCrTmIhb){mWazSti6QkF{D(#?Qr}idP$E%i zH3{?GIGD0t2Pdp|F68i#NFh5u{|}V7KA+u8#SgYCR&LwnD1Mjm$0jVmVxl{RrSEq~ zKZ(kd^p=94LV9uO`O?ngfUA@~Ffs_`L~yuniS=y_pz@=Cp@DHxybxbeYN`g$)X=_G zyiwrywoXSsHDA_D8sm@qBrS4r9qje8tx*ivsN6r_V#8(p2s=-t_aZq(Pv6h?>TCFZ zmnfznZ+C2kFmC7>o4q|_cQ^Xz^QaW#_BN)iV&gekJXFMOKc2;uw4 za*HhQ6>0T*>|xQ}!+0-FQ+4pUlP>+S-|7`OCJ2!f!M$5|33x1e@blsCdvH|+a8&j2{?Z_2LbFI{|o~5 zFG2DDB;#@Z(*x^|jQ88;>VK2*qOcYnmN{xH@L$l=*h@YXxC8O*(pG1tjaDdf<4#PU z@=c{3C>cDf<4*O7yT?MCEGKfrJd$cqc_AmE@!rMCGry#Xlfh( zL397S)2CLj$K0^E;Jg#Gkn7-;k=!1LC;068HBY#evEJGRPQ6jaHtm%JYOtYk9Ap%A zcM3RF)qVsmG*;o<6CjH*XX98 z1?M1^Nlg_QFBbQ0mhPsp!0vi-vsT;DdFsn0whJ^O8z-_@JWb$ecys`Ht&)6I?O2pxCV?3madd#IcB2xPS?5g~XIxL-jR=zz-T4#q)9&ctfnga|+f4qbS!OLf|J! 
z{?V?=0!u~F||Uir|RUI_u?bW8=VnF>NN^T?HVa*lduGXYRDPR85!#1%*}< zo&}JwoN_)dG=&^CZBD2$Pnz?mx9puNg&mdf7O+F_T5k)N&Y$N)Lt{8|9gRyXfsvsdit^nANFpy#8TQTcC61*e@oQJspQUWohqYFY%M27t_p8vu=)M{U%Q(T`Ppd=keJ z1Wm)&(ME^Xb*I-0pmhjDd7fE z9-&yyZc`iL32Tv+5GCVvyS+GWjzActofH;UsdM)mbLUnMtz9a$7c$cH1-3~3ol9et z+Ef(N3axfazq?LC)l($*<5jsGRI0W8{5+3DtAt_xz#XN)Q0bC5 zTZhPgL6=~Y%)H7rIUJ#b5h9=1?7T>B1p2!)4yk_JF0-R&<3v7R_mGS_(eRwBx9&wq zN$rW`iJ)?9fM&UM)Lv639;+ znPM)S@2Du9O)9TshHa*<+I|27cD+2;UNHK@j8+8-zZ{p?fW|;@4W7_^K^T_|l0D*` zcy3r$yaJCM1_fu_tZroQ3B22E?k4rz3qIqcYm*#dz+XypbspPicJ4{j&UymF5)ot& z2H$zWA`NacfT!1SVf`BU_(WY&X$_oDG-FJZk+THDd%?}I)(A;Ba+77?EO=X*o1ULZE|05Qib^=EtELse5!>ohKk^MRQQ%znF~hpCiV z-kxBt!xR{bL6WRdN2x2gnZtsK-coBUQ1_W*T;VZ| z{XpBKVO#;3Z%bC#4wzl4Nw`lB>|mc6O?sFkwM_z<;I=1q0oH}O8^5aK%L`nCTGM0k z5gu`X>*Cn(MkANmFyC@uc_~7KeiHcg3vgD~GP6P>Ws_a}$tc~@d7Cbj3k};8w)-@?Q0=a*Jtx(P}eL2@o3xR=ol~a3=eMo?^o5Ujf)h)yHSJV6#9g3y zEr#=-jv@c8%=k-?`ZuK@=kJIS|3{_Zf69#i++p|^O*L5q3u8kk7tGE-FC=8^MxaT{NC5a_VPmG_TM<^1|T7*)Qpk*vsBBXjn5P4L*PIk#KA-Uvx9%D z6=9hE-YLq+@K4|O|94y8TKxZR&0oFoh7r&gZ~gFZE#%>Wp_g;8HB>ZqBG4qDmlF{q zpjR?>b0PphiMlz7D>~^r851xv{1UF;I?u)laQ8=`hoS%7GmH%X$ABO^nyt~st@dLR zsu^-Hb^Uk5i8N4b1fbtDrR`X1ZUkfrsL#(l%av&yvbQ>Fz&UWGCxzKut4q{klto>d zelVY}KR(ca&757x)4W#T!8 zifVF$@|TE{Maw1y3QwM%zLMAH#|36O5YN3MR6&$H%GnnjNj$!COX#ZkS>-$uj&;9 zTlSg0v)7W}f(c~*-}QuUSUR}beilRZ6fOo*Sm3-D6k>%n$D=Cz!tYuosRV8 z5Nj49DLueRd0(qjOgDGatpijsl#lAirs3T&a&vR>es3t*5s{D#@fDamqzWSY{Jl{ z(+(jlH92@kN6W;k!yiL|L`p6Wt0cNlbd9lwxKHd!Gc+1`4cLvnz5|W^EX`Rxbx9$R zSk(MEW~v)ZB|uDs0+-gXFhBhx=zCUeMv_A^3%FOqy!|i)0+tO8?2=dqI|6KKlxTkv z?imRfj-G*wQoWz)r70c;eGhYO0TRkV5pj@TL}S17atUZMM(hg03u?KagvAX26JfIv zlI51d*&Afl(OVQ?EJM-J$BN80;#Uo4L;)RpKo3`+v#;m4M#S)+-n^-bEVAMpOL4>M86PfZgZYyzEjPASO zU~U*~m{{R{DukcClxid5=N_kWm=OYKbW#+7kjy#a1d3-gy+;RI9Bu~52@uGW_8cBmSBP^IPP(r&;zR5HVn7yy@)4WH z1H8X_0U7ucr$(*?W9E13QTS2BxzQl=?;0N2*GC-oFzR2U5?KpUg9PBu(92V^G6G^I z@I4{obaxTUru*=2iGnMtu)sZH=@O21jY3_N3VxO7n$CyL!LTCe-y zH^c^24iATM`8bDkg!)~bt5a4&<>B@LXPxaF8?3QA8=4SmEiLDUEkCrUcQ1x>Oh_R> 
z+4&SnqAayL5K~h#6VH2@y$lPZn zN*Ca9G-wLw3j~y&>`1V$0VgCG!UWtaQLpj#Xxskw;aQN^s4^;}49YSE#-A-dJaEC= z+*2jNk@(4Q3{@z^=67YqpngG#`c4h@J{!kiLM^VE?+&GJcs(KkI%{LORbtcEkF8DO zVv|x~Zj?>|19;qBy7os)`EDE73ekCs)97 zx~fx^!gPQdEyE%%StHm%0+85lV8Gvz?kpg29qC^3H^*F*o- zs>QXJ9|zi~MSSbckk$3G-Sj|qbAyR39^-VgCeLGHyiS+;d?283RY0q=c>HpSo=R7)A@ zerPq~Hz1WFca=jp-HyXe`1G>G4u~zS*`br81*2EqF32eXu+&5i=l1J8VUZ;#g`+`# z>$f;$m!s3)9tYWRd36qSFc&t2-1&~FnC9j0tQ&xtd zD0&#eO@#m_82-sm`wNi!mybw&t$K>!O%nY*ZF1fwG$xZ|Ed&IiA+@A1<5028{4#)$ zl_e>Bge8-1Te4}GWo@ug>?5h&I2d5dMdL`Pfr(({Soku1neuikDAFLzadvG*ZXU_s zV!=LIL6OS-u!M+x2Q}Es6k*X!PM#=L2a_-mB#_#o10kNttPYC?)Q>D;3}s^__(0@e zi|G|iNXiKD^jzf* ziG8~Rdtt5GcFX6SUKOYmoP7L-gX5kixkr7Dgy&lOp0za>?nG_ojp(BiFJj$;rZK#R z_dJZoiFFsvfmz&qn4{HH1Lw_pGLg91BmGK}xO|u#s7MZ8Sy+%WKn`9A=jcY1u-S!m z9~Ocq7*$BP?%$O;KkW4Efkb=vI>IfNcw_R+!^zloJ4S{c9)eG-Z+})~JzhJ4_hto( zX`7NZ(1yFQzONr(zu#eo^(jANf7QT!_nf%B^-xBkIMoY%e|}tL!iM2O$=hui&2zmZ-E%#=^K&9- zl{6Ku?~b2d-iqEmYGRS77u-;}Rd-JI^{ro6xFWs2e9HGkxcAkd5MK>#R zv#sj<7+ic!X2=*?aBH3h2^*dUHeR<;a3`K!7(1R_oF%to8U~(SFgAS8c&U!_)gcRf z10PS}?D+D%=KwxIOPwnSKE}9xECjx|9Wuc{z>Y%Ltpj+kk0dZn-{8HNJObn3`N*%j z2C@ipwLGc{n>Z#>LH3RaWGO`At_ft#!HTEj$j_mx<{`3dg)WXe5jkH>Hfpi-Ont={ z+pv*h4qdpv7rnUZhdaU zSA}hi88$~*Tnj1M<3sBr+V-7eIZcbn$(-ACX$NmEVOrOsv4ZW%O!8+}^ups26n_;L z2`luGME%Tz093Jo7LFjlpz1zIp*clsB&=ZpeEoVIB*amPkXf0OgR&-+)|(*D6^=Lv z1BCQubRFx3fGFdVQj_|A)HBEdkm+|CwoPm&qnbk9Qg6o|ll^%PRID$-gtp=A+6(U^*m*QUrLF>u8-InDvReD-X zgsqPmRdluFG?N$x29~fY*@6wNN71?;g6s7uqxR}^Zd=tP-?Phn>&Y~|7klhlp{ zPW&6 zTO1Qz68O=!X+~LnV>$Hz2rr}sc=^$3QH&mPgb?{^>bn%!OvVT}{Fy9)54-ADA@-9X zwZ?H{r#QaeXjaJRA#*S|UpzX+^kcj~F*7;=_g+XJemwXd(B7z&h?uu}_j0aR`UO(? 
zxrHWh%}sr5CGgz;tL{Eln+uqdUIisb>p0LT&yo_W^sQw=nlm<(sB8%LdWtJfSYCv) zoexlKPl95-xv{nIocY{wy66r{1AcsqkyR7#B7FF*p@rz|1ApeU!M+-Xshf|vWBVhvF zu3i%lLqsbZ7}I0V`$)%4w2To~&QV1P{Ng(@?d@`$d=2LCa>+iW!lSL)t5n9djDV|C z&vyqnj3~q%l^a)4K2Z0Qu4<^`0lud+9kA6F5!`t~Q*q-=UP6<(=X?o@#Y|Mxuid^y z3-3}Wm^_yg~1-$3;IEXt%GPySyOm@57&c_IWA+S3#DFJ&MJs6hnvQ`-OXg zR|T@zoYLT&1TM`{jF<-G@q}yC{pb`_rG*qXj(u81Ty3dkkS*DFd5OI!6FwMylcVx= zGzU$>U@MS<7}w^a7!Z}R)Rh?&F|1Es&jLA7P3X|{Qb2Ng9XghL<7ryx^8Bxmc(6Z% zhHP6N#i0aH?S+3f;}~m*Z4e9IaB#aDORi&ru2(lm{vEW zC=Eq~A5aR61*uZn-XtasacDV#reAG!g`!)>C(_;`Czwq7{<5((wnY*1Qshxfyqpf^ zhUvi&K1F>e3Sc&14fNQ|I9*U0`u?@<`^pgylFjU=d#fr_4A_tQ_HKsj2KoiA*Iq{7 zh!2Jo(F~iw z9|5awv^LnrV6HiS9)tk>$yv0PctGq@gFDcaL7q*GMmmceBtIA{KN;dzoz+Za2dqv| zqU!|`kR8X0EvPIP@$Fi<{yZ))j9pXe=sTh@iX8X4OJY-YWYI>^)af+n2-M9zQn!=h z;GpORSeF!gz1LNe`@@)OLR2auf=SG~Iq9HvBhM~{T>KV7Sc{IYc4=Oj1%-H)s&2*_ zu6c@b*ybNxt!Q3@71XPxTUkEbE`d8fcB$6g!1d*@^#I{^*cucEWqZXGSn}3KbzE$; zR3Ns}onF$kq*8g&hUJ;c3|%V&b>7VEj3V(sr^+U=K9d~<*eM2$6Pzu6MqGe%94GSX zmZC`$j4TnBQg+R+vg}H*f3~VL>lp|IS|&Mopi3l0pL;nli3KM`$*y}BD8eu{tK(|p zjpl-b<&Z3Nt}MT;i9_jSOKz@BY&yWI0A-JcDNP(&P`rJ7NCgb?u|Qc-GE1F^6tj~&RF?V9^XVP`xb2s&xlySjc{*I4 zTY_Lb!E)%^AB?V^Qo1)QOvXp$`Ct0-sWCmLdl3gb`b){1y|U1izKXV1)Hh}yV;_D` zy@@)#+BpClnz9k~mb#5}+KLIBtCj6QB_m()JMs^>hvB%`^drivh9cj-RE;}@57a)c zD!{ua3VI-joX33mBrRUNDAZgW(!SPZ$Ub{slOt(ZfYT5iFgmE*>$=0d;+&@w>Bh1X zDYAjo=y1PM(@9}!+mbLHcP?(f>U8@g)J|E5=!b@%Kv`&*E&t<6n@Zc?m;CeT?gt^f z_j%eVhZQK;OjY8rqaNkt!xoDpoq3`;RNL|gwh>!>*{LUDNq%Kx?uhb>RFjpTM3KB0 zW}OYmO-edSHj?!zujrB{@E#7+^{MDqkdEoiBPx1=Xxx~;$Z;l80Zxw?yRj2fBf$>o zr94Y!M>$mRQd?<2j5?4#b{>PNK4p`f#O5WG*`n37N%o=8`+AMm-?+h0io*LOwM z{7Kt0zB}UMIMpYUcMz$dWbwtrjdWpT8#59=;9ertW*^9tCj(XOZPqX*D1 zzm}d<7^%4u=+{8Lo$5 zh@?NqqcSv7_otRaFu5kM@Ml%&3qDrOr+D>I;-aKZc}&_p`kMXz=_JxLUq4F82q=|3O3z{HkV^Be(z3uc`vKf^0Yz)wONhrl;!Nc+ z?cPXc4hL&@#nZc{#IEF!Dx(o00kXCeMUf#?eoNziq<_!vs=SyUg^_YW;Cmb2E2fO9 zc9zADY);_y1KTA+2Nz&I_4rprOOox%4j$ju+O)k=-wl>c`EWO3@dw$jMN${Ig$i&7J>*`K{^yh2}3!^xs-x 
z82XR;)&Qlu0t*bik^%v}q>-_Wlev>S485$J3;{s9t|&vmK*zuYxDX_8a&R^V9QE-p z;(wft-}w7mfB!}SP*F(V(fF69nO;FzLPSA|T35o@%Ej2p+)y7t>bIuf2B7%=%>X0A zUz%wVV@E>=b2}$n2S8i?cH3}pbP_hxcOYN}a3Zb$`-X{?6^35b+{nqy5uo>GV(0jk zm;opLvI2ZY-Vz7E;g<~%2%Hh%Si%hOFZq=KWvl>)=QsGb`#<&m)cN1?zu^Hm-thcN z09t_I0-RO{AoiBnnE-ytfb{Dg5Lt&EK^H@?2ECR2Nx$F#b^hS~RSt;L`Bu&W=zu@zZD4*4+}j}im-LJKzu$;|Jp9A_ zpZwR`0BHWNHwMs{f97wD{dtrAh6BLtvK+D9$ z`qwMO$;|j>-V(FYH+3WcOkRSHhJcw1&^v%BL0I2T!r0u@?Dy>(wYO()m0{>5o%F5D z4FzpXt&9l(Q;wpOv9$^TBMT?MkZA7cXl`TrYeIP&YXBm^l=Eg(qoB97F{XENwWW75 zb1*ihH?ehgpckSSrWc_XrI)1tKrci8kzSEriSn=J08ERTO0UriEx*L6w4pba{>g#VPCZXD_Q_1f=su zPd^@PxVSE+@~OzBo6#^R5s2vsQWW*&P=FB^%*gFPo(F;vNfaAxfv8|&K*{mCHZAvY zgTpaS-S4doH}82>aE$hxparXTjjUrtVE_A5;Ms$$Udmmc-$ zNDT&sPmvl4|bp^1_nsGBw{2Jc^WZNNPR?I4M@mQ0T1cs`+h&VFp zi>3lt<#Cc146yu*(9pU8g%dx5ztj8v*q>j=`+U*4est{SKBWzwLK|zN`r{!Be+N85;#1@N9DY8DW`80!-0Q?L4_nWVAuYIOP6pPK9 z&GQbR;(M$N$GZeeedIwQoPoqGAtD=qR*8|oM zOX^aXu$2S0>9(IiR~cQaHFiRH%4bosd)05WXyG&=^kfza`ZpZPxROlX2cYn#hBD1X z`#oxuWXNw#*#8K&Qt{$KG*xrp-%7@^i`BXp?Zq#|gMYW?-9Y}LB8pYWea^{-(Rd{% zH2_i3D3jK1&`(0{QmfJ~4+^OPf*{;L`z=Su?p%aU?8Rgh+u^BIO=R$COyH=($}pEV zphCp#1R<}}kL}V{=)2O!xxrWfsa5Zy8~iJrNpxty8DSM3AK&4qiR#v3icJL>D!LRQ z`fksNFgxMRN^FERWJX8;!y){%1{}0&3U#qvLU8oehQIp8z&q(KsmyyUTdx45D)zFc1;BNWlxJ_w=EzQ%w?H@4zP8B)rb5VDT7smwNZ9pdG82 zVAvs~^2~32(1bPb$NdS6HpfJk1Hc!8P|x@IRqmWPHr*GkIuXKO=@TCgQaKovvbbNN zzH(g-5nJnev;Yo=fo6}KrHO@`_SD0#+97wmtwa= zj_i-fX;qeun4H8mbUBpdaY7*Pzb4s_?DI7#w~JkAS%O2Xl6JjN@2$YH6g^t*eFXcC z2Sz$@98&Lvt?G_N-gS(X(h>1G-nwe49$inxwgK(bga4vj(M^U0S_T|k)LJNI9_$LYp>wuWv|*Pqi~O)rcy{4qZ*WF3T6rF-%>!RE1! 
zOp>Gjh%D(MDtwvavOXzrPNyMO7Z@g;5vbvCjuQ0D4eCq?$HB0>FLaA?$H?d*ozDpx z5&zf$`!h^a-SP>4wH~r^4P3odZJ`rhfekTcsayL)+ie0dW3JxOvSc2`Aw4z|5Y&Go z;=j%6zhrh6W{y81^e^8OK;f_0G{0S80M`IH+?w(2ckaz|g#hpa@?YIM-hN~Wey;(3 zIpzGp@k`=7)?6d=fZ{+;5ckW+7A>T;+le`#!JjXBL`@f_9$GPNRJTNl8d6WEM z;*UQGPXV!2+fijO93(qId*+Zfre&@LTPb8xOlSY{RuMef^d%Ep&!@)3t zG^2DjHqb|anE1*L&2wxTyvul+m|7JwDHUt29rQ5_*KSvcNBSc}L zA4O7?+m2=b$X3}}L!Yh@y>QOHJz+hK@k+(%i{QSTY`5rnyfizgJ!^G&7*8(LTzV$} ze412q7}{FXggXp27~p7P2a_lWT7f>LAF|kFXz;)gj@$yQmpvT;w8XBd3VptC19$Af zfeY&R@J*@KW6q^r_iJGyGTGWL;mn}GGLoHqKsJ=$WK4v^5lx5S8KND-w6emBHq(?Vy3_d zb*Zdez>yohN@GQ9?>6e%=&_#=r6b~gQW=#qw*QLb?Jq=v$OY|GVmgEDBEH#WW5v{( zyK_j(?f}eCpDQ?HVx`Z<625qjmjZ9C&xSA1|J8^2^j&EOkCLCIpPsAnMAV{k)jEvW zl;0t{@=k+Wqg8#gDMuzO?mX=A+y_WD8UEfg>4Eic5sEHb$gya(t+(VETNZ5X9#p6c%T2F=j;veE@~!*^^hw5cF}n(Z>w;9oD!!qkz5c?myOxS7 z#=gGT;-%E@*tc#+1w?457+5NTg=H5;eJrY&Yns(^RdUGL#d>GXU9YOjn=dTctedGJ zpKkVC&nh}q4a&6EJPaY`7wR8gt&UoN1Y@<^FP=3MrKnTY6#11q8uc1eOje~ZVA4MZ z5rGr~Qvxf+;&Yv^;=AxC;|Ep;CuiO5WQ`VvIJd7)E=6-;hXoxLKl=+xmnD&UbTbHN z82Y(j_j6LlpVB8=qLZDg#{%Ll0$cWp3O837nG%Ao zVs)=3=-#f~!q3urkQ(is0C3%L93y=gTTzTai8h+zap`O)Md%{?hs*4&!D9&A4l zZzX)U8b%$KpD`mZofuwLC4?{y>Ybq62`}%wX>yjWbZ(`Jyp3EdEb&_N_Y;sM#)&R#Z)Q3pyeku z*o=((g&FB<_$rQ`Ac$#JR#5F(8qc6(qWU$rAlOFm_UPyPkzn+yd&1r+&}rWF*E$C8 z7M_-6(qxo{J&%2&w+ftoKae z-GW&&j~{F6SS}MTl@YcP$)2?dyb!y+vr?vxvMIB{x3~+0`b@y=DM8`t)DDei@rn~b zX@|@_*HOxfTlWOO#bAfLVC~sc);8xg&7&dp3S4EXjK|g;4uL+_STtuP2EZR-89M=i zybn;%NQAN|@Ck)*Z2CN8+pB&#I9GKnFqCgZE;?!FLOXy9ld}1sKQ4sxk#&%ZaIf7Z z=%?E#Ir4ovrf10ANxM;-f)W^3;$fPK%DH)&e^Zt7wm!whtYJNDTJxh=pc%^G`1GY6(QHK@-KQ zN$=x?^5QnL!YD_y~en>)D+}t7NI0mIcgXL`EoOLJ_br{%~%}uk+^hu!bDNY zCZCGhrWVaqew%#`fr8O1`)su>ff0#)U7AJRZDZsLmQgSR?9kl*ES^b|`J!(}c#{&m z%c>L=YJJN*c&Ad0V}?+~b2(cM=(1(P7352cx|z2%BmuOq8TtLGhWK|dj!tx87A!uI zc=dkmX(uaZySy}0>1=x;FO=rWs<1NV)<+1mI;u|@59{g^5 zETi-tFJ+F>veS))1$+2nH8yPy&&Ih!+i?@f-GWjtgVAb5A}Ws8XO6qdlfI6+I&&QR z;YPC&;cVux_Sw6lSl>>;olXOhrS~22FTjm^^QlT%Xq%qwjop;#zE}$_rb<5s8r-{u 
zdyj{+;Bv(I7QZ^vd^)DlixuI!ai;IupXAC|zn9zTNRv(j(oK%n3{{cYzVFIBq8uzo4TR|T1Zw_ikhW#z-j2nJh5}gcqn#! zz_(QM(8TcWC|+?oI7J9!PPqO!bk1rZ)B+W52JLYPt4orgJ)=eqc485o7>Uw}rNMrK zg9>$;X!x4bEao}^YMLALSxv?KNo2$gxI_=o65~xZ6GB43H#SgJFb!MdOyGTZ=s-RPN^~65Z3^ead985K9kc7&_AQeo!82s3HE9zHv zuY?V25_Cm7t{;QpecGE0Tt1L|h10Yaxm};ls^DTz3J`sRy=~wjvxGn9DL_ERy?10vVj~GW+N& z1-qhx)z7}XZ5}&#J;J>l@(!!Zc%U(Cd*-%w%jhtj9a_Mdg@$s)AuMz`uGqB->PcY_ zk#KW!*#43+iYS6c@688H1&N(uz-D`NK@=j-BLlC)6li~@!?PGZmjL;7%jR`tFRvGS z+TfJ@)D~C)Rc)l9rTj@Fqquqca5HxG69g1R3I0DePR& zoc>ag%ITXL1C);d-6ue8=}7R`;_BZvVlV6qx~r zCB|RIC&0>*sQd3r&i`T4!avuQzcr@6 z7RZ0EEB}uQ(sFf8+m#k%h!ysq5hsfEe&zc%eHmOZUQ0WMPOD~VmK&j^j{P)DO=H}$e)AwO_9na~G zozq7n1S1)D_mX(ir8*Sc#wi6&wpq974JM!0R0ou-dx=9UR*PhmRbNmMlM@eGEou>X zO!vhbLK`%HZoO`$FEgZ^kVq)9pMxw;WTzNbtMWU&(4axA1?z_^_a5EcqLuI+&xqZf zJ~F|8>>Y*==liHVW#xsmbasKOBXh}7Sq2voyYOUE_=$z8D&(kB^*JVcM zMkgTF*%UY%Q=xVl9(NXQpR{|Gum(CeS9K$Y1v6T`%~Q!tPBFt(AHsK}W4}s@ENuY@ z%nfn};7)#TVRgE#Gxd+_#U?zc*Pj{=xI3C=ETD&Q(67$|={&TV^5@1H*6q{L{MVzV z@Q!MXfC<{I?szP&8wJ^e*?jw!qZg_CeD;;5RD+YKClu$eJ}a_kLC`lGbuyUiRYevU zhkUV#N@}L{^~OJNElK9QG?on@WQ0V9?s?KM^7DtNz5?>-f|WG%B=mP2Q7ktcZ@8T3 zzUC=PkwbWDADWZl%yzW`cD_N^1`!>|S`i;5gLvPH!y=jPn51h#`Za;SgxW6^BV#Gn@na2ob=Tyd*xjdbxt&*;1Hpk-ifl4x1Pwi(XQ@he5&W=$?C# z`4o^8btb%FM`&~<5{6k&sft+B2kj#rtOKFQ)lW!`c+C^2GSA~iJD3E_G*DP-4B=rfYFwG_sGoM z#@lW6@oZjKKeZ2L+I)0)_darr-;ph?94alw?3?uEHagFiS&dp&>m#jchCp$l74;nDPcrVu99XWR?e;MRwD{2 zGE*)~=TNB>D3HDJC_bjKbu;K#IvFH>`iauu(?DJrsRBG5vmnRu38;Z|_~7pyb`YZ5w5IKJG_~oTiC4t&f=d*Pc`qvcD5OQL4og$LI3U z^Q%bliyt+XYK#d$U)CI%^!_1zJng6KN45jyp5q&Y&kBrhJGJG^qi&iPQCTUov4@A# zYSD_?$7e=c`hpO~3w-fSc7;c)!~MD{NM1r#tNddJqkHKW35vVIkjMM!DYdZ7@Xr+xL0v1lv~^d304b($4wAmfsb!R3-IC3WS>17~U>~DtHleYL)64%q_TDlo&Td;8CBfa@-GdVd?(XjH?iSqLA-KD{1$TFM zcMBTylDw9^_j|s*?>Tpj`~5gS7%+NtuNHdMQ#GsRTqH0h%_j!sao=yf1AFLgH}8iJ z%E`=ykbH$TH+6Il4c-C456N*4d_k%jSho5NFR;M<*e4 zmC(fUjz!niy|F`A0*btq+bQ4!WcpB&K7BhEEXvi1xBPQq`baQ!Abh4Jl{(?bKt$Nd 
zA@Ib6am#EiC^ywoT#o>&zadM}49J%H95eYfv&cwK80z(QiDKGhR-Fo?YoSn$!jbukC~cC(?T7F9*t6xyX=S(YrmJk&A3g* zJlUls=mhN5<~e|Z-bDk#>@dvV^}Y)ukjzB|x}3;70B(!FwwllEYH+$cX18C{yr{iv zUM$=zDYd>jJlcme+|%e)_ip(t<8wv6;f1J~3SmkH0uVdzjAyO_Apje-ETernP&+3` z$*TD7P9JuHb32G2(cvI|5COA&C&!fkVxpl#0XcPxuxLiPBr45Xm`iki9h+;t4zg{pHgBx<7K^=p7%#7|4xC#Z*q9LWQ3+bjexj{p(qh(GO(wUzBGNEUbD=*~*3t$ZZ)t z;go|{EoZ%D^Y0D(_Z7t2u+b2S)YvG=@;-_am|aX=2K(eU8k*{Ou|YkiQ-9l#4<(>s z!Jjs7khsBQlNxF`&zt5STB@}1`kdgAn+g2M1PmdydVBJUbfbY%Zcw)Xrd1$Vuq4qr ztBf;Jx8;|G7gm% zG8f*^1{SJj!qqfzY^de4f-j=Bpnm&@Zo2trLVE~uQYE0c=qC)_W3q)Wab6tcS=o#) z6OrtxA%VC1`9+@*g=azAx16J&;`|6Yj?+D+*_~$KBf`?{ArR1}7ygTmgmWL{ z4Buru^rzs&ARH-4Y>*_qr9jPG2!9@WDDA{PC|>!lRj*-)6mx&vLa+1ppa=a3=JK#K zU~z(DD#LS@y0BRR!m5Rk?ku7Os_ZbZhH&jY9Je(Z86lG+m8f4gwQMpk>r9Agw`uzo z6%$1L!fO1f>9~P;K~wU$_pbjT zgy?@)f&VTk@=v|&xqtMsO9BV}>!F0m#r*NgC|IKIoVPgLI$^4d-{I^G3Xvnw3V9|k%+#vhy13|)p<&{@sv$Cwe zbT*DfEVFl`Kwc zVT!?8K|;?u!FjY)Xl$C6amEv?^^Rf3!a&}ok9v{qYE%%D&1IGw2+^MAmSrK`0hZTw3{fmbb3j51aUhFK z&CV=hgPUSCpA${DLaoKxdsGK#_0h5Ak^7;X+Ym3NTY$h9fHpaS=UC-)#ZEy^I*&~zgD z!{@C1Z|X0A@zGmvUo3q)ds_n?i$4K}{*PSkTO97M8-F*#|IF;Z1=fCiRewo={rtNA z*kC_ny+1O$^uOg?f4$F-NBwc1-%#cMo6PRdr}|^ze~h60n%U)-my=bN{O@FT-*N}P zZB3K^&FubfX7@+@_eWgzXZ-blGrK=>y}$qbUz^$exq<&ch<}&a{TrP4zb3OQNF(%* zfn56E>c+pq>>o_>Upp9n3FOlMPP_gc%?g*15SHh`%4jG_vD`C4h~MRkQI(*A)paH7 z8Q=|`A&$vRc4uf|ySZ(hPg$H5k}Wu>l(sLTrM)BrT5Wb7klX0Z+2B#m#-sJ2-7BNX zaw!P_5=eZ7YESy^9v(2nD?DBM0C_nXeXJz8+RPu0g)H9x4&w^OOjB;a4C1lX!%@#R zr{(M5=k{?nS^qU8)Z@@Hhu(x_f|=|}0v?(P@xq4nYK_3WsrS0Bl=io&r>3xmPA0cN zHTzK4@RU3{GrK;9Eb7b-!&TIvkfNUe9-sh4p|G%qielvstdu%fwBcY#7=6=Pw25_s zk`k;fAlw1HBy5*#y^0#xQ#QhAPiTjnS|ba{;SzM;V+KIViy(^P`Q9)x6e08gkwQTz zDx$sPuIz}CkE_>|;^!fYXp|M#O|zx_A}LWW><3&P4~=!iMoebk5-_&Pnd8Z% zD#}$I@1alH8WUE4Y%v(73h2JupC4iUTH_s)9dhW?3=xcXs7N5Xgg@*ng4O_i0D>}AoL-rbjR_5zQEkJQZK zH6en;rz1fb+8Sdwo1Qv281GXHsp>&Z1-dJmqfT>wJawxI!G&us%-U2* zp@n*Mwa)(VoxA(1lk^8hQTV}(7A_V>M`&0s$z95)Nz7`A^e83I&xzrb$9GT7O?x!a zsVCpm5})~wo;v9~N6xukdn%%{wnwexa4mp<`>XcSF}aJjrD_=rqK| 
zAD!~`m*x4#I1}rnK2mC(sYH0JxG}2EJ)h_(5c9SJG^mLDO+(Souk4Ra%*?>_CpY;i zAKq^Ko3tVQ8wvS29_sJ%e!r#-e-li9Eb{lp(VMjS$Xo?lI*zr3Hf+kZ8b-Z~QA zJfb(&@WWjCb2#h|I{%NE#J3muQ*HfiB=T=UiErlAU)I_m%HZu{^gBN>w>PkTdq(rO z;aL0zdRF>Bdz*gec+N|8%B$tejqo70F6mu~^wvb7GoBDgEFw~lH4CvoDTtZe%aBX? z7R)h)N2G&`^TxQ&Ml88R%R`2c`kRok-S`4ypRY!v%#phy%jnyNx(L8>bn8kiHQCq9 zHG3CBdU=ovO8R{#T~}Vaw|S~>RnP*~#55GL*DBo8{WObAMW|P=Mdw!^v?94IG;Nd# zph7A$i9h3jx|q+oxC7jzx=6WZ_VF~BCAbMK>See+i%fMyv~4d-dF8qlyIuM((GEX_ zGQqY=nWa1P#_Ou7Dl}l$M3xmctDT@&`mQW@GERLq-RVtL zl}C^YGt!rdBH9ZxJS3_zov0MkBfa4)dEwtuoo)wtLld7cI{E4|wyB1C$ej(5L%NGeWP}_CMm|I@_o<{$WQP|477Yy&>>_T zn4Tsi3EE3!BduAnZsrEDn9@|LB2zh~29%-&YYG$j(XzL#=99qi`vB#UW+<*Pw8%B+ z`OaNFU)0PWbl*8twp3~>5-LC1;JCmpS@6sTD!v#2P`*}45OSNXW8q@s_P`O@0fy~w z7AdKcZt;6K=CgcnWvHU{zp-^HY^lID{e<|z^EEFT4Dg8_1o%a)hjj>fOE@lcY29SfdM*B2b=>0-~9BTx`63yim(zF)-ioJA%Mh~1&(rtETy2*lrRdzMe-oBy;R z`vCVwgxOvV(LhJp)nS0FtR*3HZ;$}Hj#-HFfw&A9Hl}~B-pe4nn8%82S7T zVyz4TP^~CXepy}?x72s^^W00uU>Idji0&7#FdmpcScg;* zO3#I6oUd`klw!u&fH8)zZhF$VRw1XwJIdCwwRnjY&hW9X#k)7KLGMyikA(PSz54ox zZAbWVM(eI?+}~yn%jTGIMV7~o#qAsN-?1@ZOd_G>@BtHy!r&rHW?d8t;Yn|Ir_6g6 z%$2jr_?jZH_u3ube%{L?VuAum=0S+52xEG^)$PpF21{>By{%aT?-ugtA(PwoG_swO zlpRlM%dzxCct&u~lj-OlD0eHSILj2P_z!DDN( zV=!J$xbU!;v!(R`mAVt(a*PvRG@9ttsn9q4FYXnpO~*_jZto}e2XQ0{50Y= z81g-6`%CbfaQJsY6=0tUF()CmRHPR;C&SV{NKor3wySk$uD{m_YdjL&Of93v!!s<+ zg>SVW55-l$r-T6IMs&WA;)&}`-^C7*Dv>{iwD5h&t88pguG7{tR{#>gP!aq>{t}NA zqbut*NuK7|QF>EQml3cDCNjM^6KKgmU}pD>yuP)Ryp<7|Vf^eL>f0C8i6 z5o}sNq$IGVKd)oRCv{iJo2d#dKha{o#N&$~#bkpiDVtD$R&W^*f)#V`9159}l+e{P zNJ#kcDw0h{=z?U@02t>tOAP=#Tw@`L5^;=PiR3T?dSZVCSs)c>O`u6?;vr}?4D$k< ziBl0+As}PEicJ9b^#RM=Bqwknfn1ST(vF?OZeP#9{{wA6~1$-b&DO*(`vmRr{}eJ1YpRpj&=} z`R~2e##)qwA)R0w7FM7C9e4FbnQS6VQ|G^3=5&#Ac7 zzl`V4lZlIahrC|!@J`go#sJxQDab0CluQ~at}1txR~0nb`vMFKe_vqD`HZA4{c`hS z_{0SmVf#J#J_DkI^;Iv1v;P}wX&a4y1PN^=Mzf=w&-$=LQ9WiR@Zt+N|HG*jXFGti z9o#IzS34X(s+NF{M{6rwYx;X@hotyVSNg!f0wBNwaLC__<4d}s?J7h};5y5NdM`!D zxhORbsjM{H9xLsS^>{FvD0ZlQsNcVJ$r&@j*%qkr>mtEtj#__Yv`6K+Y-u>J66bUA 
zv^@Y}wJAF?4ubGB>E~BD9Qz&*g8`Hv!;5iV006i7rvZDIN@`(##5t%tvTok3 zFVYx0lT_pQF(?Vau`2n!s&vCp_*Cp0xjZAU+A_)EG#sa1&ca_vC#=Bxfk4h;@*ZaS zIXuQ2{jjVw?WJ2xL=8sYlf0u6GGZ#KtWfiS7_zcgTfB>6q?g3w44+y+?j=LL-RvG< zI#NcB;b-afS0jr6yk(Q6{ha(ZkRsFPNOpG0a=#zcGZ?90K|CfisACkov1OnQCqL>O2K?>jIz%u;F3f~$j|6)mhA3*v^ zr2a90^hV%*(y0ILC|J6m3;ZjO{EPqnLkr8V8KPhFM{nt=Ke;9oE8ZL3{6QmsP{)6t zEn@i9F!+-m{#m~IKhr}8c@3)+TBP{fFIt_1!r@hPa0hA(lVxT#^3Za)`5YIUK=QtX zm?$^v=Sw6O3He*z$7YZBD;0cLP`EZI-ZdTMU5pOJmh~e>FBXl~!a>GpY~SZnhmO}R zy-?OtwZm3#CTI^weZgAX4x@XJwODG7&r9N&mMq6r`X92;R3|CVrCz$*p#e~HkP1+o zaQw)?q3;7+cy?ZHgm>swtUCY}?4Ybas8`jcxUsfEa6VG7urV`cD>}+t*R3Xmp#qka z-PzQ9cXr-HiIHgd3IGVNEm!-MjFoRgO3sO}c^jOSV^}Ru2cHoeDvK;coYun14!|TC zDqL?g#wM#1RuijfU?rT-uN$cvK9fwu{8Nu3i!;!2gE3OUSw#V;yKBO34H*wwF>@p# zQb11@JP$Z~IJ^X+5WNu_p`1nJL?~Py&y(VM&8LM2UW6J_=dk=9ivS(Z(?^MUo-9H< zSG3s<_!6O4Ms+V{b1S?Y_`c4|wN35r5+pvoJPJD*`d$bXCJ*9f_eBUTWNg!b2rcf< zUq>CZ%I0A)h1C!y3BL^xmylsRCgc=1Nmg1tZXi&DfiXMqGjbZu)nt{y(_S(GVv}J# zhmAIz7~Otu81#aOn6e{NxNadA6Y%dteXVtVsX^_cB|}lyy}!?Onit~^nCd;71GrOI z%uaVIw8E@KbR~Itc&xg1V^&qyJgyWIglhw88M39xI~)BRUq;VSWAY?oU@Dg+Ls#@< zOWNP&aic>O0@DZc?jbO20Yrcb_`b46UCY!Gtd`d*0(PC?0!s_3)x6t-ty z&hr+S=Jb7^QL%tR5T5#03oEKUjjg|I6jR^FJ6!8F*T1WDlVOYUGj zFbZ%r16^vU5HM=2Ha z**nhT)8s=BVm}?cBerc4$nIrs3ySlXlsAHXSJ2ltRz4L zT8f$Sh)pnanl@d*m6$dTJ$9S*T+G!yWj3jP2{B4DV_alwe7=_eb_rhsG8R)I_M5Md zCoYNkTnD_2V0*VY*gUOmNaMW#L>nI%8V0UezG2u`LXa7$QJ`v988o(oK&`BqyED`R zzyN3;Bh;wiIAb-MU+4;3GH z?TI_UGdKB%+t%X`(x5eiK+-Tf$GEP7vguxu%XCM+J8-v)febZFV5hE3M?tVe-keXl z_!(?^t>qLNC5VmIN8yQ3pxuRn3}N{&E+?=ZXm)axjIc10#Y$=I1|839-yaGm)Lz~9 zfaV57SQr5Smp{K0qy~S5*c;(7Pptw~EtWZ*vCbE~xsidD`2YdlbWf08SCg_*z_QbH z)(=KVB*f$q5DJjt3od{fdZ}>RbjflDikB_7eGveaaYWb~%YVkXLTwe+Ld5cgq@ufo z4e`piJjS5A&Nset=)_A!TtIUl`*p5V5* zaY`ju^H|*(?Ix6UFFahsNbLoLnXAfen?=!BtrVp0hm0-8=+a^@beJZ5;+vbSv(dp6 zxJ}?0qZnjjUEHjU0B7Oz?RQJxy;@9c#4Wf#7=no~@NXG`c{BL&A0NISACsNC6FUbv zt27$gFX9^u-egLS7?S8BoRPX=xw(eMc*s0u=G~!{$=0BI5Z~H0s6g)+5L 
zWp}U0t!bm!<%y5qZap?;`?`C(y?x5%POU=yubBQ5;C^8GTL9q~-uSPI(qA6VoSy#LX<7{tEcUF04b%)OYDRxN}mHwP9aP_c%FSt z_x#!<xFWH1w$#s z>B*~wq_Tw3r(Ho+t&Y4o8Gwe+)1-5L2W4xSxCb4y8bA+c*XBoqP7#r_g5WEJg%zjw z=6yUT*!)s}Ocx!c*)wh?ChsSCd`8aKe0c#%eOqPg(Yu0dp;FOcX z7_Ehy8;UA3DXSON5a{x8GsU={1UedNBW(k@Or?8<)9Vq4u=+afMU4yS?elR;IzeI; zYGAzb`ckO|>wK@V{C%aDKwzAK&E?aw#koZ`ld#JcuC~2weEC)SRrV}$iF&(fvD(|4 zSQ*1lCU{j^Nozy$!1s< z>X4pOG9^mq_{0KNhZP;=A%JQ&H8PiLOjw!vxI1S$OXcm>dJx?wm{d>FnE{^H;;3RA6K>y?W95_#0)I zO>W5WL(rIOQ#9a2mO{V0yJ_^jFkjT0!nqCB4Upu9>9~mMl8)}$(p_5VGM}4Q5EH!T zXW7fJ(3wa(CAMDKQ>f=^ab-Y9h(Q2cqPrJb^|$&ZT4tacp?$+5*mCv(NEn0b*ujMJ6FDLk*`saLIMXsvAE6t7bb_c{ ziR8_(8?YP~1V@+Efx~OcCfMasV<9qN{ z0MQ)ebQZC=3>L8%8ruY-8Vgs9z6e=YXa@-6a24!-AAXz4N0Tei;BNmruD-b%1b0=o?;EN9EPe{bVCQvnr)QFil zvH)AX@Yq7y>ZmrhGJKyx-?^vqP!iNmtt4QWyA5z;)jz1DtNCK4JR@T^DJ_LxL?n3J zLI&x@4nCI-j7yM2j7#j%XHHFke)v95RWzVp4KZ-BXY3B9wp{b}C<1&Dq6#BTl{O1lpsVsA!#U2s^nOu5yXZY< z(h-m`*rFo(#M;7nknZBx$2tUp)%aB0Y3h|hm0(`RN-F)R`i%f2;Cu6aQ89%o(>&Aq zHtz@7(DHo$Kq;<@p#isc(f8-w#Ms3}zG`rk<=bcw@l`yC|c1R;VbGN#7H{C$rxZZ1nMM{-Tn65WprLHzefAj63kwrD0 zM%}hYpp!|(i{cn0XW-FCL;$+&qixsP4ZicZ`bwd9{>)Tnp((#kDC?N+<|Ne+`Sn_+ zz9IYB+{pj#s+(rMc9gG0vk)|Dz?#KNUpn4`mK)|{N0ON}TxL*AY8vR_%VoC^hP1V6 zb1dNv!O_;vbC_Z76-G=RDMT+7;|*t8)9Wrg*H&N$0{=0s*BPXTrciW7z#(sIReFJJ z6A)snBrV4~9eE)Nsa~YK+Ao%R`|1Ks^ZCF+Mw{-=Pd(fm1g7n}pE6-P#Zp@OTh_}( z!hNc=J-aqc?4$rsfG5^aIY}1QqYTH^t=ntb3kI+14O>=N4F-GG9XTB?OA~xU?J}c? 
z6R*QK<<+3VX^PeSt&xvWm-g13HSX1s)TrAPW+7WfIT8T&!9`{u2WpgmS(?r<;rKu^d8oP3Z_g5wL4erw@7516c#akVnfL5G1BO)7`XP+2m#5*n zVHmh1_iMWjX*zZ|#}1?*36wGxDZQ$XGW${@N(!w#ErrYW5gM#smtfB-!0xJT``k7z zj@VG?)Yjxb6&BEXc6Oa;Yd$FzT!hl6p^PsJk!ioGX7_jSWT{s>eY6K##)h<1p#8Ff zRInufBoWH6jJd5l7fjly2RWv9epX_P*VL{R;)B4=YIBddAXH-ESu>lBJ0}jRVi2O% z+Ioq42AD@St|5$XJeWzE@{D&FY zzu{)T7qKAdX<7d|eCTZ`-d{TP?*%vGPa^c|K}mFszgwgKVNLIExY^GY{t)_qDegZx z)jv|LzYO93kZb+QIRD^UZ_@$)BR{uQSuJ+E7NLBp{~7rzK#mE%`MaW+OdZ>Z<^sB- znGwsrbcb&&S#W1!sR7^>*L`R0j77gBdxy8r5TB~7*pa1!=cld^=uNBZ3*_CE|I`rF9kpa{@~U$ZF9eZ#1k5 zqe;L+MqOHWxRag*O;L*hXWXOOB{!{)qYI637QCm$pbJQ~!HK?Dp9^HJpnT*I1zJvJ zY(p@RRY;bBVcp%6JG|_IJl0=>+ zw*mZc`AmYQ`#{vYod4Jwi=78-?41-o5~Rb?I4k4LMSyn2@Y;7~;Ta9L16A8msMn!# zPh6>YspC7#5l?@2{0v^F@uxfEvkopPM-r0Z!9ZHcPiSOC>u__~#GiZ>!>#WR$8Ni}x5#1WQ>T zVs|Y=k>sU(b z?S3$6fN{<{JEO4K)&L{do%>UlqjuWfJ5Z*yBv!=l;PL?JQoX5TPB7Ll$_Kch(@}ZG zq$!iDz!pQmPhuz`aCJ0BwWORgFVGaA1w!y3Vo1|w0+T)HI?77DfU>7;^&m;7d`-oW zY)c#1y>zn!@8E0Kwvm8%%}OD&XrbE*ZK{h8Ketq%(gQiMeC)YIId62_iFBtJqhwns zYB%Z=hnSK{j!GI3G1Zs1Jt6WTj>?)8&lug%@3NmNs5>2b2SJg*3GL3g@4qq0$J{cE z5)*5erhP3x{4R{tDUU8R@abB~Kfax`R9p5MO8d}}JKB{E8BScwqS`Pf$q<*93@)T! 
zF7WwYX!D_JSmJTSU<4`q#M(Hlj^*3tF4$K@6DYhy4r_4{mZUGJdslPGy2n*%pjv7* zrw5f~3Go-VIm zBM@|C=9oe89DLvIF7G)gPxYGrLK^*i@M6RuD+DLj`pnEE8j1(TKOOxjN^Y$SHW;~YB2oxpd;s_G2tQF>ydS0KIY8-W$B^?S2z zc+j|v77|Ju?c*uuF9H4b$U$PMhG`B7Ujj9J-|t(uIrg>(@C}^^B|e2?3h;o(!$~Ly zT}l^jL@`;korRRzH1%6}dzFX;}~V`MO4{`}5geeMSz%^ko0i zba=UCTmG<>abF^Fg!Ah|O3KIcR>iOgIuP(z(+GeIGfow52XmXJ;^B{W4n*&_tJb}Q zyt|r-D=9aKjbyk2f_6BRei zk?taki>s-5nA0nS5ZC1|X*A31x$OUIcx zyr)$rkc38R68<7X;pj#FqEd43PKINpY2#xrh`;N%(25df82GFaBK|OScxmlk5jC76 z7}QF8i9kb;{Jv4-Qaqt8XsB)kfF^D`9%>R@Cc&hPtGD?{EuBpc?2iVf*=diAIr$YZ z8tYQ5*%uRl3Bo%4B@fMfyxNM?2|U!2+Px%)%kk`HygF8Z)Mo1z5pIS@Qsh@XncVC* zkWYmG!Tm{#Z0hVb@Mx~XMQy_@TK()epqN)cW}h?NhtHKS&y@hew4!^=Z4T}fNsXra z5gL%8K29sK=m4-i=LG|t+m#hs*r0?%_mmG~7)ysIl4e;k)1GrV5y3oBK!WgNBSg?d zq`K*{I<|@6T2+SOwg(uvvBRuspRQ?xxWM~OwOvW|-usTp@})O%sdYfFac+lS(UP$8 zn_mkxmb_~A;Mg>rPz^Q^R80`qI2>b+sYn#vd%sr>{05X#eVcoW!#R#K3gsJr+|>LP z?``Jl5kNU!lEL#G>;tV6LMwrSc+r~RaGu*Y{9p+#FVh4ln+WXBk);z${9a)nDEs8w zh6l-i*V4%Zh%Fdj8UM^HxCme`7=Z9V`0X z6Zz+g)t^x}8o~dhGWBar?iWV&*9eCHj72g1V0nLVv_DzY?_SHlt60@3bVj7m!oAs^ z6$9|Oy`dJgnTi~>N2UrUGzyj$uv9GG$O|g8DWmI7Cv@gh{H^i|Fx!{xkw$Cmh|Lk3 zLi3Ctg2;M40b*Y1YTT5Jr_8Rpzwi8lWK*?fj%_dZ3YL8^k(__BbTPlTyvkuj`o#}k z6cY9+$V)yIz&2*5DAKBeXv3Q8?otZPXi+2;g23;>>%6+OpLUBJQgqj1{!E@Gp2vyp z07vO#+E-ibU2t-Qmq_56cH|K-BRe12SyF?CDK#xK)&?I#^H~^EN3-W^DoR*}rR6?Q zTpV1kh0u9;3A-4cI`~~8iPi9!6JfMG#w%ylR2d-4l5%?meLrf8*F;uX*&tRPHeK@Y z6I1ooul|hC`ln{=j10LapRlVLS)5-BN!3|Y_s*c5GCn5pl6v#EOYN;KXg3<(^a=Wes!m}I;>hr}b z3aWyvK?z=?l*KmzYgb}J2lZrN=T8cI%5XX4ujXuJb$;=oI#4MH1;gqlr#+d8&L|c63y&&=a9Zd_)Jh({ehE=+q{oHz2qRc4>b7D@5hyF5 zRLn31KN^M8_hhl5)C{^5eiBhrA$YJlX<#E9!M1o$7#PKiEEGo5a#1MNO&DPD3C-SM z?~ytwe{y=v$lq$vpy_RvUO^ZXB=94BT_3f8w1aD$BMcrePUyzFLz~jpW+N6qyryLV z?@EUrDpK7_%iW!(6#^fVxEpE;6chgih6ALWg%g!zO7-oPsfBJG@0E_NG)(Js-YLB*;L3;A~I? 
zAU=leW5C?_(5&(d!uj=-=x?CqcWDLjk%{ro3G<>v~2?B&1o*01&Kzu@7m=KV|1 z_D=-)4^iV+4Eqx`elO<#JJcvswus!WhX*;^ehvYC$fCF26Zs5Zq^XA2w5F+cJ+(^U1XuP-S`!Lf#; z_2t{k&9a(>6#{jfq}AD-$NclzvM_kv!`aKZm6aTaih`_g%4Nd#VQo{~Y{czoqcVA| z<5#OzRWU@Nl=v{h_H`+{2iAs?9)Z!4;>_<_T<+PO=_rz4TlUtA*=Gu}&km&l14#%N zKwvdUt56h|Qka6!NtbXv?vy|7m!BvLOY=}wv*eVN=OGu>bX$Oc2fKV)G)K@LM181d zy)%$=T8|dL*u`s0gf)WPf)A||6)-&$`-Zi2Mvl z1yo9tY|!j9HQV=CzKCCS7LTw)Qwbw+m4bsqBIX`)xNz(A1!Bmp0f@uKo(6q!<8Qhw z>;LEcm{vfP|Pmjbv30tfi8okk-m-1wFDmor+Ox zm;HKe!B^ChIvI*)SW~^u0@a-G(#8mlhpCU;R1Gc)90(>#*S+;Lj=2@i`9(A~Qpug^ z&lH^%B{+VGiY{*7^WBx0w#U^@)n2 zMAGpfZl^6DLqBb}%Mq)8I(J!QD9>axwkUjh1TG&K_7}a$Sm=3V=E_vmNrf{jt~i*Y z59O4}1#B8!elII+N}$-OXP1SX;n@1YeXPh0C0c>ylO)_qtgnXG9(F%w&>nVfR7gYk zW_mPaX_L6J&i-L6eK})N`}~E*rv)@cK}<{(IFFCvT9QIZUczmvj$_&rUmg<~ZQh@bNcxr(W6sJQ)-96{13 z#_X;5-~e=FtE@vFhFvIMdB2xBxwPagu7Wc@++vQvxM*?D7RxYD5HBhWL{-+H!yS!C z>3QiK0u4FB4wF-c?y?&IQ+B8zL5gzd0YZY00|8eg%IXr}Bac$yB}C;YDv}u)EnH^! z@t{VG=F$QHcYfe~cef8X2_OvHvHMKaU>ih+9ofVL*))r4aQ@lDy(hl18Zc9O4qIYO zK+G-Qh0?#&g-Uf#b7xua-nK{(|6>?ikoG#j!pJrS(atp@`+#AMd`v?mev0@7H4#$0 z0bjU&kRF2;VG|BPQi!>=Ju25euh&?b+%#2l9DjHHVQ;`upfN-VZUd+Cv-OuC|EsxR zWG|JDO6?7zXBuyb$8sn$U!7C978auJrC=SAc3}v*(Cw&kYj)r-2?+kASB!U~%UK({ zlYr$G?6a*pt10+{>;;cdLb31bgFeFbartAuW^}zHFiJ8E$DF#iQQ0P}{oYL7s%ej5 zu~fY1FV>g06CkXt?QD0lqGNVn5;~DQ)dAEcjZx=XfI(cR zXkT1Jn!XhAB|K4Zea~dvz(?xqyK|j4$A3J^>VK@WEoxF9_n=7t@`_k_s^8}-TPJ~p zBIEI)XD-G0;#-7kGIc|}35@1;9;kJYG6&mV4ay#4^*!g5&dToKM0Jj8Z-y7Ekw%g> zdy5ror%zmf%k5ycwk3n^O1F3x#%(lVzGynunrgHpn-lK9+Xj3#HNk!JygT9Rw$AFs zgSDp1r;pe-p%hE(Qx%a1PPK>~IxSIFpTK%TQeU&62-iVmt2!f`rqv&GHhWm0LL^k| zWC=D|4O&fG($Bua@pJk%x{O&SWP|yf_O3N+X=n16&WPn4*j1%-7G_pykU7CbD=ycx zH=@&bYlKdjk;+~zi@u)RiIs)GCqXX}j%@VxBPPQ{;;EXxf^EHft}psf(40dJ2eVB0 z*gQ)RZ|D5l@ph2o;U^ETPV`m^7B9-@Tj6tv%&xYL253hHG>a}azBP}?Ndz=&CxDy` zk9K;v@+TJAE0d?k7B2TTXe6+pm3GVrR@Z!G>)UpnQZsa9Yk&l#qFAHKVi{g&ZR=Lf z>}i@f`GepZX}J`~LvlQL#Ut-=9K!qLNO6D6RQP7d0jWf4Ezq9bV*YQ{bKYM$czfJZ 
zD*)R9Z;Cv-NX&r;eWt+gGY+>|>|BWvX+<`*4)kZ%SUcpr$mT+im3^FGS#J_~-npxU+z6^labwy^;1a0w zxrB15vrOmZQufR_cu5d3aG(8EQ_Nd*%nL3L<|zPm5T$4DbhQK03ni!}aLpBcm{kYg ze`I4sS4{9R-Nz^?`NGt=4^$ixW8EHLTzKTr$kVKS`I1jAGZU>3IO58I33_?!XX2C)n2w`Kl1pA|XzI z~r^@EG zr}I;-j8*4efOXH){T?$cy30!lb*5bwFm|*BS0g19EC#VDf;{25~UgP(`IT_vriZ=LZ&K9 z4T<7uYWh zry6E%pNwLFMH?(Kld?JDNAWOSVBx39dMHNWN~t}Ip)=|WbM1cZfw z8K+dMR0yBwpoB8M#W!pueVh~lWIr2&oQ+!2&I4oBw`6~yn09E^HF^jvNK^$)D~NKv zm&7j{0#F>!mipVzZWX;z!i%jPd-kj_D&kP;3r$*y*& zA!k=HXMG!Za4#rC|FQV zzZP@avW*vXxqu9U6i;QuH=-yEss8L$k=cbY6*(AaVP=IiFt4=-6Tx+iGPnQth_Ad=rI+R|AZ^bZnZ5 zyM({m!`sy2j*lTT2x(y$QxByUXSZrVutL4;NJkE%1s+QzMqOX0uZ;skzzyB7bI;nG zx~wp}$z-SMu7iuZ_Y+uL^8R@u3f$uTb4KD8n6hIGZ8zpNQ71)chu9kG>7KR>5nwRf ztU<~x$4#eiLr?^#N#b zBV;3rMvi-f2mv*D?F(w?yXhX8@rEVLi{`dE%ol`kuMrGhb(Y9!wV?hogA9iuMT%4d z;%wTm4oj0GcTeu~DOKS4Ri8}WpdxsEzsO~QC*te~yg(PNN+yD^v0b>lO+Sg&4D7Et zZIILpStAjN9)YjPbG@277wFW0>d0e6yFJEq`3L6T=c-Ut65O*G3L}>jF;Ewhr}N-} zRl-2QStyJ|!UBQ=9@#Pig6HfV*)9OES8A=@u#rx7xa(F>(u=StNMzOmZ=78a%;;^V*fG!ld3h&Q2K|g`yO6s(bl3+hr_G5L-;8w-X0cs3K=64 zeT(C4cjfn}JrWPkq?tU7O6azV3GnYogiI1fD3R$Op&HW#T0%d|>wnym~3b z8y6a);KZwlNq`E3fn>}@;8>bHuAX+q95caUvSzMQr2s0mkm4ly)A{YFT9?9ia9QY9PZM#X6WCO`c|6?=4Cbii2DRMS~bP-=AG z5cjK{aLMd#279ad%3$8B3Im;u-NRqHFGFkDwIeZ4> zVwh;up3UT6P$^YXy$Ii*Eg{$(D67k((jaLEBORi3DN;6?D6G&J^vZXa1# zl@_VTCPkTCN0%Jk%SZs?m#+Zk9>XF-7kPz|kN3l=o>?%-BVsr!0IE#9;~~(q_N9mL z9axM}dh6G-JD*2Gl@eZ^V5|1`^BS;`j*>8K>y7Lz{EAjg) zZMJYZ9NvB8*0>&QJ{A*Ok%@y|_w_;2pE%1>Wi#V0-j!R%8Lp0cAE@`SH4!6w@}krO z?dofYE`*(O?hGSNx5mycNy3p7F!RUk6)OTzZf+TN>Kj5-q6_tw1e`-SX!ePbatn&& zxGOh*vXn1&=#r(l#=0nNE2AogI`0Rl)3^-n`+^e^;~~21YyKbR-uX=sEz8mlX4tlE z+qRWq+qP}nwv7yAIKvScwry*&Z{MoQ?!Nt0^~^WVeE-1tZJ+b5b=KaS#qYsIuGW*k z*r66%T5N3s(GI!~F5ziawfIuxR9W-_u3S7 zpD}fIR}Lr#xZF&uGY7_Lde;>**Dg{h*BLo; z78V^F;KW!e=9UX$0Z5j8jE<;)V*P1Q$-acybgA>bgluUR3mW9mo5HZ}XxM=ULqJk( z;Sn_rF8V{R&)$I)zHpd!XBA2TFvP>9ylIDW)5%m zuG(g@bZtn@VXeD>nt1QiMC7;br$r!f{&~Cu^Q#hDR$Dk3jhQ%p?vqhI48n!;*zBKY 
z(o%eUTXkgwq1RY&F;ZX=^`Tq)uXs9}(<3)sTt86mzkO~W9?pAuH>%6bIF7sV#s)eJ zu{}SXWH}C|fUT2%T!)jzOI>mT>q5f<7{INb|_^Q zxe+383iJ#~;NZDuFQ0unepgCzOml$vqVxo>=;!oYlW-Y8RmcbD6s@i@`>KzVa z`noyOHw`4=R>VwBek=#lCWG_+D5TJ%?1*fH`8s*rD71My^~-A0?w4`rbD%6~VUK`D z*VBgMJW2I3O$UV7)E=g|(DE0xe9mMB`$@Po@$5GnbkWA#Ey0SB_$~^tDgiA*kcI&$ ze%@bP)#V0-162(0Ygl^_r^~At$0WDA{p+5uagP1kf7JwkAnq>;hh$`9V)(lz_+#$& z-`M=GHNhXi{GYt&KR>?y54`B#(>Z_bMgQG%{*PAlKXO37qT7ET9`UdF(!bVl{ssI0 zi}~yBugAX@Pa%AHo0H3b`wE+vzl;XQj>UiTvQ%Y+)xs(s2fStH!TjA}1Ojf{Bi4Is zer7L@X0E(*yc&=c9DaMF`RLY9cY4j3iJxXTosZO%70qCzHw zCyBVMbt|)ziI+>Ao)+WG*2l zee;zD=6#!=&Gd4ZwsU;tEfX!2Ke?fXKHP5)(QK2j&54PQv8=OneD^R&u_wa3aUOSd zO>^?z)h*dR$Z&JdG{kkq8aCVJx-6^aDyr2RTBNkRT9PErsBm&i3_IhPbGlyYRRHU_o`s4l-a#ku_!TNJ!eAwuGO9MFk6i8S2MIFLVXv zgm)M8FT1`g4Gr~w01qMdpuYGrr@R;&ZmVQ6p;|Q8BNfNepav8VmJb>s#n;oLCH3`}tnoY%}}gBMXBGRb4zk z+EE4e#uk<@vqB-bnX5HfnRcuy`kQC@)v$sC@g-3|i|GYaRRE?usI*UxYv%MK6$kRd zw-Gra6{#%+zC-2gz=LdRc8I7fH#vj7o~Lf#%D?IrJnS!oPVwm!O3s4l%vEFh?M!Mw z5O+3J-`3_;*d3M!#y6+h1IPog$pYC$#O1VU9gwt3stoL;6EROGoW%1^|9J6xGsXjd zr-xn-iY%#T$?{hQOvA_dnu6utez}5ylh6xiY*KnAh}FE%?mXXINZ*dv`c z%Xh!cj!zJh)zzl_rGjWKpt!}BkvSC|d{BJAp~ulEZ|(J;$1z+o&4%KE1KeT= zTJo92cA#!v>MT}U^QVo-tmWGTop)qit*D;)t>c+LA>ePLY*aaCM9oo!r-z}2xP2#_|KqPkrd7i;;Rsb9wh z!2t&>7{lTUX>mRol;4XQ!v>zK5)oMRaRu*$NsdsuEyVcbAI#P8x zAP6W}d3#=07asA<)p|Umn%D9S2ZmzpbiWDs%-VfggMrwK@-T7jU1XWPfgj+Cp`L7t=a2X$@}o;E>bU{u^gbf)ySfYLgM^!< zq8=Saym_rNz%hDL2QbweCw6wvc#9BRUtd?58h^F31moY%K)=Ya5S?&V@d^A`xEVzt z?6bXi`Q?FEb1t@5SFp6gOIF;#p{F0jXbks+&0jOPYUtlu9ptS$@gA|cS*S#3E^d`K z(M+ISw*+Nw8l)0&MyT1a(-BBsTH5tBXKr5EettzYdCpjT91b+K2{P5sb<1Kup7ZRe zS_9&bh-e1%Ug_BOdo~)C~0jvac)s%`7 z%oliC>~@D`P=&&9b5n+#2C(m#aNYA(aBveD7%;bX&nsW^S_6z^mz}E;@HP0=>-Gq@ zRDH8rbK+-4D>MQLFCmK~AkY-8?AsuP*648@^eCvR^2}``2styn0sJyS?`4dN4RkMQ`0#)wg~rw`W}4WuN1kA zVihOeR53RXr{!1hLo!?}D5lhLWla-kEhfK3)|Q5@c-LvyZU0CsSTT-o#2mZ)8TUX~ zI_eAwxUg%=)qO4xmOGF6)4>2>*RwCccD)Tkhnz^qK<@Ec0=I^*8dQ7IodpTSO#0E5 z-Mx3y1(ueBSe}#sre&>C-e}Hhyg+3|&Jly&GE!*MK@CLU3g`m~89o5_-fgmDjI-<4 
z7AaRnj-*Q>gm1}9VOZP?Nv4AAhIoUjcyM{)QJ9$B0r<;K%>$%IzufuL9`SLVP``A7 z4;v1a{mD_w`6m_-w5v!8L{b+*psnW6r$lK>rH*QFjKNo* z!3b!tJRv~kApRh4R@Q331cYQk7sDci-I1pxbMBBo=Dx!n*zL3yJxK!Hm9elrI?GPN zOw8Q@V-4dK?QeU3l8S@ZpHfSA6B1{dTGFK?xrHCjPK(!g~OrSM3nywiWBz zdcKw@y_abhF$6DG+5#I8=&sFyz~6Txu30J!)?Sow=i@I_l`+ z*h=p12E?3>&tC#$zR+7iGoyx+-_g9T{GRnzha2Z5Hx0D@TCppt>a6@6TpmYORp-Mw zfWps{eH6Gap03nM^BTVEjW3h!UZUKCYf(&OmsWz&ADh@?=8UgEU}s=*OfZ|{PqN!f z4pp4tE^UP*c9Vi;HFq45+)NVN)S@pGLNIs3(OoZzbgEnEw!zx6#2!!_haw~AQ3bz4 zNfkt+I)akbVfBh%8QW_p;|1BawdLrHcxQW`RIDbvJ|g3$l5DMw;soiP6f~4QVcSa6 zIyx>ikxM=gFaWEe@SS#e@L8U0BQ!yrC(7#C>z;gY&)@KmcFO}q*Ee$Gc4V!rKX?&Ca zt@}^q=3a6DE*N$wo%SXdkf6rqGvQvC@Ei}{8#7;_gmV{qFH*?@irQlQ3dGApZ$qvv zM-{O4#VP9XX|>x)0T>QEar0fu*|yy2jWfRthF@t)JN5)@uls< zwHL3=7bIfHP{h)rT0AzXA3>MA`vkO^r8jS7cgGgGo?B|Vx7sU<-5WojEAkOdf>*rs zDWriq)_qT{Gz~rkEgBIzdBVr=?>{jz~OpHaacyE1yZ&3Q$=Gx z%YgpmwiQi2f3ziqihzFT`WFPaT{Ek2WR{>{iEQ(Nn#`>4c0?T6=%TcsLgJ> z%1?^lX?S&Hzo^4u6Ekgr`%a1=gtnN*MWl}G7ZTsle#I@oRt=*0zvSV@a3BsJbYnBJ00m=HZijD#U=Q?U|Y*G?06YJ z+fm@@u>$cd7!ePNS@;Z8o8VxwzH+Q&{v-fCu3h;7Z&2#&d7zYmK`*7%ZOndKR;YeNpHS4oD=xyU^>ouOg5vAN(FqI*w!0Whc5VK>>PMsk9tR9r=Zf z8uZ-oR2o`sBGO3!ieI0;iruM-EKl4nvQ2#gJ7QRI{)J-tTek!gJ@aph>7RE?{2?>` zqhezHA9hQy{`*JFZ_ADTIN@(f?H`KiH;i*Ax@mU(GOJ`!kC4 z&yqan8k(`IZ75%E(sUVY7nj-V~v7qU>xw*TtPsKPoAhzi`!V8i{rPqpnZ9k0POu8Sh#nrLt3D>7L za!=djKC3<&xS;Fv@V00+SM&9Jvbl}kLV`eGf`vwWrGXeRB8;nNPqgv+)&y`yyY_Tx zmQ|W~O|CDI!0w-`T(4QZOAj5f@O97qs@cZby-Hb5i?f*IvAP_sB~p0A?dDes{By&d z&uw!x__c_7^8KTlQ$?ga6BwtRkFsMy_FN5N&Ax1{+v0s5uDW2-&9vofDI*-N9_E>2 zV~o+5Vmcr}T9Om++)q&I{#|BzPb;KaVx#UdWijb>1XI(od{voRmzYg?%c&5cOU{L! 
zuM!w-;kIA8m0HD31!mebM1=Y+P)4`P%jA+XY#pbmXagVFt1|4#xV;wzxR#f|o zRjfG)T{~2aZC6ZBRxC%as9(_Q8*sQ@ zZ14oY>V;aq4&&4CEo|qepz3L0`&JBWVz7slcSKS>l+aNU%2h9;YHA;UEJJAo0vC6J z(x?xtkd#$RAE)+HS?-e_4@pIJ{xm7A&@32rqN!=usncY0IvUfMz86{5gd4`qk=`qR zyrW+F3deC&f?+(JvlPSVqM4!ArZPiy?lF%|IW4fSp3KX9)MrKEMsOTX{hs{uHT5{6 zG3pi6=`hxOSLu6MUV4tnPo(^!Cgbur%%%YFM0}vZNb*wj0srbu1}W5$x=z8>a-SaI z5#pjK_T;|S7dw#M<(WtHa1j=Awh3yJZgPVx(_AHcgmFSVv}Q`oQl@zNcv&UP=hiK5 z_LCl>lOI&{QiS!kk-#1P$zecqZW6_sgjCJ0i659_S73zESV^@%yDco&kx`;Vzn|eR z%s9k$RD`uUzh)zco%jm6X^R0HEyM1L`Yn({)+2!y_x)Hhbc5jaV2Fv*S(`yW4<*tR zId|uylFw9SV9o>YVKbW%Yod#hK9597J*Q$sH6Ar>y8Jp4Hfnb9@pIBL#}n#COByB= z*aTT#IBr>fmV$(gNRLYe5;PJ==eU6Lt_E z^O5HxFxnLwZ6c5R_4y!(1>AhC!h_VNUJUp=zlejOg95(TTn9U(rB`D&O$AA|lCpux znIc*@_rpuXsh1fRnTuMbPnn*;+pD&-7yyQVzdKkVdofZNL(1XX&f0K7^AKx!py{q}?tMEU|$1gb5|5fy%Z9 zE_0h6tg~Gcc!^XFsIQKQo;Fk5(o8xA6u!*#gslJRIsXK)qT+H%jDUnLU3C}BPm{av z)O?xY9h&>B_Ia=UWuB3nCB@I76F3NIV|$g+ZXz1vkjva9hi1pi&_2-x+9L8o$wDyM zFT;5mv%7xz17lv+mb8EnvBh^l3=>woLeh9nS?rm#_eJGCLMtO%*$G@us1w8~H|X3Z zvwPqCCI#W*YZTvv@YS}kxYAC>5n_CK-U+CmVBLGaed3I43~&^YclF z4h8%0>9}J_X89Un;AZEnPp}}=&xq+XB4!RyvqV#4W+rUaBoo!hf*Gq`M@c}7(=Vlx z^>UJt({aQ>UkD3R;yD%=tskD@F>~>mufIi3H^d4xJwXY+22Ar}EGzTc0p zP&!)U`0`)fij*Q#j3c^Vr+IKY?FYnUoqZ>1cZDO~=uP)1&YaMkV;(N3QR!^w!W2IS z?dG4Zm%lQTBqYse@hH1DjJ^$c_SXGN=BQ(oz*Yrrkq++1~DsOKdRMFQ1|6@91z}~U; z6|u?E8S+Pm+w8r(5*Snr$M$+oID>?m3x&}6)E*3FpyqjXm>~0 zI~*AOl}D>5FP`n~{j|;b0CLEX(hf-2kA{V(1pGyQ|LeR~C>N(M2=!U}jGcT*`4z0_ zgY44oKYF(7r54Z1n0T4(5?rKjScG4&M;n zj=t+F>b|X)oK50g3l)oT$%BkKn7u^i0ytNrYV)R0C#Md!2?%$}V~M9^edvQ);H;Re zfz1WdeJ)_fwCmmf)QI63-C=kb0`Xd*1;q=O4YYlrDz#`*Sw52o$Ykcc_JA^3)6N>f zY2~hfE0ZvxmT81ySyr^Ds!SU?BDoTrGuaw^1%|i1N0q;`x%G5ld&5Ax9?Vrm0C1gK z5ixxVDv?>|mOSJi<{)c&0`ABlOdB1-6QD1GJj-ZS4QScK-=e|IBFrtH%CcVCwH}GylNU-x>73 zO|!-6sxRQruAEDd7YUcUwqvlj|zSdHl4Rc1VZUcSMg-U4(6!KHD zPCC-AS5D5^(Mr2$(N1mBA!jc8C!?__vDy;rXb}5onmExMAKUOGi}NcK11Obll1n(j z*Ctg6rLmym%@vnZZS@a?m>(!HMx2M_44e&n*7cVD*;=&`nBPbt5x&H=PcOZed~M4B 
zh?&#Mo)3ir(e3`quIyOSC@^>4a_z#N3`--S`*{7mqn7ojQs**~#H+F^*lNe;3XaTE z!3Llqv-#)Ts)uD@4?iZNtSUrB3PyIQ5}p=Xb)R!>*>!(9 zT1ccqj4r7Psb=NcqJl1 zoLu<|dk)T37C40DygdK-Gm*1j2gci5w{f+ZVfjQCOl_!ITP(2#C)}cq-AoW8_yEJD zZlV4<*tdX%&aA%wNhyFs$HExLmIdWxi@;rt;cS zRZiy8d=GGc{z*5m#ZK?AYuyEgKC@TBPG9e?lV_rj(E>Sh$Q7z0ajedZIcZZBCq$rt zCtwj-`_lSZSS(MnC7W(>v#Y?LhnzeuOA=X%(cQ!WzNjxy!FmSfD5g)ptJ9T*+=l?t z9_FEzZ}bCCOJjB@(`?Cmo#p*O&y@*|7`Gr0H=`#ikV|vr*8*f)YP;l8YNi8fR-Hxe zqINCOlkoare3h>)NyN%E>Jbb!38X%GAdPv8*l!3Rv@-Cj;`yPVq9AoVCVQ?M3r9Gd z8rEHno))-P?r)eG?J@dncXB{ah@huIr}Y$h zA%x6Wy-etkD`HWr(xih_X(J@BlJ?zskrqxzS56I>;#&FnncAychjYY}vpi&hVwl(m za#5`a1L?U$w}N4y=L!ekj3wA0q=tZf8iXz)`ZV~@yqr|#uRD(ZTVIK_i=ZY=CNVDMVn;jGf|Lx24uz>QE*#05gIrNor(7V1Yg%0zk5> zx$K@#d=!VZshInDtYwjd-Ek&9`RRNEQ$5zw+_V@Onq+e}p5ZVOp9DwjqGFv<>C;IH zct4c55ABGe)jqQy#4Wfh69O?{4M`0rYIf}9Z^aQ1a}`0U!xv1fFy9q_ofp(IXXv6W zK4S?o!ll**dO)6;)#Zj=wl*pETfGP8=xHneW*-|CVx-K;EUCE(Mh|w5k4Yn&NJKsF z00dwLE zb+H`@gebfaRJ4e)K2BY1POeT~%CuyYm4zMbt8a-KV82*h7T{GyC;T{I!+HjQ`W?El zo`=m!9rqRLPk#Y4E==e!tS?|q$Z{EFc%+7)aS987z=69f7v=;8FtV+VGuk}v+|coW zuy)?7^VzmQh{W;?Z5-Yos(03>5fb(CY(11?O5Juks?=QMNz3g~u#|;?2Ko#@Iq5wt zEjgC-uOwul6*@dzI5HJU%loyeyp0`)&`!~fbRCm9Lw<;oPlP5V)|aJ+Ff5A=%to+_ z6TZ6UT2**nZVKL%O7aOggNtLRQazC9T9)_HHh8H?Q;gMofRcJ9i^MJ@c@2Us{nPN~ z_RN0GNa-;s9A!ZVWE-d>DFM|<7KCM+#}>KBR)Byp<)>fkRI;v>eMl2ecKcWlN1JAm z2*~r%=>lkyrLnh)nHvE>8CtXQc_4Ek`}fyXjOhKCL8o09egM!1WZ2l0)_o?VfxPa@ z^Eb_MXXJ7VXkkOQP&}^@-=<4Q`p|rmNpr6f=eozrF-uJ3>L&=kt2*cN(S05s{~)~8 zi^p#nzl%*obI)w>r9@|&ku9Hnv7yKjq)OC9o)RCrKNDmOw)+{P8h|1%V>noH%!P@Z zb*rWvCQ2nm06GW?bH6JrXsgdIXsbGnFXw3N=IB5BeH|~ryB~Skg1b`#5&UOKdK%Xg zT0*46{TYF9ao%9NkG_#1!Lx_OH;lk#xq!1p^GyKJKnAUK+I5L|3x#GKr6iknP%8=I8AdLLOW*=uKT&7QXe(URPW}u3KY`~aFA%X>X1FQ|V4lpi7 zTR=8s&91{ajFTnSh5BJW$dH|K$7JoveTbzt-pKf2xP{qHr1*q3-|q%rjTw_nDUtg6 z@?K$=0VRNfsLglYt%34aT)o}SKoVM&5tP6f_BtPJ%Tu84h+WoB-Ze4HJ*wX9%E;^6 zyb30VW}UnajG)?5A9hA9G&}s<>Up@~v?PhffpELM=(`BxvGbD9^VIhcVVAxO4RGP> zo1cA=ut(Ajd)i8y&dU@mv~g*P-WD^GAKJThRr6TKP&8IYjWhM-W*vZnzeWH!1x;9w 
z8VW?+5lw>wtV>VYk4yoQP(2vMFtbgyuoz9>*@+$Wk_cL%M0i5e=%#v$330vKx+q}7 z7Z9&+L`l@jykb~Xv;~RbcYCgtSTd!+pbF|CedXI3XlA5HJbtr(AyR3dT~1ZTOmieP zRdJV8ac@yiE(Io^KgJ;6a|a2gX95eZTc~pc2=;$?Es6YoT@7E>q5ob~5S$a~ze=8x z?nT-H(>g2WcMGzl89E*l)s5 zLYt}Q@*;w+MM=gn1apCOIlf|tOU$QH6|5hCECMDr*OToxSXGz?F>rtxhOg z38<3UmxCrw*;bS%vu<`)GH_~RW}4q&4L(!Ts{Zq2F1bJ*=?GWJ%?33u#@@fqIMZ?12r1O284y(x#5To@in zrq7$t>0FnK8<~3`5gzF{4W+!%i3lqdIrtB{yLu2>1(NN5F zk^iCy|I~qyEX@B9hit$1=l(Y}_@@&5trLXp&!E)*K@I-if%1P<9J2r3WBs@KV*m3K z=AYG9wwgxdnkak-_eE`5t6A^dg%Fv~SKF=_U{a~BIKN#m93EW%B2-T9bl=O0niSES zu?Iv{)#4M?1vS^`CHp6&PVo!E9Qv9BZJo_7pDuUzOEIo(ru|>5F$%ly)xGYJp&oK^ zKi%B%ULuu9(xVVv_mvmh&3le)@-nG7?ECxW+cQJo1-`5Wu6pKr>OKT`J|!EQc(7Zp zPfM+yi=3tFQp#L@`{pb6{RhW|@m6|`Y-mo?mpqj^VsQCW0r5QVb}j%#TCd9Yfno-0 z-HumYH~+>UGNW?=QR=9|U;(PIB0?=ML`YPSM5tdOLUNfoUqZVQO8%j!<{3`}H`C!2 z{!=3y<&pj{`nQtTE~@JJ&uBGKsji4{Gf3gmxq?kRWkgSNA-PinVrr{o;f;Ex`;cux z;UACLd6mU-EMF1^XVV1=+vPTZZ zOjm@6-n8=oIi_AqHr0<^-}g8J6U;CV&`oM_i|LsA6<^|br@|N6)BYje#ck~~NQr$^ zDXV6jp@PQA9JU4ZF_#^U*bgB0Dt{r-b#rIe@3n)_0RVwT`XxRl`SC?iEST5epu{kk zBd@5-#~Fq`HvzdVsi>F2(*QpVa5dDJ^?Mc~w0Z)2E8fLMH_;?AI{;z#s`p+XPaXvg z6}=MOB|D0_nCeZkyKLwlr1RFmah#UN=ZznoRiN56(0Wx(?&xu2mDTJ;1A^x$@Ij~& zN&bjXNCGP%odIf4e&_SU1&xFaSEDwDKyL!aX!X_uM?f`1!Wi=YW_kw>-|7a8?WfmM zCU|D?r+_@9_8Z8E0BIh7c(N%?9vH}X)(leJ=5fCAPn)JOc9Sfj^Krj&;v$A58;!I& z;&ke+%vJL((xilz+bu`P?1JfX%kTa|?bU6UJUR4i@oYTSvPAW=uH}NYY;(!Sj50~h ze)Z-~HLtk0#L(O=`m|5!!Q26*M*emWCRh(0o|}zmLn+OvA+^nD;>RW|#V5@1Z1()a z5&9zd*nr#Y*3a1&2r;2G-%~(315=$z+(r|~w3}<|243Zt-C0mwNPGPURyD;43NcQl zU4`e60SoX>SNaFhc8y9@_R*6#mRtxg_EIMfqa{ndY)n7f=zMLuBL!cXPJPiqD@;1E zv>yq7x`?&H(F{+ssn?O!0^4_L1Kh%QSZi8WVi7NvOxK1m*1C*4O(lfIaSq7Uc$JoYy0-wJKCsnjBy1dsX4<6COJeD_^@MwG(^R~+ zT$&MvJ1uER_7Vw*Gh0#;y92^d%Uhc$xBlQu?4zxu2VrQ_EX5EV>>=9X#WiKa^}9QcFhZMrDxRZ2^K;TK&=&oRiIv4t zX?tgD{1m-c(0aGS%dwuAX&UH@s}uZD(5wopfFq|)kTe50y53Q5EqK~;`zO&wzA^i0l%`5-hsX_=&_AmWK*BsTTgz|(8nRpgixVOQXx|MeJ-fNw z9$X(#8K|HC#XtNX*Y>}0Bm*-&+uu2o{r5h;zkb91cm)0p!qD4CwgD71Y@eNqAGMM;=&1)M(>*VD 
z2Mc6)%I+k{i4|=f93k3l6Kv^5gBPorQ+&_zWeaNQja|(4$JH$LPXqHQsMq{kFlHrrCxqNFS2G0`A zM{M}6rb7F$gR7(P(u!CVbJZuRH30jAB>_^{^62!}^mQZ5j`r?m zqU@8{rh{`_!NoYN;Za1WVg#2g}i^lbXm-ygj!i-kfD zEaRNOk2VcDGjHT-)e*+cv@)JnS?{!De7RhC))IU7$8(plalrT$3BFA{FBw^rDW%D6 z<2xE;gT!$Kn*HW{2sYM~ti5xP#gwj;;)}_~k`gmTKIdPkJV{7 ziGGbiT#KdAGnK!hpz(kP#B!n~sf<9pb}3<}k$b)0Gao+jxkw<537f<{+WmJD5g!FlF_ugAp`t8~|8! zy54N|2GT_d$;zOzhJ6}p3a|R zGF2m{a@!F^tx$ow5skni=P!C&j_SjD2l8mS4{;a|OC79JKNBo1szI8>LwEYM1|L(?-?P@!{0W1M|^6pllE z##FKMkVs}r;(R1qok^uo7ATUH@8qYOhh)a5y1;t|=|sy4cUWbrLIk)r*UFFsE60o+ z9_!YZTYKGw<14u?HA1=jhV{l#oZsAWH>w*88%c0Cw0wSa7C(~616S7Ik6X-@bw~;V z?3IU&>rLL&R@Wpc9-0vtl)Ku3i)S_~mUI7%jaJ5**C&blOvKLP>Kzu27XqJ>pE5IA z^dKpyMq2PtUsu#GyeGXch5p7c6I?SujA1Hm{vJ;+M`cxM_p|oqP&9EcYz5LMu1M35!v~yvMq4~&=lF_CO#y_j8_}o97 z1ychk8rR^Gou!Qn_NWgJT7@P2^qa*IEI84tO7xF-itvAKf$C4!$kPj*Z3>JD>O%Wh z8G}yk90i#C8q32kJ$?lGL$=Q--4+N7EB`wmK-|zXqD-PC&BJJZqVyY8f4bth5~E%a z_2Q*>+5$84MHnSc-_|w`b*j8Tgadro(;=w*?%MVySX|Y(CN`?3W@svBoS%C9gi~zB zW4UG7dyuPDd8_p__7ZxacfTW)I4H;C?d2|{JA%C&8i2YCX49UR$ z+h#QO-+PPx8y@*nK>wzx+5fb<{ttNM_ixbOH1+?R=?WZwl^5jr7xRM2D&sb5oY0^h zKh%EAelbvyS_lPudu|^w<%0k#0=KonVv6+gv{{#49ryoiInFKc>lvz{Z zX{lnTGJ-WI!le1Qmba>QV`Ic(kQkvst9sR}dBbN4r2HoLl+M;x(%pfQeOo0_+QoYl z^V@R%`Z5&?nc~mUnh~fPvjYcJ(;+TPhmwZFteGEM;kb71Uap~nSk}WIbiG&`{DUk;!2gs zGmrL{=j_sabXVWk^Ik=aj)Mi0imIkJ#h9$KqHW*htl+n2hpmCF@0CQYa`CLY+6%Tj8*7PDnhd9D58F8_g=80=M=gZgIW+0r%Wj*b^ zq}R<8h3-Mh78@6gS6a{pPGnM{g$zUPQyYKzX!E$Hnn!;44q6+0Z;qasL;8F8>4v*W z;4z6X3g9lB0U+?#^zQSQPLt+>A*S{j{oy$!_J+-|e)^x612mXR`jJ+`JX>%CO7Yc> zW%pq#P()r%F)tu;Wi@^?Oe9%8U%Fg(6X)LhmlAQR`bP+WXMdTZF~}g&84Oe z&V1Qx(6U&P=wT^TgqcY`s#Cs3s1*6UUwT7L;H|X$z|Fs-iQt+$-}GM+7oRNQva*kx zR9cXUHxppMei7T19|{NRcRKPs*Rf+NVVgVp0{R7h7+&Ubb=%zkTs^tlKK$;OG$9nw zISKkR4|64VH?{{_OfC1PnlC`3J?7wlwFdtv%YV~V?5zJ7d&>ULb0dG$_cScKfcEPHCe^+`vv|tS;g_^mHp3TRkCuQ%_==KsLKzvr!;^@%FzU4 z!%`T{dKhxmygpiOxELC`ek+dV?W(kTqSh>}y7hbNb6#dpaw4UWDM%M04S$*-N) zIhxeXt)IH>v@?%O)Lf;+dQ^xZF}{%zmYqV!)4?MM*i)EtUqOBAX3DmgYqMJ&%U6l* 
zK|0JIR`rXn2G|8lzH{%`%>1&L*t2P=X{qHdOhuX+JJn_rT3?HurSJP3d*N0%$j6Ep zrL+U2(vz2DEF8g@bYF6-c0eztvUqG#LMNDjjV^{7^StH2D5L?N!r;~&-Cq4L_ps%u zk)sc`iNKGK{?>A0YEPeoM0;Q-otQpAC{^dieYLSeqX1PqfSylaG7od%5%QT$Ov>0F z`6kj!^3mh04drg0^wYs)CNn$=bZ;~;jUBmaixP2 z+H(nO1*0qrWjcKa^rWWRgR!(!XsAIJbI&nNZ}hIk(z&MRo1A+9u+b%I9s?z4{c2H34gHq&;}!86BVr{eWP? zVXFh!Vpn25V0$o)1Ld)Vf*q8NrELf&16H9AL!`<`Y@cMFFJNG7AB3Qb-{s7L9(n%FXhCI8|N&3$wZKmhLysx`f_oMxN>9pLdQncSmeFM ze(mVU(`ukC^30d7v&4zV`MmI6hR?1v|B`LfTP(U8=WOyJhM6HR966YwY_ww}C~0sV zlUdFPliA~Ot*Lb5v5MV|1VKbG?%pyv(kgKi)2sm^H(rngL-R?=6Vq$;jxUjw{#3=A z0eBV4qdus4QaEMKLASl<%q#A`w!Gw~+(Pn5xX~iJ(bFzpw02u8CWd53q`ll(gf_uG zwUqP+>a3!X$eq9S1MxSjSh7_Poq3J5%T;G@A!R;pej+wxSn>J+>GeK5oW>{y0D+bU zXBjUc6sJzj@o120w(05qB9ZA|!<&Ie*q~fWM2qUE44Hja$4wd>s-$H{_}T=;`9u#t=D64%B9?3CvKrqmzVteoEVRnAX=;MPTHISc zp{y>6RZ8~5e`U!~*=U$pN2QiuFr#YYmBpHPm1TXn#yUoTP9<&O)f2RD+PzZ~dILLw zc(qOwA7JC-X(82sLJqRt-$2}Jb?lM2+eOGsAnZK0=lXqGgFaA<0YK1%D?6oRO*ye7SG>F;e0H zP&Ztd>??=l(LKq&RRPKR<#t-TC&Kv4Y=WXBh61@*i$d>y_GRoyEH@KlcwwA&ko-&_ zXkM|zSZV5Dc?c(dLuH_2W}+CkBt87E_cYlhBK!6w#prv<-b_vN4aUnkLfZilM<1Hw zZnksbbIt{-w*5LXjPXc@hz4={ov2I~JRgqClNX(n3m_FDoPrIlP!yEO0M*_w$7+WZe5r7&s zfDhx$xAMT~n1mt%Y%*Z#H{Ada^4Gtx!gk`y&b8)y_QLHm+XPcL}77CU0b0l8lS?}6mM z?*U_wLWtl(MD;@|KJ9=>U=oeyGEzT_-JRE%jK$c*` zxj6}n10bqw7cQ;|TH7_-70g1lhF2hd2K0bmBli)IZ2-X7X>nHtA3o#OK9qz-Z_}?? 
zNf>>&5C$BI^ISh7DxUufNM1^v*IM2xB67k(YT~II(P2%-a*s(RK2dPImG^DJ{iSTp zU1%QpkZA8yc@Si`tFxRks|92VaJ%F8)vWQA=Y;a&VXNtM9hP(cCZ$E#NnqZunbBI_ ze9C(@LG)Vz3-sIdjbd85LWlzOE1iJ!U+x)}?JZg3kRI)BenrO|i+LVnIQtGW=D6XH z+=fkh2RJo(RC=mVxyx^at%hFbY&Q;zFdFlN0`pvXh>VLtdneLW+|YcsxZrHb)}(j` zDp_2))QDUTA|Y4S0H>YRC}2^FpeB1KYg>3#Tn>QOyVO!Lf*R`fIQi9Jb8q zx}Z8TrOOb0esqqNekr>a3Uv_mmreT%`S9( z+RI6YLr5(m4Aq>2cGX!!ixBHFLYK9G?R$O(qHuC<>jiYBtu3s3{0y5C4%NK2+g7Jl zEJsaG!S*rL9XzQn%-59n^SK8b+ZpZJS+3$~$r0D_7zQ_cS zUd^8#l>QBZ1QIOwO@8KzSkVo-;u@Y-!yJ{4oqv!Gd?eyS5DpHJEiAU)YJ}xW*nDwl zk$FYT^h2SN^{a1!1*Z;$)7X-FCIZ%JrPbUujuEFN!Rnn#i$HDz%o4|{e7{3WF(KBb z$tT95o*1U7bZt59S4u49N(c09Nkq8{rSO&n1Q@h z+A%dKWF3951cV%MsDR|#3ANB6pxJS8BL!)P)!;B9`iY+GS--<2=Zk#{UgMVb`l=70 z=L!Sxc~c&|od~#vXRV!cdPP{VTVFv?V8RtdG*h|M!o#p_i1d(~R@&fc;&KORb9B#@ z`Mwgb?UM^hE$?6xJ?%FTaSi?whczo_ABgDjhEeExUC%QyK<6U9)21JDE*($)d_~ye`MCFrduczvK*8f8brtO&Q9qePzEuUmCud`K zbiaCj>I-zC;T-p0K=j`s>)(VXBNOvK){b%fuK4~N68$p}^_yYF@#jC+ziOEM1&RJ? z-~In$!3HD!?{T=lQ|X_s;6J0%^NjU>qtae~&#rI%w#oiXLq$2)zbrML-srapc zkWJok)+?y8crnDA8}E@x@+eXEOnZ1wq3;h1Lk9)0kUZ*aq*|;B(X`@WKFEI~GddEz%`CZ~UX7>P zGL!N@b!=~GF*9%}Pupv8OT*KZ-+SVxGhcQ~$qi2}j+Pvf3VB{i>)MngzP1&UrU=*Y zmsb}}m!PIdlMb?0^4Cw8f4dSbQuw+klVb?+)lVq&Xj@N9icQxQ&H9$_@uH`(QM7~n zl+l*EjkKYPKdQCU{I+9CF3WH2&dmDQVtgzO%s}U~z0S%+L^VvGOo`#w3M%u15TiNe zq4pe1)JvHHAO-)Gmb(rh{ZNYG=?kjIT5MRi-lg?-@qE2ZdvpTF$*HIjRibE4vV1knT?i{QB_Eiv2U5 zG&AwVkD%q=ScBpvM|rd_8w+*z%9Kjg>J-uX7PiskH%SJ2)GW-JuLK`yIEi?iMl_iI z+EXaG>iPcf^vnhQtaHMkw$_)P=e;y2F=cPU3RzUM3*RlQwV1Xz@-FV-`JD@o;obR1 za1GBMt-r>-=TS)E=PR8<`vLUCULW!8Ev>A_4=MOic{)(G5{@qhxb~Wp>Ipcg)nlXs zBguMtDH;lt+)U(^&@NyAyPLtRq8*5`KkN-o@s?cdufI!CG8YpRQ_~?8i!{bc7djv^ zo7ak3qPL~20n3wR8rN`ng2BiV;W0dcrAywgc%*e_QmqtMTCFGC@J{Bp)805s{6IX2 zNR;o)7TJ1jeG>;9bxk!iWj|!J^0OZXlYYMNfXF9{%+)og_@Gv`|7J)*AS^RgcHCb4 z3V~C>H9UA27FR7@GM46ZB5ak1Fwx8wk@S-W?Zw5aN2z-JsE4`VMNvQWoG&Niq?$dA zF-D|3AHO69wW#wWJ{u#%8!pZa*Hm;|howze$)=`mZ{9k60Ug-eg5sl@9-<(>Ek_QN z$_z&eOYO?3+4s<*fj;B@eLOx%n_*ea;nM%4|>?L~*DNf&*oDNKO_>?xS 
zTI@R}ER4n)UU>2XE`a<>gN&#cbSOa;8KlmSGtlw zk|W&-?L>ruCy<4=wgoC-yiw%7{Ac(q?FxzutLFFx%T{suEV>R+#wkkZK(yBzQLs!G z&4V9RY`+WFc8|u)_$F^^=p&@8aUaRaGuGPI|_Lx2m`l$CV4v_J|oHuSqcv@)lm`_Ay_te)T-@VCU|!!?WT46hJrQ zXLOB(o3E<}m;2)Bwo%MG?o}M+Lnd*lJ2jIXb2v_O&S~Xsk*ds|3fJtPyFKE>qMKtA z_mqgHQ>`=T(eJXm*=~nFfX5A6g9}nVlU7V1lCofWW5o`ggUVj8$IN&`2*UtnfP)>b@BglJ#Oa=Hb z=9AJ`k4TD@yx6<2R8roOp#}j50^VWfLU3yW-q7ef$edUoioPnA^85#lLr5+n-f=AL zzNgGsK0I;w*|fXs)VnJvslu3hftRQFJ9WELS`iO~(7PIP@AAGMtuD-`tv&4GQ?~JV zvgqNb-zBPeNpb>S+{Jeg&yW@H`(A6AVsh~`lQ5oP7A#k)vjbi{Zp#C_M~4dKK-;b2 z(7v0kqx2ky-)>=X@H28xejnjEBuv=l*>j7$y(%W2XXgAgNxmgAd6_|Gjx>#bda$@RKRm@;wEcP!>z`ouJ)5GY2lf6%u%l6F&ZsM?CwM0P#?;x#u}hP&a$f z%iv>?<8t*?7uv(Q$6X($LeDLlGsb*}N3!4}gYgj6978Yqn99h-&mCZFeQiQ?wDzv0DGA>MN?bjeq-(3u zAxsU;=T(KLRm+_&hel&EKpM_TkZ;nVp0-l%OyQ!@k)hH39nNs4a=zzK{waJk|5QS> zDNZe=fs*-#{o+&Q!zlOI*^32x{Ek%LByvXq+b4w)$+R48{m4LKSn5TbvcPZ-wPRE1 zSP{1Y;31ym+hIA5ViJ>+N@ViLI|uo;WON{Pxa>s|;el$G}Ri2cgsYAf# zv!aE^Ltgger1$9av(YK+I*#e~iB{nZl|G0=uet+_w+m=lZFP$9mTh_e5D=;y%xtb z&N=nTlFzsoq&ftA&?wxP$6o95%HG(=4rKBt82N*n_^}wH1*WBfCR5gdcyBmpthrmjgJ%@+ zeADfh>m^R7lwrw03uH+coP&`8o&X_Fhr}a;`sAPZKuABnlV(0A&J!jPuoq>Xjxu9Q zThz8UaU&RKM}1(dGUe~FzmcjP+656!fjNzTPJif1$DOw z#TkxLcsHO6DrTdVQ;6E$(*`V!^i+cZ?~@j-hC#NTL!5iGbXc9S<_+}N_(P2%T~|>s z$@^;}F6o-4pW|$v{iia4@e>ry{FRVE$||xFG%t-NI21*-+CT+VoG8Bjixcf7I zN34er=__xKtYGI|a?e@Vhtrp925C zq2m8ULI2v#_qR~U{}}lH?F;`%LH}MT`g=_I?{Vbc(;dGz>-<}?5);GkQc6A97teZ<5gGIKYg_bMR^$mKzF)j+YQOZ)*wx)+rS$|wE9nGh!qo_Hf>QO zESg&P)B5t^snB%qD`O_6FyEdyr^2f#!fzu{36Tffmpfb5+TA^IjSn+}K&dC$V{DF# zU(^WNEE=L~X-ZVos7P+R7sA~v1`@VHa7kc)bSAYj*O%Z%k-99qx=gRC^!v}eWVLOW z{ZLatdJ8jktE7At4EoVPk-sH~yZJC*((@*A7ownMai256FF4MMt{=}~j{gdHfQ(je zhoF#DtH@4R>FtSI2aLKH1|f=*Q&m7A&4F9}l|Ntp{YnQ8^*k+Yz?N{;SAc+;UR(t5 zMW=wN+I>L77ra?xndd&B(OF8kI##7NANGBB8&beatB7V=6;?}zI@)D5dnq>=OLM)n z;GLT_H*YbdF$|i#QsG~jv5qP`Y@8<~6_#zzvD61u zzc#AtXrW{?!MgC~PA3bbMOf|GE)a)<`ZbRm*18HqV?(`276wk*%9HiobqzqG!n(v# z(RE&!Jt`<7LW#b)^P@27Hj_- 
z+#=Hb^~J?W1iyg^z@AeFDUidlmDvm&M{NTeI&@>Xr}A~1hk}zjsYG=u2q!e?R7|{p zD<7;TL;WgAejg106%rq`Padd*M>PTbF32-IogxrYLyp3v&w)KP4#@yj>{HCtZ;(#?b?!F-H8WTWf?QuXNgc%7ZK)?Yf1>NmxLpG0!mB&+pFmgd z&Xe!fkQd{sw38&+&(59#BGnDvUQ0Z8&~&&6{#BBK3?YjzKYtxH{$j2xiF3RT0GJyeHO=QP$fdMg%=lH+o8g z2+W78q>+)PMLtUzOsitx5Zr_)pXd5Q0M7zeXTCg*G8%9uso9b=xT%+jGpb43C~WLf z7Wh8FEI3wrEG9UYaxUYpXZ8RYXmH)$UCR^!wl<11^hunXbaoJOz?~?{X}{2G3xavmOB`hEJlUR>Ux2d zE~8j-pgA9=p$A61v>SM5=rkB=O;!L?Vi#d=7XT+^SSDO8a)Odk81m`>j$8plMhK2d zVYwS6+IMwoWJ1${2)wGTeje$%vnz$jA<09zlBdBtdHwR6H+Xn<5wmV2x?J=1kFzuo zoHa_0oQUp6H*dNxV6(DPIg!PdWZ!okXYQ1sYGctIl@Id;P(skq84IdWKs9hMm7D#9 z^-=3O29Vw23^5e9dO%944$5|>r+GjS_A1&Sn^TMHpufgc6_CrU?tt>6Nh6`_H?m8F zj`%InLer5;LkMtAZLWUsezB&q18*3W=k-4;S3o7H~n| znt7PP%(6a7h}*`?EyDL(Ao4cPh7%v>z)d5E2To=8H8{C}eNCc|Fa9M|3?*#c!cCkj z`+@Gk5{x&m-an1@`N5m!MvwOp`X`P;tlk#yiSX3Z7EaK!Zi75H-@w)oGN+gq26?Wr{^If<9hBMcn zu`o_$D84ST$>D5#);P(RZv%D<+*5GolP=@CAO&YV@H=n%|`=gt$Bz{ucL>s zUR-48(@C2~Jz|I^@{#^+@eIZIienRfeI1S=`?~4g302$Qmdb$5Hz)L6r)9$rT8Dsm z>QkQpPT}Pz2LUXSJx2crW-P-e41m9U+^6e|{mG)=+ zft!kne*)3|U}2${7@7Yb5M*NbJ!0_RK(s%I*gv^hCWb%CSN<0e?RO8(pDv)^pxEyS z?VlIP{ClvB@%Q-1f50-vKT^j33YJZ1NX6_8!h#(%cNs)m=(Ux593z4Qn(VS#^@#yl zeoA<(X^rVq4&>_K z&)oE0>UcfTLf(`jt`EC#B_C?w{=hLK?3!fE(D*VE1gB^j; zF68;z(sK7}NoiLsZ?#K3EL-p{>aBXO>KCGLiDPzmS2HQ{(@jSC0eZ}(gXJ1S{Dp+M zwXL6Qhe@!-lKpvEvhHgGLJ`qH;XG=p_}_*3P~4k-IgW=J$qptp!h;f|ln%I9X(MRy zPj4<_=e_UGC=Y8#R~j(5OnK7@RILEmH4(`nVTAIMp7Uv`?$P*@R6uW%pdL8iRb_5Z z#exNW3TXS(to0%3t+Bcl-mIU>7zVqHOWvCG zprj$DMXoDYSYF1i{nj}Pg(QvPMYmeaSKzWlC4zr$L)}nK=Ir^_uD~|D!n7LW$NiWs z8^{Fty4x26pX0LWg&eErrIa_qy2DZdjFiI-w);{s=mk8c#+$7D{)Rh zI0uc(E*uPdYz|f`uBP8R8csMaq`1=)N7D&fw)wUP@MWNdKthygHwe15c4`0`Ayeo8 z6(RB0!Y0nX+fV^pv1@-_CXST*=|fzZmg%r*=<2qqBPiDs1|=5Z)fuBrgi>4!m}m$C zg08*$xE*i|tgpi0@PUfhP<7YUkF35&}4XechAhzO8I8XqqGg6vk*4X7%&ojL6y5Gkw&cW-|+v2UBN%v+B z2;c?fQ;e5rVT<(u&v^kk8q>R2EBLzFBd5q9`5tLHT#GS_ikoNh}TuyFG?P=((E8L%+TClRjnLTdC1IK&9 z{X%1je#5WG5YKnkP-sB>|LY zD3M+Rez}Cuea>P`gYza_CqAhZvF4YD4b?ctPcNS2DO+lj@sgkgPe+iYXNY$G!lK(3 
zPASHYBE(n@C5BuL0B}N7N#8V72}Ds_#W|`fU%ExRY*y`8?1;qSD+KuxKpZk&WUx*7 zHho}DqY&u_QO&x4YRr%AJ}*bLcASV2?F~&1|9XmR#)+slb?fP<*toAON8bn^k|qgY zgFujzfAXl4pgp!sJ6SkyZxr=NROkkhx!JQ0&D@3Sk+pG z6`td;AW0E!d7x9XE*1+tfgQ!(Wy%w{;Wl$+9g^OJYe5Cx^qoMMv@oU18)9)*n?m|b zqt)PQV_Um@iSH{c0qB5QbHgS96EgMz?qskb>pBY0W zplhFLJ*YOW^trL@s(~ZF>aonX*wkg&nNFTocz%?u@6f=26PH|_m$(Laz)|8(Qeu~7 zqj6d@01gI@zhy}&aEwf8n;=s1G)=wYG4oC;ZifHt3Sk3Lc0$IbaL68mgj=gw(inN9 zx2(*`6m+)I`wH8z{t5ZB-nK5+)O9^N-3Fe+s9qG#I-OoFLWSCFPR(pCc?Rq(k}a`1 znvFPL{!?xS4E18nOzDQ-a+h^Us16{b?I22TZtSMFr1k_#WtO2#&m@eB8ndsuzkwzO z(^n+~S)H~sZGGPO^NrQJC8VY(_fcfKpE`v@p}dEe^xj%Y<=MVS-@(ApJ?WQF9)t+y z9Dp`tVN}Es)Sk20&#R7^ZNIni7ybH z5UcGJ?8??w2FBI~;4@PtBo9c=*U(#I9zkKu5^3D6YG;X;K+{J%@RIz1vX)rJ>8^G- zMxxon!_&9w_#65#4CSES7&Kv`A5zW)f){)$WfDxfp|Y2{>M z{5@;&&q3N>QOO=3O2n=Q3a!>LZtRT0JPm0$b{j=i&Qb#0t{t*LZ8~8WT4Ju62Y-z)ar`T?r zXtzPhTqliu3ixe(HM@7kHV-Xwi`YIN`-eHUAA=I4g|YMm6}3w4X~xGw12z3>$hgsC zIxRcp#Y?E}T5kZhU=HaXbk=t@+US-n$TXVbwB;kiwT%ZhO2nBwdoo%qv|Z(=Cv7d# z#Qa>VUX?pX#0%=xg}10Q@19EINl_OWwWr~d(!aLxk@fbD(Hn zJ`27PQ>(Mfiw7Npx1^VJLm3Kx zy9+Ikxn61|JFT{3Cq>(zU8Z|YJwNW-9EKdJKTGQQ>WvyXVM~@rOlS`(_^dgXD>l9} zz>`hIcYM)SfjS`mo~~jGTR0k?V+Bp}@NA)_3d)AjM|DVPg{6*mF0W+bb1Y~%WowgT znFR$c+$s*^7L{$Dhr}BZG&!rGG|o0v^vG&mC~mk)4Hkk8>uT5Bq({-Dsq6?0U1oJ+ zp{tAw&A)>*n!%)d0i`KatzI8|vTk<%p?J=Bd$fm5Q$wlx9nvZsM~$cBd9rbDi3D=) za2+P$Gu(QOmjPZrcjvIvrlatE&1Il7$iTrN4Vzfecq-+&2AUW;iGxswP0NhjU>^@p zb6S~bLc0oc(B-Ed#%i0FMHVn<9bu$$y4YoNOf%mciN`y!ls(Rc14dYJ`GtI;Im@bb zhnAtK#t)r&nI6zQIo6nO&_Bi~kk{W_Xbwl8-Pf+$g$NBmdC();bUNCzuGA{@P$^j- zo)qMQ@jhR^aDu{8DraijzXSzS)`i~m7--Tg-22RuP;1BY8N?maD#B|deyuT6AAr6C zM5RTnZx*IF_P5nMVU)PBz$FPM4KUxz%p`+wm_}8vH$QpTh}r_AaS!fnl-IzretZS> z)B%z;9op@8N#ZU?g=u~Z+9kWoUL$11UFnSsslBgJ~qmfj8jl`F{68#6HTy<5!a+IR~{ zaNZTw>39L~sm5Q}_6FK;mnGQFg9fEc9d>;a!xQs}fCMfLAaxMK#y$6)&pa6d)P>s$ zod!>(fcCjF=~LvtOT~fZg)&3c13Ebr(Rvb*u%^UU#+a}KL}q}bCrd3(LR$$wcMxGd z2YB6bhPDhGuk4m?Qy#8ge050l^8=~t5o@lDa6u`aPMx0!F=wEzmec{PbD8$Nsh6p= z)8M8?N_a?Dj`L-d9YF?7z;`lQ>(Rdk340} 
zeZXXfl(PNIkm7gSYkU!=-RaFaV7wIIq&r~5K&JoIx~mm$i?P3l!3;wHF7wbn;^(3| zOF2+W`GoHnA;;33nI8{H%8$&w=`$+$7D<=#zf=gM4) zI4re%o4p%~Z~4?Cn6)Q?Fdm>r2(I8NB4xv^KyW|ga0v?lld9r1^WhL_?36`kzb*h1 zjun|NjlR~6fEXwU6Cb#MSQKF_V+I*8@=x}k9DtYuM;hg>{`3}zUi30NumA%Oe*IX5 zwrt)zsYXvp7dlqh)q`kvmwdB zoiP*X5MasxdkUG^ibd-BbQk)Q+pW6(9R$upN~{2a1own8StUGxkH1pFcFqC z`f93~y=h=pcGZ`j-G^|Zg9LkP7{n>iTozW3-_8p52!Vt}jEaTzK{?`P9q*x9>Q#k$ zTIeZ!K$|1QmDmCGzQ$3Z+MTs8_2L=SN{OToGm`)V*%ZqTl)V1TXpuuk_1z!z>Y2c) zOt4d~VQSTI^T&gpqzAvY0o8rGZXnpg_i+(&1 z0BKF=*~RG&|(!b2y<K;mCl59*pzbcU+zTLw6|paLzl@k78z*JZV?terB2E!$@Nwl8lu1KeQS zu}c&a>qZo&?VVmf73v2pl9YcG6QO~~GK8U|XC$nYjvkB=aim~SBx4cqVD%fzpkf^6 zeUxx6eotygIHTeHE>6NGkK7B_kX@e-)^H-su>VgC4TkYZnqf?0sp}LRDk1xMTo7(Y z@0Y~lB&^Z#`2OV|Eg~9vOUa$d_abJ-JsJ>Tq@Y!0> z-r`U}fg%rBVI8}}5AF49cPt44 zZaHXHNsR02hcCDO_szP(h===Df#{x+Jf&EgNa}Z2zb{p9n80l)v z)Uc4n+!GHKPsRPx`|rxnxupVYwXW!IiA-;b#soRRKunV#sD5Q{>S#J&)|lSOs*XHm zz6L38JC2>$9M+MqdNJP1M8u|(%q$4gLKXt1@prOwc4gZ^N^UqIBpfDxNDid5N6slo zKJ0Rhp!72xR&)$?MS2Y#FAh2AXd}f7VSWT}+~q9W*T{ZKi9lOVUV>9Hm1VrPYsl$! 
zqhr4Blf4;(y*xs30|$*@X0RR}w6(PYj$S^aij@>_6Vh~l4jg|vC5V2@;SQD47fT_v z%uFi2FDua!tvl3;(0>N_EdAWhbSWOMLVZcMF}l0(8aqPem}t_VICIkqHnQzW-$WdK z+It?oYo=C~I)eJ#7AnKA>bss2NiQ}kP}mON<)7mTUi>Fh#ed|(nAkc0-mAmJ_%8!u ze}mlrd8%Oi{}vEqV*DF>{AU347ybMjj{kG2_P4lIbmA=1KYd(zMjH%`=>zbc+Tm~9 z9^1INvKgNS>f97WL>0!-%0tx^$44LDESx`fg6T~dBcN2rRk6RfmVLnR9jgXAW}6Dq z3QhW7>Suyx6TdKg1zQzVt#53o7Fo|tzQFHI^-D&l-kms_JK4B2G%`(8A)JD1lmniz za;~ykk6#db6j7cXoh`GG8vEL~GJ176f4OzTBb!g-n2r5x9jgPGK?(|Ov3?>JP<3F; zA?JuzA43xBwiAF9RB7|^p#=EA3{ZUt?-QC?Upnm-i3v3$RK;$e$gGmF?m>SbV5nrkA7kaJs)?2~+T z=(T~9kPo-zUcUsS_P66C9GkW1yLBGVZ+N88AN-d&IGdjuuPMl14-sRAo>bD?oiPOS z@!LPu3}`c$mGIXGXAG)a|dLX}DJ>7hy!IZD^g-+bKF0`EnOmf1&>9 zSxIRg4N%2A#Z>pv$OPe@(NcknI?DLMbZlIzRnh}B22r8iQ($xy*dS}?*PsQ`u(IH& z_2{v*fM+Zx3F{vLLy<)D~hG< zqZuTMr2Q4)^8Wty2g8W}*yAPL@ZQVCi3>`n2k$)C#ikEP5C#_L87S;s-w#(?zO{|0 z_S4nVs_f2JCuBN}Eww5e{?|TR-Y-t5h>x}(&EB4uHmJ|mg(SVkuwhg3m0fyWjP*Xe zC6cnuU%c52E9Fd=R#$X@X;V~T3mt0pNg1TW2NHtkVP*Qhk7gv6Dyk9+I2^U{iV^O^ zm5e}e`bMxN?={ec83WL}eKXqO?6=PI#nL5I!+S_tYZhc-2|Q(q%`XoFLa^dKdo$uo zR;HJMye%cVc)3P7u?a_=WGu;7{I$s)Nl5%66pMU^X=yhHq^vHTLLu41ZfMR%tO8r& zM}vQUW#}M{S!R4|0jv%?h+m)rsN)_>44sQaHo;BY79bfK=GWdH%Ji+-cs-mngf1SP zC%6Eh7l%;QQzSp0X2w97uwaoR_*!cH0Zoi={&D&AmYke$Mav^dXV~mv&pt;txidwp zjawnlJw;MgfS+g*a33RIkv2Ki`%_W1Uye`M*(%K5T#RYo5`16x5Q@eF1ISy9mqr^K zNRjaI+$KLJcHEAdl~9uD_znqNlEST0*q(&oBAY06PWfmlv+46AV{Xs*!zm+X-IOf{ z3iwMv7va|&iAnW&sN(OAl0&NK(@`|=IF?MHb$z=rawM{@VHVYx+wEBsQShc+DS4?M zU(4Uou|n6W&+TFT4Yh#_R2QW4mz`ga7ZSeENO_-*c-OmZu9m9DQ4O3FWDI*Dnth_) zwG6d|;|UNmrZI$f244Mg1>$PE2=2pSg`SYmei&f3eL8^Wa1c$FJrL>5!JThmVc^XI zh?jlQEbqV@{{gqNi-qob@STtXThyECgR@z#AkK7#317={y;0pyeb4w^T70WWqyHKL z-vL?HE_>jq*>@Q>uEgwE2-R@YDwH4)Ku|4cWR9d0a&kv+9jA*t+j5_e+R`G_vKomOo3B#?Vf0(2d_b~iF23x$ za&t~7IR5lB25i$ZVebUE!SXd|xYuu+A$xWvo@WB=+9o3s0S^@RRX_wK5=g;El)ej$ zrh19`9}yJWMpZ*sY=KNgk zv{IY3-l_m$xu1|>cO`=@G7HOOwWACS_lLw6wx^R=eRz$pYJtM2iP*O zdA(LQFAsee59g+(?9xbt2@@9#=I7y6aRiapU6&soubgoahWeq(IL@^JxHrUR6_FI8 
z&8ZDL*5jyAt+Z0a+)hR7OsMZ|#w@jV@vKi3qanqO69B7yP6ZXZ%2c(4tEL^Gz%n8Ac!SF(d^ ztzRzDJ5fCm-h3%Qqws*~2hFGt5+UePE@hi`o2{T;skcPpV@1L^p(E6QDROOS*3`)7 zqGB4S+8i9-K?!C}1?;?GL*bCdzW0|6KOpqj)%TRMYFI}IXiJL=&9&HN=QVlckdo}f zsKBpVo*Jzrl{hIa$D)TLn!5pfl_1`s=U@pU7!@DSg7`&ORm}#TE}Mk`7+*c>#LUZQ zQ#sjocrv}dp50M}zvfMR!&!wwd=f)#*z>c}@pdl>xyeMIesZ`PHffkM8g6L(QRfHJ z9XfMq9QP5^|0_B@J1^v_fZ7Q&$+zjv^E!uDi8_s~ETFZ|K%92ic3#8W2d@V-GiI#bW>}2{GZWT2*f53yy$CLdI#*cmu@tQWc$!g(uh)dh0u}=hc9HLdq~oM}F;Vib`j! zcG)Jo9iAOVBQH|`RPTBMqa#e5iASY*TyWOADGj%l9@?XpY=J!l(?c3Q!9!^MZBkS0=Zl~ z!h0c1jrRH2#xJ6!;X#d#*L`CuMw=q=uskPEtZ?jV_txp2TuIr2t24Dbg{i zEQ@59!38bWj=w-c=$WcZUhCTv5m>%WYKDPiE7cYxIZ`QQ5r5lzecL;HX+74De{#qE zArU|^GO)7#M}a64)9(Yve{;wFk;D7V5C`yT$jQ{slpwd~O|tc1?(oSK5${cc4~pTs zxV(bz)H(Cdr+IO}jTs*>s9DlEgNC37euAb)wod~|!yi6#Q0nmT{5+Y6lB5&fmqOLu zu%(^%zWzb}E>Gjr_IW)aQ%*L=bgM#=N=7fUc*q-|{ZF^*@5co>hC-6dqoFYZ} zMxd1R;OH{p&q)$(&KJ$s|!}Q;Us;uYqxo;sG`sIO&axpyy52D^#ZTAGjiZ zrR@9isUBZS=3hxKfRaZFbHXqt)nDR9X+GsHKb=8`2$CMR*TF%EE^kjv;=u3W>??KS2#qd3<}b?_6;q z&6r6)hE8<#(TzNB-RUT9DXE>b*oK)lmBhnEHmFc~s?*iv*%SJ}1@DE4*FGOnH-NppWjz#9v{gTMj5C*m|0v zz=jSdiQWLc(Uu}!x%`bLS{82ic1UXv_+elvwnjh+ng#O%pn$;j(wRF2TL?hqsNY=y zCGJ@Qhr2A=O>re|`N%g^q)RYU4F47C`jDL1MKiI~VYM0#B*>Ls zJkA@k769g-593yiKs;@fn2M4mY<@h9h4;_uHQ>ZozRp#7Js9Cf4RPIP0?2ZQf}l$? 
z33gkV`;dktvhCl?j@`=Ujct(2bU^S9%_ykiKI7a-M_Xgzi9+UjMW#1I&v))=|P6~H@=VS{PU zwfX3FhU-SyjcJN`3G4mLL8eQ-m1>{EDhBu{Qj>H}e)8SqC6DDE7=|7~ zH4X8aDF^a^;kItvI2o1P9)T3}8#PqGnRS*_R@|zyAu1S@?c9+_PKf z5f=Et1snl3)&fak`9Ma=5(!@5`X8jXhknlMn#|jg2H~= zsWHgN-vVZiY;hoo@K$3(MNAJ+`kFiE(RC}#!*9Fw?(@j@?w;ZJw`=N=8f}k$ST^GH zx|uay6b_-y(pAbx zZC~`zhCJ{p2}{nH+YoD~M;qm-me$a23}sviHc?hwvz^8MgxZK<5*<;U}<4 z=+AUi?t2_MbZt$fs@eBW%I0Sq2yjGO09GW%V+u?RuYzhZI)@FBI1TPN2pByG=Na(7DK zFadhkE}w;kq6lRN(C#p0T3d$>Hei!f?Wy~RtyMHsOi6R*p*d2t6lt|R)L1{5SQnpz z%1BqrP6uvS+q-}5_`2nXFc8uv4+0V$zLL#^I|f*9Jsck;lFHBw<-es-<>_#pF8;pQ z$1dfaBr=rAg=`JA6k!`;3vtXDl)X*j_2>wLEMA1k+-@1=&FMGkxQ^99YF9M|3{pQn zNbUUv*(!Y!kAf&V1&8h)!bwO zFrEfxIBqj$%yBgn02BnEU*D_v9sw#K_oiw>_9x9*#>|6e9^UPt*~28pUdD}{DhJSZ zRqxKdsIiO^BA!>hD#jvgR+9v1 z>#H-l2RIAq+ui1Hbkso0?LA+sleeBcQAOaSs+m)-ur;0P!NN?P{!1Noe2jgQt~{^% z?lfLkbg{0Kfmvnk1(?Y*va;wFAL79F6L1T9o+Xj7W*)l5 zQD{W88c;?y9~BB=xt(Q${8BUBFXNbTLOekDenzCC$AKVcyJ>W&7e$;#Me<166BVVP zIjW?83N+DYU3}r!i_fiES8try`jqU_wgr(U?kJP|}aklwbiCy8lu! 
zfnCu;=Or1NPIcHj7z5I}T;$U`Goes21pzfKaaD&M|S_v?7kR^`Z<%OsXkJR5|(hg5zqu`*wAK%^QWaY@zSWAaJ&0 z0&Dzq4L3KR)#ropo6}~>vlUhM1?BSU_reh0$_`2_)zM7J(Ex3AQ@7QdzB+>hX zvOOoWRT0~Jv5QF_v-9)SP5Rp`gNR1=%C+f(Y~YtnS=Bld65E?^~0%pqXTQi zLVdJ0a|5~xVvOqlcsl?3XZs6*#>)2RY{&e&yZ@gb&cFV1|1*Ea{6`t!|NLSU);|w0(2M<@Z~He7;qO<%-w)v*`HX*g2+?)^B4Q02`iO3j!81z5{aoaNF`Fej zovswL*28G!qhP{?o(~}&wXDM$ts=Q%<0o>WWzHr-gg`($p3qlWT7IzzL9;XX` zd9(2T*byT|&ofXQKtHW0(zN9RjSq6Tc7I=u$WV`}G#*qY!HJrQDW38q$`B7Dl}al3 z%OJPox7-jB3$D8k`6M})5|rn-VFj^7CxTM118Xp3`ei@$epiq^sCZ7JNFo4&1wsQ& zw4{e=FFs6Ko2E#QM_Dy#UTN+^#yoRy3x1L*J8oSnu9`!pvxz4Bi<6?C@od^d>H4Gb zYU)(L*C~qK=42)ldm*{@h2kU0vCqw)h$HG*q4v`;KaZU{1-}iZ-77O)qY%W5M$wLv zL?m(rJLRVH^=n|gbSu>(L6wCDR0T(KcFf_Nc&;Osts_k)-gnMMTjF>rKXmfmIjKBY z0e+PXxSvG4Y^y!KAC%y9@DE4WgSbKbsTpi0VvM^#UfYT6AjJB?`AjoRvBPb*rqKPtztQ(nOzV8)$_=-+ zUvtpgZ)Gl+6lH|?m@Y}nTuHEc=JBRNut>5#EWfIzb=>&QDBb(<~~Et(!*Ml0UPIRszo1IE?+_{v%#q(fr$2OcwopA z-vU}^Vj15uAd5;%g!-bC-X`#p?3rSQ>oHp(qWQrKd6&@iya=50E+C}3!ZlsqEwft| zCy1VCVZA)Z`Go6i?2E>{?AF+tqRdA*0*fdg7MEw6?0_UuB>a5-%+m@q&wb<{2a%*e zgU3bT-9;{Op>#fyyP2rh;U_d=z7*~YS@Y-l6$;eJK0CM}(KnB8IlQcH@!h9_yJ%Ci zgLrcN^9ATdNAE>p0~pV%yuAp6t4K&wOA?kZHY~5%)Ofyq&bw}~ zjn$mQk9OQ}!#Lh}3XSYar$@T59s+Yt+B`iOKdXKfFeu6duY&V#o9Uq2QbDC+k_wuY zMfZ1TvFx@p{wjCDNmPd)8{VA5YGudwtYy<$R=bf`056Z}TJnaCdhKr*L<7hr-?6-QC@_Smw^1?%L@)+dVs*Y(fIz zj}t`^8)9=c!nHI$Sypm$2ZLH zoxdPX7=&-DVYv5mY6C2x_UbqT*|d}34^|2NSKlhPnCH|+^uXuaiBYo&B# z@Dq_hExBk1hCW#iio^#dxrSO#M7O}xEXDe0u|~ZJnHk+NdW7EL5y4kC*K#Mz@NCI_ z$8sxE+j?Y;y7D8JA18*gpnY(n^j>NdA70l}`z(ZOoD>RcmsCr$IJ4IlV;<1aX>E^v zy$&8rAg3uDKyXJge6#Dcg26M-qN~=;pf1zZ{Ng!VTfZRq=Ck?ugGE9qyIXJBPH%kx z69RNw{^HU5Cjx(ZS8RWq>&Ezrkbg(Q|Ki#ECk6io#!P=ot^b2(@9&P%|9dcIBw+qS z`u!Kd__L?@_y59wJd>IK#b8Ohnq<^&6C&7`#%ENor2uSG2l1FWU`QZCMy`Mf((nwO zGyOX7l|n0h!{L~{^Sh5F{B%kc`9w>%M)0p-yvao2QSM&%axO^TC$y3mjOq|2;jQ=E z=dDGJo8I)x8H|EycNb>Z#hI*pboFuvzmkf4JYNY}@{F`0z9APLH+1M;hML=l{3M!= zF?Z{fD%U#mlm!;^$@q92UbK+aU478m4?mzMe2zQaRFbN;Yu9X^fNq)Sagsn;*l`-| 
z31Mb)k<5T7k!rf1m0ZVZt=(M&JXJucB|u9O2g9zV`eMmRBnc(I$H8>%;BYotc;StS zr`sBknKfccE+BYJ7ykxl+3C z-ij4zE=X8O+CsMUGK{N7;5mLnOE*H6g5GW_!ECCT^5w1QTq>`NA2QW7+Ofq50S$VwBsa(EFBJ7rGNrNsS9RkSJqF%9o}OW&9F^dF+6P&3JH0-_m^z=eC7GA7p95`BmY)PUN6bHbu-ldnQ17Esfh2)UEjV0 zOA)*F_ts{)xEfI-=MuhQ!cEw881@s{TDS@h`e6sOpmF4Le)}<3)EmYP*H(Vveu=Sw z{Va(@B2&Hyk>Kc% zGyb&UO@v|=H+}&MJ1#v~k?O`G^ZoTKv2?0N{tTvY}{P+=+2#$$6CGKr(x zy&rWKTL}I#jE~s%s1IzTBH*(GU|;`v!EZQ8h~jcLUB6#hj%!92qUI3K!m4%#2{JXNVLNs)*}#U_$oU)F|oINkUo;s zrg5T>K}0|dxKtKcVFv2MNB8!)VqZE){16zV9&@)ClO&*DbiYG!lQ*CWThG@>l?RL3 zy3-MkNB~)IEU<~nOT0n=x!M{6*Pj$*$Om)cw*HvW@%)JNGoen`xvqRjI_V+|A8}0IUw* znh{B74~}6yz?+G`n4?CD19M;ZjpgADa{N#)_#zKTTqDxBa*!} z`Lu=+7xMhIAs-Jw$=26R6YIjusm~D4v$%Y5-fWjkBcx4X5cEq>%TymPG>mU^K;B)- zDWw*y8-+}IT8Ny-%yWpqVj#dV0FnwC{$=_;FALH1bNnmEDCp?eJU-}N&ZgUmjcD~$ zy&EoCCp?YR7GRhfqXk@vUb2a}ya=S3 zHr|@Ojo^qMbZ*N}sfD1;fZeC`zJ-aYb=9uZ z>B?UW*x7BNl*JjXj{3t8Uz_3Au21Aq3jt|7rX|Lh7OU4{y?<giyc5 zn3;a}*8l%g=h#r1@cIiOTVoR=OD}1IjMbq%yIHu&3cO{i|CtSEz0g26227Z z{+*?nJ9h(L$~sg1h;KVxYoW#|cUohMOo(FChUopw=--`x!coLC;rYmBy##v-(Wkg? 
z7I)2k4mV!8tFJZpkYG8W9*_EY+md$7Zq5eC8OuZ%`)&C(e6WpR;H841%*1%#d6e!6 z+hdf6Q7o}b;&L3(73}^j%dGKe<0_e5q?AIzDqV=YGUaQ%14IVb&pdo_!dx$i?THhG ztD)?B?J;%h?V%B0a$p)xrZ~GhOa|#iWWU;VON%;&@hym)yLo!j$oQA_fjH3D)y)Ds z^i*y2v{I6>S`Br~C&1fGlX%wb=4Rh~uwQhf;epV;&+1=&(5!3<>I}?IOx}571HTTl zN9mk@c7?zKO2nXXQ8An}$PqfSmA7Rc+=aL5R@kpv&RP4nd@GJd{m*a)oF=Tg@}~^D%q^yDVO`DfQ?`G?*rM&XD;Yg7&I zf9t)Mx;^6g)hZxxbk#f0Qc-ShXZQRvL*pzh|G9Xb@JR<`jbp;Y-MXy}W98;sNaJh6 z20-o3o$r{)owXgqD0?jt4{(#K_T@`XF=QBZG+!^Od*WObYUA)6UURfR)oP`=)>4=8 zd?Hty&eO?l2pvJl^~iM8)gb7U-dbg4xsiUL8J^GUy7Q`Yq$hW&UtkZxxLT?_1_>?9 zgwU}DQ%R)sVwnMXqKFNUBTD>q4e67$NGGe1sx%`+vM4#Fji5}#k1Z$gP}i{RTxoe0 z!nqiMRw_bw)f|*eI|NFOS~O2tQ#Zq7_=H&)g}P23zf=GpTXoGrn3J zeZfX)%goD&fE5TCddX&8asUMDtg^~`ymWFowxhPAQUmo_mPNMhuoK10?eEp41Y|H) zkNsASQ#tG!tZp7qvR1dzlWYWYFv{`YTh*c0`nST z#T=dKr8V&OZiH!%qo=7I=8~f;W-aCp)zQlIQk0#2F!%md4ci_qFp(-@A0CQhzrA4j zPSvO;v#HyOy_p)2QBeenHy;Ms>{IVm3|uAnjfRQC?fzY=fBJk?(VjGgDF!#_3|f%z zOY-Q7jTS+#c~+(Ep@zE5dSD>)9oLjRaG+r-3*v(!LsJ+p+IKykuL`D;{rUn;5U3#i zP28+(3gC!eKNCyGyQAGpVI*rNc-Q&x(uGz4MEnI-xNWw+%0ET#%Lyr1g~!W46iRJ> zY@qk7H)<8dPp$RFsb(6pSdiFO54P=7%u{gZ8xy2pVK^_OyS4CLgu_;dK5sW2bqWS9 zC%RNR;kEbBj_h}o78+g*jgdqN(jc%x%~?3+gaTWrACPg?sV@}mDS*}s)puYMFJFvs zY23?=t4^&SFcJ-Qwni1N0jup#Gd!PnzZYr$gc|Q>3`)oJNF12r+0w{ni>$ltrQYwD z=z)QXQ;VcwcI^yu)`hV30rdvo?RYJ(X2E4qyF3S&2iNhfPtj8w#W3gTA8#`+&+;07 z)n6Ffuh|rP0RS`|xtpG@>uPClu<|(>ZGN}_&Nh3@db;2%h|Fq3deHr06hl$i2QQK5 zy#*{jR`2=d_ubG6Zvs6ZK z)Cin-H}cU)W@Igj|6%K%w4yk(SDLHjvm$MDP!u`Kza?{kQc1)`-!*o--N?70PB;pAI6K&mH`W=Ez_JN}QYWcM}7bJy8yt$1D{%YyrGFYCO%FtwzA% z0rdeN^lVBx+ww4YVcLO75En?0+41e8Cn@Q)VNKcB1hn=SP@1@&+1 zMgKL6iurdO`wtLd`O|OvXCRXCDTl2MAcCExdH3US_!de(#6|c6C=lTSQL$EAJZ(L7 zk#cZ-XW#n{Z^hcltE@ebV;|gV@6S=)TGY~-sjKW{kK9YwY)IqTFNGy>7X8i_(=`EK`lfyYa=3nX^{g6u(iV7|ejA^Efrgl;O1YSR$GF z&ijXw^9dmydIth`>RNVU#K#%Pv=;S{%ibDifx`G|jT(f{1=}?jE8CAL73i+p+8#!M zPL92sr8!3yogY3sqAjFs8cihR9aT3-cLk1N?md$tvh1x_4JhPwM0K=FktAPtj?N#f z20(x(5_hS}+Sn^?yr31yh_mzV9dsLs6+MJ@^>_=lY>Zo0sK6u4Dx#s7lgafR4>+Fc 
z9XQ>6JQ_!p?(nFiQ;(+Zi#+q~7H3>Aw!_-!+?=o`XUUyCGLnxbTCrz7_ahrydEZ2W z_qHAWETM^XdzDML^sQ0z2XG=VOSLAGz8ktNvd-;Zx^A2dr(>oLjpYy`&33$&D9=fI z*;-lm>&RbI?uysUkz?}G^aBfS^I1_x4z&%92#iI6(6LW2mRHA}T6><0sP{G4MsHTd z)RN1Z7)P$i!OsQQyftp%xG)zelX7I}OvEbJfmQzaX^nq$skrFeJA2$=arBnUmC>5> zV8N5niKz@hH=BWS!*C*)aEWOMGQ3m|dfIz<7GM#yZ(>7-{0NHKuXyD2rXq zKGfl|&s9pDE=hIlb#-wp04`r~n)zzn^4f&3Semky1ae5t`uhg&6txK1o!fBR;^UAo zi5*sDC|l*HvH3lJd3A7yWgwKnRGOK`_^BSF(6&P7DgJt*ZMm$GnUef4Wtx z!5`@9wgxXsN@fDO1JX|-Z_@1p^u0;w-KXHE!)8XAcX=-$hfl!bz}Zhcy>jH0LW{|r zS5S_VDlS+q-(ZW3CSh_5lYVP`GQ)0aI1&KGBMe&hvN}RvTRTL4#(8;eM)AcY)JqTA z%1zdTZ#)rB3{&aZ&=08Evr7F(h`Ccs+evAx z^hCojabxJb-^_+xuyAkv?sDpp)f-eGaC4NmFhz&yX9zH$gH#9ne-_y6a35ZJ8eV~O zzxN=Hp&f+J(x9P1fYu4gwr+o^Pv1Pq^>O@$N;@_Pt5o2Spm_*hat2)tu#HT1p^2Dn z6aoO@3u`Lgrxj|MOQ52(&;}=4A~(`@0XFotDL5AdB#>K`&FuqjK2~clYZlrFT8aO5 z>VE5cksmpGTDG0rBqvo@kEn`VQUnM{4iW$|dwEPdMTE)Kh=)R>@#lsuQwE`3KF3+i zo{R&ffF;n{m-qUna(A40tmASBBK2^8B`bEp0Yu34L->#wEHrN*0oOwbyG6JLBtWXy zOv&Un%oe$`&n`v;4n~U&Hvv|U-CLqILq@4HOu>{Fi%F8vB>f{wdRU*RAD)AmHIEIS z-9!uRXq_56xnKbzWnERiJXyAi9{j7t28yCkOJmsfnhrvbHNkgnP}*7iBW$R2XwfCq zfBQOudGXJoCHn;p)3Qk~M$c%_$EC2?V}H4w$qY74LM-KrQT8F~6yxl8w$$vcRdTi2Rs zZrAXVwm!j{%PSo^2M;boj$=1VG;YNQH&O&0_ z@*O`(nfki=DMa`Xg+9NX3u7|6^I03!n9Nv^qSNcXr=6}?jc4P}-&=SXk z@QP{V$UuWh&+|S#NY>I<59mSU0R-LL#QLWOIMy83^3>e{V&35torY0vsvdCW-aXbrcdO$~#?(jpe#h{*q+XFs zjEetqA>7dHRUQoZIHQC~*~~^=kPwnHt)m{c37F%gqb4C(fi<$sXEWDYGG+GUa|%FQ z5Bc$7G>V#imZG^hNxe6~F$2j4KK3yh;{78=7y}hgWpqX8BkLz_h* zys^Eqz!0P%Qa=swaOk^4!KSsR+w;NhdfEq&n}Gt@Ka$iR!0?CEL(k0ccar*Da``Vv z>d$h+?X{EQ7z$RCl!Ipn(O&cLP^DO}A>>hk@Ww0$vlmd;d% ztwsI>P3u4O^aY02AmL>sAN_gzc>d%VolYoO$-8tWd_GZFH7%>%sP4(>ejT08i8}>` zh_5#EYny$7MePJlqQ;<3W0C02yqC)v7oO$%WqX-@iu&fY=`Pxe`z4y+z1A$+d}NVP z;1U(3Isqp76}n_z{bP8o`A?T0s|(}R+R7ufPWy63Q`VRmn;Hr?-{O?hUgSK7T5>CS zdl zQ$O7oE=t80Hq$*lpPu&o_pmP{Sq!UfY5aq|Vv;D4~ylR>}llgIax)TQcsp|GCRSc3D4#S9;d<1$saE<*hFq0b|1~b?`(Z=gPoT#tTApQSiEaR+PP|M?$`n%pKx5aG{6gR7u~Jd_ADq_5p_^q?(36_N#dH4%fd)+2x&u 
zYY1Wz&6&lV1^L)O+k%B+7(*&qM8hX<(u0EdTx*uw*U0&7Q{i>>j)0KFv=Q>=2$M{F z6=ks@k0qKg>P}%8H$Z@&A@)_pUkM8)KM)tz&cRn$T`y=~{RT!V1XhX%m2OLwi<82mhhVc(^nG7;0)lV572)nc_P7>HnF;`7 z%1-mnN-&1wxU!KATW={e?BQ*DZ@4N1cJ@9i1jSrvAhz7(d>c6r)rjaEz6mgperaRY z4@&M@aK(AwgL{D3Y=6*)ScS+rx$#?2SQy5V_$zoi#r^azHAwLvYw5T zumx-ZnD^h|bg4Q#5ivxP-z_OOIPb?f)9s~@@AI6ty6jeb}Ke59cdJJ*ml`|;Y#zEqM4&|}e+i6nw*LIv4TZ+Ex(C$omIM-?94|sf$I6(*+ z5n9jH6q+g&Ol{hQWPbwAaATi`_ehWC_nv|W52e^{v~z1O^F?5#4)8!%)_K5&gGBJ- zbbqF|Ur@it1_M!Qr`yo-f*&VP-WZRnvawEFv_a3g@$T}Wsu%iFrQ4Z+){KT-mD#x? z3a_MkMoG3y23ym#bSgF8zfE?$G!;T1p>zY_8hxoM*x$pBB2Q2?cgHZa}MM0cqS$e z08!f^4k|F3ON8LiHFm{SU;ms2nVX>wFk^=CYu(Y+!O^XzfISMF7rJ1mJIAa^OsHuP zX$Nr@_O3BX9ht;d8}e5H%K_a?#(SJ2__0FNyz{^qVS~z}Wd5(9n&DS{F zG?`w*Fl}34Z8hM_=Ps-jm?StD@w;@eSOOwjZdhh|J)I)p1GAlHDl>+zYf1mYE(Duy zpt?~^xpZGMUmpa*FLN9BjeK4oFE5gKmWa+vTP(FVZ_;ypi1=o zShJSVtgFnJyY{i$u)rxg<@LU`NeH~gZ)GzHaNOCR7h>SR1Ao8Kb1i6YoI-XdoE|cL z!vX%5xAoj(-*>B$+LxY$B?&P;t_Ar6Pn;K1fYaR>!%`ts%h$E~1m3~`CGb-D?JRmH ztq%qnt9*?Q5&5M~Dmn7fpkSXLdS*yHnp*5UqmVu7Y(JdKDnZYfzE6lt2_UU5bjP6?3YV1YjNjx#R4; z+N?%hXH^xZM$e7BDOxgT)B_w)iuDi$NN^tk5hZTbIGe_1cwH}uwE?b6SG~=~vsJ^0v`q|v9 z^0F$+!SyRmWQ(3$c3~uWX2Rz4(3J!Juj^a2^g-{DHv*o?hpG+Uj+VBVYmOx9=;`O( zfiZWjj~&nmv2b1d4`(;z;ptNXBSsWw8O2j+|0v3E5?C6+#X!EHwDCI9z`hno zb5phaEDZj5%K4~A=r}AaUFo2pqN4cpCFoXEu0R`iGcr~ZN{1Q?(wL^5>p}`1jqjVj z0$+}Ujv!sO(`G3>gJyldtGwLUNHx_rizI4)862E9f4+ijeCGM}7$T`H$Bf78T}!OI z0?^*+?n8N!x4g<`NsWD_tZAg{tX$K$sq?9&l^WutQ+>3w|9&ZHudZ)p#{!-)(G0udSs7Nv(N!l*Txw=okaVr#KFr z;jYD1u0^Geqt7f3;4$?he}$44cJ;?XmIFTa*|D>Gai_ zH|^V!!tPfeYEA8~h4}*O+wx})QbK5yAH($%mS~8&0g~x@1Mq1s;Knz#mJ~1$g)q)m#ZKHL(CMxm5nQo!T?a(cbiA4n+xp}vy^Sg6<3W^S?6$Vl?h{?7l6#xFZi z=PV=Bc+86Et5Vc3)(OJM;bU{x4xqUfr}DP*#r^p`A6@t5scaHFwy@k!7fQd!67TRd zrL*AK8Ro^IXwv!~!x7K0Toc_D+=a0Yda8w$^J$gEV2#HXMpO0Cwm$NGt4OYEDrLNj9^Ap^3h z-egL+MZ0Fex?|;wzR!eG&vGfqA$V)$h&MIG0frAk8k|Dx-Z`G8SHH|m@F(CCdarHb zwhMT)Y~}&M81HH`N|nJvImdP4F5zhzhM?iV8wq^(m|OfnHD-x?-|$047s4MP*CzQC=%+`^q$x*VLD{EVTdQmSJD 
z4M#gTkL(KrNpz6L0bFIfON_)JFg>%cr%&T*g=R;VT*}$)edSpP9COWh3E7g&H#8Gg zB{a`W4;$+4bu!ab?;G^M#D6UXoq!U-6yI*=nJN4EMd}DDpjMo-C4&L^h#bnqNHLKh zn;b0Wk|61Ea5AJ7+b?d%(pg!|mqeNcn!g{HUni$xDjxJ#;+?%h^9)5-4khQeqB2KMZZPKu3Ke zUP`lvCp8EIBJ}scwF%@noF3wq@YPLjCE2FC#kLb_l{pO-ISJ%8Y;_qfq>^48T(c?D zcCs(CzKw&6O9tPnj?IC4f)*^##^eRMh`W()3MxU%ZE_f?D=2APIE!2Clq~i@xHFZ+ z*g2uFQl$fZJHPBE<7ZtsHywYFwsWsK|18v>|HO##yRtOVf8Wt(5TQWH^z?4?IiP{a))(-52WUO;jo>6ej`ji;_|>$p z_!<0dji148xPd{raqNC8u{7FP?p-`EsSnt()&!_?9hM+8| zxf$sgbzhtSm%`-|;R|)+~ri zYd_tf^iuQY%)@s1P3(<#FGSzNuU*+Ux@YP!Up5XiJG)_a0p56Hul$EX<1SEfWsgg8 z-U{c$`Q@C|b8aK>loG3s&yNlO_L}EGY|XX`>Lt}`Sf2Lb7`wj9zX&rW7kFz-f>1#v zAJHx4*-#qX`m9;Q88K8g(bP%mUlW1ay*yzT3)pWXW{Do0{~R=l5>k5X7FYT2Mx~ z>Uc=oamIb~Yr(!13-kPRmQVG2Zx)riIp=$;NJX0GI!Djvn8EM2;q^74Pigg#IA-1j)?Zx0eYEikVzLU?|{&Pm%3x;*4AM zS0K30uRwec>$70=(4^+fk=W1WhHEhjCVv5jdh?WgU7=Ip{|~iORk9 zMG?hk>TNd&Zqhp$0!Qn9ER+YN_#5m;QmS2Q?KH@EvB{&K64(-Fxm5;}zLQ#~j;vG- z=3RuX9kI2orT`%(tD?fo`0td@7OugrT)`-ZYyxsXF>RgZ`s>CL0WcWQ)qm!o#iHG< zIVF&oZeD9i1k#6ZTwD*(8Xw~431xb&mt0r<@Tnmng{4=^Z8|q9Y5@`fA5t+K zbzFjmf9;E8Vuk^KbLidNTuKEvX2LWBM##eOb4l?d#goui9O>~|m8Kya`x>8H5#^?9 zJz9E}CFd7|m6R&sf#C-`fSQv@9;q{l0gDejm6Bh-?o7kTb^YI*?-tV%h1mT;HbT z3F~X%SZuaLD1mgis0;<74LtMiww+8O8=ap$hx-G`WYK)~t?fp!2z^cfebaRb|0c5} zbHz`IN$?nCpV?pk;4t{fr!8^O9>KYzu2#t}>h zs4R%Lk{K8}8D-<|l!_AxIE)a0 zN}(!3fnk6@co~2MrGFp-4aQx;F>&J0tlknv@1S{dY1hL`KLh6>%xrBKDJu0wz5{;U zI5lp!9kJT*6xzK4Iy~52g{Qxpru1oFw_I@1oxT0C}i4r9j zaCwEatTKmBDdY2f$5a?ki4~>SuGS(PFiB|4CMi9P=*x1w6E?&bJ$vpliBmd^UJ6+u zBx^2X{2%5Bd?8&OUTnRk!0%m>Yt+E9ay8gwkXA0Y@9dnrhs2q~)TEbvKNRt`$nBNC z(ALwY@w`xL!c@q28%l%Z@rUxgi%cNPLMX7|oMP2=5jb|&gM2KK`hZgGs6nubX~%mo z-t~b!&?j;MOwDb4v$zaha+P|9IDAdYzgFC*{P>6$9WQXRd*Ao3kHH2*+uXBtT2hKFqxYLe4$gn?9ud4z^kHv+F4so74R!EIHQIi)7vsxRI{Si{v_vf zLZ)937mAAtjUnv?5K*B0Vh90SgFe}s#6t2K{MKanEHzHnyiWHOIPdE+D>ygq`fGaB zjW%CBk;CCY0sK z-FYMM7L*4PA!1?L#N+6n-ScF4lBFj>_2^sN)x9wno|!39cq$@o-;>_%SbMD<-p2ycpVy%&_%&Ku^R9h8WE{Q>F$@01eiu<&DRu>RvcldI 
zl5YUPk{F~(N$@#Ph-?>AHYyE&CkL~-f&igIZvT9x#!Fyk5RN9a`EG_`9sx(f8is~R zGg=sdnd2%#8`CqqlqJ=JdQ@nF2X#j1cIijjab=7%Ae);8`vq<=W%G=}P8Y_$RS9i_ zEi4V8a2t3#1`*1h+#Zm{|{c15+LAAx< zi4+ASdsCc*aCq3#w#hjDvWfqa*^nttT<9hYq&q_kLx3Bss>~(ME^lR9k=+asJ?7i9 zIR5p>>IJsKb4|Vy2Dc)|AR}Mqci_!L{kAc*OFyJ-04a}|5dSzE;57Wn5!+M)@GpJO zaF^ZGJP6r82Nj5lmkurZy3g8REnp~)OF*t}<^l;kPd*p2S@(7CxxsW7l=CRk*$BJs z+FvhBuPSVoJh8;EelM|12CF6X{eoRsOX#(%6|MB(=OLBt7=z)<9CYo)mc1;Y$XR*m zV10xrQ(MH_4oRE7u;E)XuhNPQrHN~mm9JC=LXfCe+^aX2$h14gyuJU{Vs{Tzz9RT!V3#NNh~-!nghV#7G#(85)`qyL~7B|n#>6pM!r;be} zDW=x2;OvEmig;r>KrD!}Oy+z@G{8glYrzB5P~H~&gUKfPYu#OxOWL@~}q;X}a z7ocHfh>7QbeIT5#_Cp}uCinaEXxlfqyV3{}Z8fg(gmQ`9seFg&gJ+HhsC!O&;T%2_ zOsIf^5vHe5)kd#*AV_jsB#KFWAwq^%?uMor#&2H0Y@j-a=INsQ>^QSs;+0G0-%AVC z5LHcoFhC5->jlt9Un5gQviBoOF;dVa%puW!QTVBg)C|6ZT{7Yn$ozrQE^ zjneVuV{w8a`dLLlCw>Jzz;D%Eo0qQ8QvkdP(>%1)*Pd>Bm?Y=aSr)$Fg=NM`#b z{wo1lCL2*3-Z3UH{K+SE1qvf)I*o;M^s7suqXM&M zz0}v{$T?_0WdptW{fliNU@KB7pFcWcJ~QY)U-@!Bl#w~O<%WdmfuUN8VSN$uv6ZI^ zD})B?v#6F>%i1rdv$aZ1*CAyzqcFHo+*OnKkG|PNrZ2|VfN3g;r|)qChokp(J@pj3 zrsU$TD4!AfE&}-$B7KwZz(y<_V($s6-;F0X4j+$v;y9%47mVx}bOf!GceRyCbKsiF ze;7mvNRq>#?&sEC0Bso9<}voYrI2a&6TyxEbt*ApjE^Rtpk9g6x7cyoM6uN(tI9lZ z3~-hDseWtC=I!O`DuhLyd;73bAPD*f26r3&z6fx2dvOgny;}qhG693ob|8Z}y{fp)Fbrs2|&sq?e`iZeT@*$CGtS(HYl&m|n&@+d=*<)8c7#v2E z?JB%HZRY1*RxwQ+OL>D55lw{1n2kC6-L4r<`WGWIwHPEqj04t@(f1r(Wzyw%qj3Ga z&>W=hNzkCY7KA{TnDaGr0&Y3)@sT8&mkmBb0%|7(09@)I7Z-ukn>;|r- z9KXR*HNL~sE0pT_oTa>7AYcy52ihh&t|VbHeSUCcC=h}8dfqo_oqZeNAG+x{eSF2a zTd{tw9bI7Y!T^s(Dqc`qP95V>TdxRsqn6r9?osbGE5^!pZ)kY6>sZjwu@yI`T{aS2 ziAzdzPRMvr@(6j1 zU#g5Rx^wMNqYZ8l3&|MNi-ZriTJvURO8sc?<}j|L$rS-M+Jfut zl(#MD_Td&ziHtI*#bh_GY&%fSVx{k`=9v!0y=aELW9*yXLk?NLwvsoh4;)`FT3BCz zQin_~W{S|s_PUBxI0cAAoM4|65J$QHbKeJ%HP|F$+unVTw%B^W$qp+6nOnhetNev85d$$qgjg_+5tcR_Ope*t zDNh@`92tKyV!xZLMkA|PioKV9bMAX##Of8^)|C$HOZlT(eg?GqEQO;I_Jo0h8L?G` zsy{1@fLVFw0<#E{Rx+0Th;njg;t0u4kArnr`DV0-_O8G#nOLF$GqrW4g#{**cQOiD z&9_F*2QdM331}dfnxd}}jk_la7*uJzMx_t=X1#e+POm{4O^%!OuKV|pF=3`C9YH%| 
zlw~5)?MN4J*Aw4&rZHU#MAQ`!nb`c>2e?17m~ei~AA-yyD&3y!P{EhnHB74$f431~{K)-^uG>C_6)T7kx^;D5 z>_xwgYs;de^A6i}Tg$Lt@+%)hY-13lZ{GkmPSGs0i9$sdQ-e>!LXw1NLsHu;Nl z_V?`IZ*0Q)cQ*NKlKzcTexI`U+gbbxCJcZ4DW53wNm+mX{4buxzdvFAb{79Blm0U` zu>x~6a;*t5dF>6=;mnzvQKVf1>_LytvOM4?qTylUURUDMtpEIsTD+yK_qWFYMY>)NRjGZyu$$GLo1OLd@I`zDr!;Ukto4m7h}(|0 z*9&W(qHhJxE_U^u&@2-Q1wA~!)&U8^6nstG;P69l$mLNilJRXb30VVr$+^xhiPm0_ z20)zK*FJ84(KWh=N1Ft%w$j7gkQ4+k@m!2J(e?@A;)&WKksD zL+Q!XgE-7~P=QMei-T*;@m&4X1e@v#sV2BP>kIbxANmjkkQ0bNtZqG<=irXa3tZ^= zuDN@fUpr*uGRjj5_oSP;9IwEbdbTSoMz2AtFn-Dn=TUSdgNJYUI@YP}rIcthMTams zrw|kHI-XC(pOtZrA4{-UNhae8AG}wexX-FqRzZu2m@BF-YUT~`*n%g(7<`9ffi z{UBu={=migW2CDCORv_Y&EJx!-U{UBxocZ}#y;s3?2AD9e(NJxRpjW|el?WB8F$U4 zGTrD|Ks5~9^xgaq)Y8s6V9*nIjtYHdhk3G7D2`c;%Sc&nmG(Lo301LNWq_4JNA7Q5 zY{6n8L=}*UZVy?)_D*x2I0eZTu90zCM0h-t9HzE9E`_FSRT;f*);c&`~B{VY=hZBg2Y8LppL*d{LZEhf1!2ZNhicW#2kroOF2>YC#i1 z>ymp(`6DycR#EiMe-LsBf1P*5=>^6+fV6ANnobGi?>v5Hk(|Z0MPe|#i4hJBtoXs6 zvTPeG%bG5CWRz`&(RBa;;K+wF8n#}MKMD|}1(v8bK@-4b-vE9^6mN}HFzd&bak}`m z4Jh|SdVWa0{i%#E7!x;UTDS`HvA`bmXp}WkAwKd(S!2ir=iXrggd>nM#2)_eMSKSb zz~k1IEea99z8|v(e1LbXLYEs=hH+~sYvX;S&MCKSPs;?l1DJu3|B4Yxfu6@2lmW_l z!~8Soem{XIinIl2vi!t2b#Kc&9`zb}*mw91%U#G!=9?^w*bS+LK3;ThY|E=>|S zgWQ!o10A=c_?waP+PeQpHEEg~4d@4Z+Uqx9Vd^))f~b%kBNLNMjv)*&-V~T9sW$UC ziF>-!-zI`*J&}F6^;Aezc3g4;BN7OJDI9l|R=Cga+77OK^bDD%_Qh#|go3S-S0*~1 zDeK4Kzq?gLA+F?`;oWQ`R{ZF$9t}%i4{b_DiR^NvgvBdBwKg5$fhl_hXgr6OCRBOr zo#p6P>alR~R;%D6U_UvGdkH26e3xoQKQ?Mw^6;e{-GK`PAnd@#GzW+fq2e(J_!X`N zAl`$5r~L^Ri`2k1b`%6eu;x&0&o@P=7#(tq@UUfFR$Z~232}@$(~rov$!3^{*{L4c z?V7@R;@K1KgPdNnJz5+EesUT24^KUIzrQ^!K001x*dl>1_u{=3{|pph)1W=orw|kb z#C{T?81x?#l9!9I6r8~l-a=?O4`ZzDr~yVUGQ;@wT}V17o?A`~majM#!~toXQ}x8{ zTP}$hrMTF0j^q0$Y+8(5KK`#O9L#<$W$PLfreOa1Y}^?6X`bg{al%HPNcbkbf!4xX zL;SOqaZ2mBSSjjCV%;>;3T+UJ>l_h?0qKWydZ^qKi4h0>eyLpAbuej>PqN%TCLYMy zuSsA9g{b6bIm>bR$&ca1kBQlQ^9BxMc5Z46-F6dXJ9IuvdU(b}C1}B5&wieyH)7*; zB+|8n0WZll7+=VN+_}pyNv#6k`;6&=_ZZ|}k^tqEB~H;XVRU?msvG5+>5{$TPnO&Z 
zc_h`p|I~M}cODT|52bGt`Cer4IQ0u@lW?3S-+)%pwnm}e;iWg@+5E@Y+?#@ZEZlLT zCE@5g_iH%#ATR~+)ypLFB$?#I98Gk0_gmoDbPx~Afg5@j86;fMM`W^egoGi3K~Rdr zk4h>58eISe&$?2N1bCoII=&s#DmNmwQ}v~O zL5cN<3Hgn%QxQ(5?pc5h^uWhJsAc~0>Ul7UAuN$AJc#5q=$~Dpw4JY;UJiE!`dt_E5wjO0tuOnRVAETuqsy1OCFb48ac%EL#H(ATb=)wSQ^OGlk4_YgBKZ4~)V_MMW^wrw)c9#qz7hR1-d6iIBe*QtE@OOH`Y;E(b5qywHL+aMJ_iA#a= z56yH^t$h*1Cb_|CFaN{z+MigQ?UaxNPA>Sq5(E9guROLDfSea(wq(K09}UMea0=xQ zqlFDsM`gpuHW%&2yT?WkInPTM&;t&2TAx;v77E7%edQgj=e-A`;T8`ydO0x&wddz9Px!BcoPQ|cKMg!}Z;FivY?@eE)=9}?&wY^y>Tb>?&;puiVjT%cP zlqeT7Ug~JH{%|qhov*Th_lUkXzV8o?Qmr~4EAoNm(P$X+n2}HZ#KD%MY_7T(uh!=B znM(XR70jJKPA84u=YCc$$$KzAH^@0wH?POpjMMJ1LrT}Bw|iiaZG}@to_D^XbEKcx ztfk&F@Vm?$Zn%Sv%|zwl5p6p6sY8LA!_HLMv?AltXqK)-xR^2uDOIt(&qZe=X`ZKg z<`PGBr&QCu02dC6n{Od~ec8GV=;Aa@hfd4QxukJHQ^Y>inKk`)-#hNT-HfrzspaAU zk0ofjNpv%UFxAm&zbC7@H!P6`^^d|stw6h>N`cHKuzmLSF)oc!yEsvgXLpse+m{8p zC_I@9frpdI!2&SZ&_Z3D5W^`R-2~D0-l%qOC620vi#}odSmq9_le}b}^m*RRu}4A& zqRj-CP1e=iEglODwsM=@MJI0i2?5aiO3kI~?f`P~M5UZodfE40AuBb}LT!$O=GUxp z9X(YPRiYIT*Y!pd_&Srfi3XZ9*I~S?<68LYbjt6^b_Tb`9;$NGbTAE!5^pfoER>N# zso-Y64Nq88q%}hoN(_5Kj(hp`?pZI>A3g{nI|3)@jcCGZ!2B3;P5C@-w@(alzF z)-SVwV&Q)+5Ml`0beWzFmVcX4zga22nLJ(!s^Zh0NznsEd6i?QRTMx0kQ?`Ow}h1@$Wd6l;q^ zMdL?@&MLNhcm`Kh^@iTNj%V3T6E8qjJ#J*Uis>QwAVL+BqaZa`=t7l}0>oX7;6iMa z+UB`2V?5AYi?o`{tkZC<3rMtrHD^tN(iV0D$kpUM5EOv9 zcV~`F%^UkSqp(B7K+h~O4lji z)Ujq9GOep?#R3lIKpvhsjd6Cz&;t+K(PUW0MRcF+dW{hF#|gBDc$;VFY+~am#!lt!;|xrdRuB@vW>YX6&Qt>}ctH zQH^7+h}VdG^a56x1M7w51kb}@luAI+Z7iF!gsgcrFi4+9fI^9Njb2mu zt8fC_!T^*EarHHM*w*AK3~Ms!WqP^%k99H^L3hLq`({hIvU($-65;gQ<6h9iXNd7c z`g&5G3rgW&Rxl&QwNGy@48p=y=V}4*@#KSn39Eh-3RSwGh{xDMNyuiFfT+Lxay=c8 z3}?-G1I%-89NE|+p`7dxc|Q*GSJ0)w{AOPok;BVU)U39cJXLhoTt&5Vb7Vv(^;Tmn zGwLiCtsbu+!QM!kAm*n<^n)cn_8EZo>+> z;l9zC$I(WVMRz51p&w=!Egg1XRcndsKEDNs>_gank^(KIz_?tdAJy=>5y97a*@{>R z)|$)hUf&IsZB+DmY-ph8F@%#h2#x}t6u=y#Wx(k$BZ1zyz`maref(~OWkZN^ID5D| z3A#MK5wy$O4o9kyrG+I~0_FkFg;HN{pOoVE9uZdfkdr)IzMz_@LdZQKWdK-r zg^vXI!56zvw^%`9{|~q4hm&=-O`pf%N4$lsN%Q#GTd%jf!>L6!-}hBMp8y^*M1$O9 
zXf!!*gq7B$cnAaxk#WnJa8&{HcgFr_O(f-nki(>HuLlQYgPF-jbE*scK~BipAU^pj75Rt5FI#Q7EKGNw zZx@F-!?K&?+3RotN)Q3p`ygK0!1o7zdD9;%KJ6gFn#EAs_Y}l+8q9%&2`#aSxS@nx z9;ei8&_PnmPX+2j1h7op8SCvAwdc!9aa80_MTYgxN*jJO#TfReY2VUeYm_cZV$!u7WT-)oXSf31vz= zKBTE{56kliP+&4!csmH4*}YqlWmvFJJXDRP^S5Tl4y}myBYv8?vD!&r=Dcu`{rp*u zDY=?A$$){=t6DypN#@o==NP+NMzhEcWkylgx>`6Vz20D3&ivQeafM&CEZobXDDrW_YN!N=2i|O-&Hp(|g^R=QS}P_Z-j-33kk6SP6V+l{w!{IBTD? zB2<8xiTu!z9m`&!KCyprqZeLvRwx{ zQ!oNg&#O=@A~*F(@)F_fZndN(a6*$^i|xiMi$z%Q&yE{s&Z3pH=h^Vh5c$~ReL^b3 z53;CP4RIfjcWc!S?CWUez9i-vq(G*~`RF*~NU%p_G0|IdoP8VP%wY;fMbo)^MiIdh z+w*U^p7bFn(TaO5?T|u!aKQxL#t?9+AZFoFp?ennL8IRSOri!E>!lMAfP}llNE!HI z>w73k(w5%7_F8lXSUv<-k%c2pxQF0CzLh^^j1j_i z1{jI?%$V^EMjA_+HqZ@Ml$HRZNG4QioLY1T`7WL$VZ9N{fktiZX77$xl* z^ot|PDtgy*z=4Ln>X7~GNJR2Sl&SpU!yQ`;k3Z*}n5YOqG{4az_DaS+ac!Vb-VuXD zhW3IgK!yl)sU&~q$6Q%!hB_{5u_WYft76VOdj)uGo#v;bHhlb9&}j?Ednow6Xp_7Y zmP6($Z&nk7$0PAth9D2V8o%$fA6FT(jD=vr*7T3^bhJwT3BL~|*t25dDY~1}fsPwn-@<+0p?eA#C{{t%hnJE9mi($wAYo+|ZAVgK8}|DR~5P(#aUwGA=LVe^K{D@)Ra5DWw&hTCE)a+mW$ z0+YuI?m0v1h;G3?qFlE&%TO8in*F8+M@LHbu-@v(`HR@hVSg5_NetPz}BTLc7 z&bZ+NXp|g!NmIHsS)J}ULx=uX-(oZ97F>@E@98er2Sbd6XM5jQOfwdGfs zLsIjqHDB+FF#S|*)d}RJKOh1Uu7cb(1aTxi_YSWW?zhWL!5=0o#>9rYEE@BBHoWme zuiVfL$-RC(*}obgJF*v<%SJU63G>M36G4fcLH5{qO~Luuy98r~FOOziYIu z2}%v=#;TDgN_Tk5B&e(u&1v>;*r4G{Brw{n4}i%se6OuI*M)jY;w$uF0CZzqj zdZCo2jx(ku4{pQlw>;3NWsO63QFksAC4DT7nYIiWt43ug=^HAzBRXIXZ4ql%##p`u zV}k6`GXeo9k!qw&YKci%FPI8=nU1Sz;>rk4+g8+}=%%yMH?=$Q_wUI!(uPmQ&`3`4@^eF^XuN(?54pZ9fUvv?3DVSc=f!ivp@ zu-|`=m*Q(Fa03^Hz-K*=_wMiC7MIgGa72Tc*l|8wd*gD0JPPNwH-VLlM45x zdi8n169Yb#hOuCTfT*!Ck98_k7lK6iae=$O*7$e{A|#;z!!b}}ac;i*gs#A8ap_P5 z1s8C&CLcNLU&=Uu?WxXYA=LwVs0+d5qqN8$p-T_q|IRd7&)<~G+? 
zVN|q8m?_p+714%4p)-ku&9F)jq{6a1F4t{Ti)IlyuM%5!!whno8+!vRZO)c-cYJrd zf3$t@>XUw&Fy6~(Z+J+}&LtO~EMeFHRWwJFbl$)=U#(bT893-ExSL)$+Dx`EzyXfo zrz*KN;DH9BqdCSK+H>_i5;{{?J?e)52C}}X@Q|gH$pXhj-kxy`Rm{ZH#~0*-`xOf+ zt0}0DRPITn}#9cW{LSmA+hxIwzAY;~X6( ze)~vxOFTIg2yU>5aR|)aT7>oyKJ%Kn$SoH}!dwW1zc2P@&d#^;ay|I@xLaTRHGwo)niVSM zD7e-JHSlqxm>vA0y@B|PangV1XoYGwR1Lcr<5e?7`8Rd|65eU} zXHYF|AG9k5O-^l-%8n~DNLk;Xwq`%Y37e7u?-4Gh)wKyP2#AX!zfZIb2XRTCI1G11 zbOEn9FmB_f6ImdfDD>pp_yBvay}RY&3CCuq>`5Gj*5Zj>L%gS73U0QYaW;p8FpDh_ zZG+XS5gPhhQJ}uEW3zj|h$4&b;C^2nj0vM#q750R z&j{OQH%9Ll^1nyKXggIR!)&CPCO|b6ch?;k1&TNqe67XSduTfXkRWDx(c%Gvk378y zJBf(?NrNG7rcsLtp4e7*jMwnA!9KBpPqY>@HDonr0RLT7Cnbtb;I5!wl{BOux4o55 zkGcxCV-^Pf)iJv@2R8W*WsgHi3>J-+Ab*(BhcKx>ftX%-1i$HPvn_-5@(+QtGbAvC zxTP_jQLBUj(@!Q**9Qb?2np&?g4=>YHB+s$kzP zrRv9%JT&z|$r{Ft$0HhPBKi5wMVA}O*B38VkB_yQ z_%;=MTqh_s3#Y+t9|*;&BC^>01nSIyxr!1UIFU!51LAW}-#8H2*!bqJ@^#!f#vc*k zj?@{g$a_4~dA*-BnYRr?K@H8}tysH6hlvbYcFAf$7hpMssV7q0-k11RZt2bF5DSSe zKd^_C5{+pz^%!jsa*?C9Y zedlL-vRH1)Z;#yuqRHPPYlzp@d3;i;Fo-{5f76&Mh<|9wF(z(Zw?)2S%^Z(JK9qK{ z&F&nl^Cr%gUmolQ`RpnI^m!hLsH{H1j;9HZ@eC71xn(4iXOcdJvvxAvuGjx!=FtJf|lMAGn2u zdkC)=ad$UrRUFG*vX=Pe8mbkBPS$(R%Bp+WjB7(m5yp?aDmwaitaugDj+K5>tCTns z5|J5LatOl?U3yiVyN4@&RW65XywN3+YYw7`>gA^_JL{oq>wtZ(Y#&T39jAD8rPceX z{hgn4I;k)7}{CYajOj*MOCNi3Xl0SIubBo~6$13W~{Z;&>Fgtn?2+}cqsDV8^~qn~}5 zyLcfJ>!fX@tHStQu$vGav^oKu`!tI;^~PI4al=Jnt+cGIl2IyuxV9-;90Tw^Gt5a1 zbl~)qfjm_fqN^g1g)yCQo;9O0fm~Y*xXz@&QH&VH(n9Gerd3~aN2jCiRT}iq#o1cw zV@btmM^By~-|sONNxi>`%BiuEyhDsn8kw}8DAhK9T@}WtAF#R@Uv0~ARXvdDZ?4~a zvW}(e9byNK5 z+q4{JG>s?TFdNopn7wil{vQ1W_A)@+jp>PkJVZBi%ckcyXLxVlv;mpdHs2jaN%gA_ z$ZIKgotClHO(TmKl3uRJp~tMIM)-C9wsFT-=Uj@~!AsXSfSIY+zMiNikI%$imbF`a z?NC5;@O9RPPDAc?P3~7db)+tg|H>SHN$bC|>Fg~3{R}1=J-pf|7UxK{qF^` z|M$%C*AxD$J;VI3nd48+`0wnQfB56ep7}q#rpo>&0sRMy{N1AcpID?(Q>%J)2$6P& zt-E=OOfC`1s=xC(4i(gJz;=DWA-2W)i$o;8Qxg(W#3aZicE`SLO`c8AN`w!)OoM?{ zY@c}_KKwe1b%T71cVnayRzP;sPz)1Bb8CM&zHOG{`(9edl^RA!aj~)ST4ne6slML4 
z?wNn%%z}-LzQM4pcVhFs0IsU)rOOh_lEdY^X-#$5vPApy^JieK__$IWc`mUAEvfdG$I=1kliGqRhR#07}7n`GjCI52H(=A0WLX_{*6{SC|gfwItW5imzr+ z+n5nzVKsRG<<+uHXeTai+P(;?wNZS~!FU&`;8GQZDIo%Qpq6uIHZ z*poP)%_R&&KPzKa1`KK!c0F9N z!JmDRd&B9+BLOiD+W}4Ay68qgy1oG?B0_)F-}=ynH3FBE&+n$u&ytU?62>@6ZhH!DVx&#@Cdpq65dvjmZU9Ax{CGw`&5x;}3nXaQbLK z-!8SXvDU^-A6rur9pxbiC!UjN8O7F9ak6qwRf8pn;W%#(l1IlXlLHYi>;VvZ6{eku zrHEh&U-yW^A51R2NeMmq#$ipWq@OzM1Z&igRFTEw?H(-Hmkk!~!%rsGQDr^t-OTM6 zq>N@*LmpF+R_<}6%DumSbN;c!N{EUP)@BzirYLmhph;SOi2RHCSWnJ)xfPU-pc2s{ zQt%$AqbA2i5xL~&7L{^YPB#cpjYS@M0!YI*AgCCF`sOs~P{k{PM!dJdYxm#;0P~)p zJ*sV{lFMB&fNqj+-cCWQiIpJW#iTj-dCi$zgR5s%54*6-WHlkW|!)x;>O5+9U!8lp||Q`6;o{oKX{wEg6(y4BNb95D@*D&9A+3%wPD1ak-n@8FMqlCdjMV-!r7zHL~8tsD- zCI1%GK@K(@@Fod$OM@O;;E~XGjtiCBh{HyJc7ey1D|S_?!mz=-q$`E7a;{^x1-Y=L zgZDKQ>U;d$z4RdjijK4o9z@*RRg!(u;jEH`v0j{*;#x%EpOGvq80@e}i@dO4&LbM&g4R}(=3c33b#VD3xk~2rWL8Zs43i!05<=Wr@ zC*j5^;#DsRvm^XqIaX01&V3ZjU6h-oq{h;I;!K>P0g{XXQZ9PE`*fgf$1@k@1XSW` zd)bm{PoaND6Ove1pz0r`0eLHXbf?g~qKG?$pUL+8Rt4x#Oq4_tM_}XXZP8@VOx82X z##{BQ>V%Rv;g>RsqV>Gp^AZQS&~>AyKtuq1TR3A`?tsI9(SY2Ja?D~UfD24u_CKOD zHK?3BS_-9cGfEN&sPc4N)BccdPmKNdt#Y{305V$rA1gh(yXiW?z@J>{;Bp1_$aP+l$ERYPJh#S zuK~3%5}OL_oLfcFEw*;0vVcr&yqh1pn3hxi6n}2vdCy$mc7#29=`dlRpfoU}MBt83 zW5KfIk@{MJ&|^gtR3%|I4W|W4IhA-+-w<5@hcp!Zz*$w^PJ{itIo+I%ebpNVGSJg4ti)@%`m1(h zYYm#rjHSo(m(~ySeynQOSkzPufVI1K2(mUG10?qTE>KS!6Ui$L5D?iuWhoGkKf3IC zI-Nnn><78eX~@x|{3%tlT@(A`G#R-9p>%R+Ea-Qac<71Z9+fN=Gc<^=ghfuJt&oy) zP0xLVDu|Lu1XZl4Rt&*n4P<4V`}?|aso!NPfayN`H!7nspn#y3gUX1s7H6bv*mHF~ zkRrE(fW8H{Ntz8M%80OcyjBPLX7y8w=5tNih=W<4xeQn%i5pyhdFmH|G`Jf6U-)bj z&JS=U5FbS;IM}?WH+zT%UOy=Ww-HM=TFZYcJA&*ecM&zKME>^lu{I);KapSbG4L#2 zwbs{o(pAKmmPK)X{4MT-txTrjns zEBy8Gt}SpWI&!hblY+%_iCQ05dl}MuvYL!;n57V!H2m4}{I$k}Otb`SAdy4`7^=cy z!wxk=M|&}Cv@JkUl2&oa!+$OkRFMf){E)m=p74j9K=eQ}cV8sJX3+z)2TDzSG5{AC zQ(fgiM5@VeXkP_+XQ6;REHJ)NlPhB{i|Z+!>Cxe97Hm{KaO<$$=^kGYzi+ZcP>keI zg!|WkZ@OXvmId4qZR=xOlu##;QY08ch{U$}_>LU>z$pAYG4VawI^7UkLjA;0J8Wvk z9<^fGA@0?DR5v`{*AK^{q^WMKSm|hj0;n{%(&E&o{<~9`oNjKP$~T+Y)=(opjA3A? 
zG84wv@bdVI{^Mge$D7&qh9Gc|sNnSbOjI-m4X7T1tKPSXq1*tfp__Tu-(PkRz>r^J zj2d@O={Y_JL1AfzYwbR<0|OAEZH4gku6a+}Og=b%W>0a96a;6N|L{ymu95xHGMEJ1{n>_^U zha3|{Y;X7jzw;A-vejtvU(nj$UJw)$!yg;B+5ha;{x`Jtw;l9{L&W}fzV82l*8c3u z{NoV);R^jpUw=Z}p8>i*Dej*;CjU7z_Yb+ziT)?jW5s7=|M#j76WiB|zjAS3*3?(P z@GnsNU)Ga3{*~>-@xQd48a38!*V+)l98-(!t0~b~Y2uJXSk%bK;7(>aX^_y!rym5{ zsfGm`W&~%l=DdII`E&JSSQhGBtc*$+^z7Z3v1er4g%EvRpv>m3*Bp4E{^4YPQ@{VR zf7+g`X7?>6LIn5F%am(kJoiz+a`y*yi@SA(IXL!YV(SdQW|r^XL;vAM{+Jqo6Az2CK)}}_awAOp=KSp z)v5v+k=ceGLI%*;&xqNlL-;E_UHa50RG-$jv0}#c?c6i$&z_UQIYJqn;gJN{`Vx!M zOzdhUCLUZfx8WbRQu!o0=xDC{Z6#@C&*IQdmRC1k{J#^k3cfK!$p~{BP>p32W6H*h%1_1XGLf);yeZE0IO1R6w z(#S4$j*>D2i@z1etOWR*oS}RTVqj}xNVt8!3m^+6D3da#bH`8rq7YOxX7nY^WvdRG zmI;%mR<7u}wq)ar$Y&!E3>Qbb{fQk6p<`(i#Iigg7=^&hnO=)3RxMPq-A8uOlJ7j8 z5QQGl_(CD63$@s{{89*VCUxy4V@`T2pDFj-Y@2GR%d5fDtk%r>Jt==19 z!QDe~SCiHD7}jR{lk7c@!t#XWJfc5Db@o)9!uo{739C(xlGt7r8iFS_SEjWh0+XNQ zr^y|y^crPuo!P7px#tw+dcgh;NQyWGQoOWKNIaxwESR-*txP_Uf`$dWxF?zR+4t>- zj+R}P*B&q-5xMhZJ};52NF}r#Tla?#ZNWEfxC>|KP?e61ZCNIB!ByWfRxB6@_q@Py z3b*t;;0Jp4D-lWK9lmi#rfIlhMKL@5fXhv=Ehi)d28jGOLuPun{vZLQl|0wD0Nq_J zPKT>}Dl)~RmtY$uKE>#k5ZR^vNbv&iK86I0O8+MT;9=R3c_bR;4`LBJ-o=T%BixTM zU9j%FxF2nroh$8dWD@R&j+PLUSI&{)^VkswX2u^dXLIHl*_j^PH^k}kwo5@t^JUgx zHFLvG@tmDn{S(H+hN9HQNm)t4S!3bEbzWKQeRgOwrBbSi9vH@~Wzb}Sk|59yg)KCV(6Sy3S`Ma>$|NItQx;v7X{Apy z3|e9DJYHF6p0tH2v;n8p(7Fe$SY+MWI^Ko4Y5@bPbC*)~^M(Meg%-r(z%l9ol=~23 z2=(0Al;DNr!-ubRz40d}-NACSk9T0YFb>?wE0v$eSug*!y0uKT~n5Y?89v@EUs$TKR(td_!0_Tqc1FS3kWxIXckw~*aPu=+d4aa z`mIKRWU02!vU=Cc}?>n>`-Dd%w8! 
zZH3FIsyO|P2{Kg^#D7jH21yY+<_#$WY{V}rI*^)~NNyhuO^prcTCji+0;Jrn1i%sI zIHvfJ2p>7y|%<*wNDU^t^)>g`9v93ZmaDzH@CMPmQM^s)(GOLoyyb>D_)_ zGCc4EPTIk9?U#d_d`7#`cm3;sriIV`M5253>PDn$FKE}#HxnR>Sz-E^7 zG)fgs2hfRd*H2F(t*c2lHt*C>g5OZI8~=#swhbPP6F5A76^t&1=}pgvNTM z%ki!*xy7rn&yL!+K2a4~pXJiL%69myas zYGja!*w{bvwatQ-gD^*?P!no;)`m!0BcM>K1-F@`L|8+X^c&$b=r*W+O-T6J9mii# zks&@2@)Mw(fY;f7(WN{XarP;GPe&u|SUu`&W_){a!Iol$CW^F5!yVy0 z8YJ~xj1uPF28wHT3D5CH=R(`RY8Ccc&;%o({z;%VE-SQTPS7kIdJ;4ZnWUf?tMx1$ zuvi_2Foca}i0z?v-w(%fD~C&Oe{{hfk=Kv9r|81Hm&Nc4w4bP8d0~ly+sCKGGK7&* zT4CKPjgk2rL6o6IG;TuPn)-w1YlP$hWfTt8tKLTsRv+PWp${_zF_hzkJ;aEp#U`tIf?#utH^2|kM@>BP;mziv8Y3x5xLcPUF`=EO9e2wCfs%;yc4ruc zycIR?xulcX>h1=+3`6Xk|4W_`UZ9mbvPd93_m@(-{}x_NDmR7_u*k1TQ~fAwFtO<6 z$V5R+Ch+r4cpX8KVr4LS>e)?0<||hzA13hP9k>o3B^*X91ZmNL9i-$aY8K+Tb+SPz zldboxrRT?0GTuVp-NhY{0ABg!k8VEkKSl-?*kN;g-qyOBxh z30JW7J8?%^BQJatxy&7-_HHhp{Cg81oS~8|`^f7s5QGLx%l}j~H6)jDp$mhd|c`0mZ4D;aKQvkJZ2J z&W(Smw@+YCI{2&qD&zkWmVcQpEFAv?z5Z&8{JZ(_uVwsSGW9>3E*$@drVGdaFZ=&k zZt~X?{{8R%Bjf+n?te)7Kc)DeGX78L{*R>p--W(7{*1W&yV1h&cc|@uGFlQfG@`#& zc!Iqh>E+_X7x^LnI`}1npzrEk0lZ|jB=}%Z4^LCaVhJN+>EXV!`Q@|B%_A3Xv%|r_ zr>_s5l$~&PS(d5*e9loqt7*tT2`E>L?cU-wPw90@)FmqV#1faEXO%R~+iirp(x1wt zfNV_P)H7Noy6Y`aUP-TKNiSCWZhpM)rz7yfzY&r@-O51FQ4*4_3UO`VY~2#xY+Ub_ zT!sU8AA@DrnmwG-Uq9HeU2&g7$`!GYrSdFgiK%EsiOs@z*4Xqb?S^Z#%2Dy8q~DW5 z8BW33URHCL+~dHz9_Xi!Ij~LST2t*-vpFcc+sW@YeTzEKFarM(=hN#E_>wG~*<{}87Y2RYC#>9xFzG94tkH(NAtN+?u zt6Q7hpZ(e?5gMZmQOq8$ua&9M;CVaR7Hq?VkF}7gfJy)CUX<9E{b+lTQ=%ga%~jMm zsxx#FF3=)n@&Ve=t`DLjU$9A0Q1Kl~FLkx+?fL!H7fb@+b3TC3W2aw#h6qSbpQi^6 z?apsG6_=^ogh$`rJod%^YrQXtyAx$E!qkAWU_*fdI!TT=X`G9wn)uz@{euUxnNNmT zxlG0@UALWgTndVTsRgm<6*lZ$k{7axo{g)=5I^rRwwUx+tLlq zdX~#+f{`5qV|BpABrwkO$duq@BQt3)FWX$L%YyZ7#~sf&XA`bCGAf%dC-8_mG>x$b zTNf`p{8@Vh(60h`SBmy2bXLj^2}qH&Ge?Ykn2j{jU277z?{SD>=NMrH7vjR@rLe=3 z8syf;Sxumqr&;Ubb*I6wweWuF)2E97Vs?@8!9!mb*xDoQJnN4GS1XkTvmhr;@ZhY!|2!f(_Jw?(*|Yvk&zZ^Y@U z%TPsSZ|xfx)?~w&3Re>|s}6CAB%6etRik%zWph1X5Mw?R-6s%sb~l)J`!(G-FYW=J 
zAD?Fj`dd}_nEOyBk$4OliVxiw&x8<3HIiL^_SssyK`?wa@T!eUlzEdN(CG6l>Wf>z`zIx1zfL$N27Ig@d5^Ba?h~FbE;(C>!IGJ_eaO`lLZd= z2Hlg{c%CT37`Fd+LmQDs*$B1i(yPoM!b;67zsM&>$sj1X5N?H~GwD*XihOa29iqex zI=E*q2bIa>2hJ=@M(|0itac`vAbzv);HMH9C$^khvtYGl+!EJQ`YTap&=9dKSqYMf z-;tjBh7UOC*s$9e=)XJ+;(Ce?hMgat1O-B@+Fh|z=+pxa_q@J8l*uR?GqD4mR`XWm z543wfhi4N8u+JLZkJQIqjnxy9@zD;xv2&>+>2RP{)EV_caiLDc81)Kog+flD%b1~= z>;Sby(E3?5`R+JTi_mSuQ>K?qyq@dyL&tlC#RSoSUF^1@Rp; zRo;I@g96rLBTH*ner5(TMFH_m{eZsLvy$FdNHI{@7btCyf=ALl!v_u#f$bJgg`6#& z=-Er4yax4B!36Z{VbiuV4n@udQUXlkI}z|=3oq23avM3|Qjaa{0|ISgnUB{v+}0OY zO&(QtFde`ED`t1K;kmjKXaC-N2M~4xIRQAf(#tjSvaL)IGVq@}{ z9R<69vT>x7=`CX!ak8*XoDSdEfD8o<-yHi%Vk<+xKKFbHv2Sgv@5k8f;`=7*9i#%D zTKDQKsI^arLB$)l^oJak#8r=ni!JeEHdSPCMUmc6%>pNV=Qb~xXN)H;DLwVxx@3EU12C$)T&nfxh!QvzfJ8pIr&3egYOfIu2& zRO`ztvEw~}p=CKj^|LDYnOnD-)pasHBa^K6jfY?Q;iJ{kEC1>(nrLjkqHoUTJ6)8{ z>ak_e+S&u()uKfW;pl!%U*r|_@GgmmWO14%hm`r9^b7axehs-%d$Hhm?kq#HabWCRk$4U)Ic z>=q3{x}bJ7c(ZN5e}Zout&1B;)gTt(Bz9WmJSI!@)#X|h7)&?6`q#0sLvH_EdPQ6b ze61(7t(Wb}nbg1+y(k_uNsC*jKi;c-jBZqn5wYs?&^Y5^auMlSH_3*v8?Wgu`VN{Q z2I=4#st7>?>^4F_v@Qe)I7<5ZHZymV+JO8dBLd6y%b|a}Zlw_4S*=&dmey1nB4Hb5 z(@e*N>L`B2cG}p}nm^4||K!#?m0rl#qqgXjEsOi7;0h~~%}3v_nT=yEs=_)Ebb5{* zlhstK`XQJ^`y2|4T{zk9TT>gO$#&iBK1X0-H*aTRAX#xyFQ^xSQHUBQqU2Pp5OnOWQ5e3Y097fYfk2OKFzDz4y_N141it~^1oDV8B_bqv#pBI%@=hz|6 zji;E_w3z{c1xF`-NN=F=w#xE;E;%f~^y%bHC#U)6uaFMeZH+)Adz|C68;zW_)<%|d ziAOj$KM^{nXqhZN%G}hjShi1t(V?aAGEQBBeC4XnRNJ_A)#S_g7xMh?#x)cpDvHF zj=xt&{cor2|3aSsIo^}6S)n?UxhMU=^J|wmFI!h z|b~0xuA^{k?Yjgak&rAKkP;WHSHSWM}@joJ!WjiZPw>@J>H{N`MN!e>&-=rgrR24 zSI)gfs71x7H6ygCOrMFETEYY{Vo?Q#E>Vd~kd?EbQz&v9YK9p5my*xrqg@uO*oHKg z`bB*4Qwk3J5gYD<9_E;ZIbC1N5Km>wOm2iY=GyBlCQs)`=nS}%CA@PmxaUddHCy+_ zg&MKSc_^C#O7lj}ErHV=C#jq(#*!wMmCx7R<=!v&&(%G#HdTnc;ph3}RNBkIUMfOK zzOygc$KHf=Q>zoBU`k8x(mhi@ONYK*%$=2X!ycNbrjj%I<|wjTsj8Z*>mz;c46mZK z+4uEwSSKJU2J&8|9#T?d5PDLwrDzD~iOL8=d!|~(yR~;+b zyDCXmv&_-hSJsX0TN0Z{m{g^nVRiEZ3BmiwuFS{pj^H+O%%*diM(GsiPl<=qb^=th z1R_f~WQ9_njt+NwLx>60?)8ah3#6Dm;TcBnPFzYKRma86f+TZBOPQ;7-JN&Vq4lR~ 
z2%!Dr{MpTko!F-zrP%LD+zpmb%xB1kNIo0A0!UW75OYdd-m&|Jh;AhuS!p%iX4Jd% z>90R|R4$IEloh_N^j?{8)@r+~FIutJPqFkTvIR<#2=0rZZkHN8G&$*BO~XG)L$Lx1 z{d3f0tX6|baN4RZ+Zq8x9*P8m#_xt>0w z8oAXqw{t99x4w*(bw(cKyPcfdbP^0C(O^D)J#*X2K;OuaG^*Di&Yz|m%#8RPE4m^n zY&7+*lMHBH9?o(`{Wx<(I4&>4f`8WSWESKB(MKbUbD>ceGP?yPL4;o5WwAc~PPb>$ z(X`O;$RX0uGJqf$ew>cR#o%*;#%rS_bsQGM&wQU=_|(V0I9lEEcbz1TPh4M@*pJyM zLGW_P;10k;ohm-#n~49VdrI=m@!<10Kwnj-CHA`^@mDZu8ymgZU(lhu3i-Tk~I$i~=qivv)_m`(X z*B2&caoT8zm9LR&FCDdNE+A`c&6>`*)o_=k2N|^Sy~1b6jfaPFfncWxEMM&zUvO+V zN5Kwiths=_Tfo{vkx>lWbde!cGICmYF4D?2d5CD|ko?pA9SA64pn=)$zjr*nAW-sh zb$1TR`Asa2f6;fM%-K{KvZmSqK2OPcwOzeD>s+-{)69gXIkMw%{&R+W&xM z70OGc&6nxu2H%-q>Oj9Br6FdjYHD@AwA%bSV~ZZwkth;&ft8GPFBTY#H3sX5Gx;1? zF2e{AK?FmL%!utZXD?h)8EnZ`4 zBvDf;fZ2VPZNDS9#GI+?C#y7@q;sb}xWxFyxP8(T?P7}L^2+*z+9Y>B9WVyBGWfgt z%}hOj`6Uovre>(5KA|`#FnO}SFg~R9%vk%0lprE1Z&F_!Qyz;dAyS z{r)d(YQWUZgJ!fDd27?@hdZ6?5Y@O-2n#5Q`G9B{fm`w6?WiSI+00?f$-7O$$*)J| zXT6Lrk_Ow}N#(Y9pZth*`}FY3x1y?XE1yKyLMmXO?1PKr#^-NsU*HGXyq~kmnBP-iAisiU zHYA*UjDA!*_VEYlX{M5xPAE7N5e5#_T!C^q)D6UPJk!8xCk>J&ea|-bi#lHVhHi6m zhhdL+Ohw(q8q+5~kyMQ5-(8lmRH8^+WIP=XKEL;{!2`uy3OzFW`?tg8OCKc(KGlz{ z$mUHW4t-XQcO{AL#%PVbbh+f*uT4#KV}!<&p+@k-n=TH*f(mq7Z|QcL6#e1`VCMS> z0A{U5LEC2U=$|e?67n+yXCL|Q`sMcZOBp=U;_FcE3fxlH2cMD{A zR@yA;xqtVb#EkGQvZqi=ngF^8)&(G@m?nQv$V-n16u8;>vCuLFvP>Z8|8C0Fh$UYw zexOAHT{jRVF3I!F4X6OGROFvVNhagpj@)>*O@1Yv=4O5N1m;cml zxi3q0d^*a{ym#Gy>r1XNnUp>W*?V^&!TseRz9@qv;hcM)0SGDg45j&;)gEar5y>8rlA4Vy4FWQQ;|?Pu4y0aTmm!r~GCJu6WH z#da*N0h}1O`Jr?Z6hqT+wgeZwq8!G1k{>%X?##4VFwM_iJ$fKHq492^?C>6zL*2~6 z8Ai!qk2w&tc`*@t_vPYYP8!R`>^n|K0oZzfpK5=1SC>2(sAbJKCKIOtsl2v5Oy2EdZY@Wdu96c5{Fn zfyd~(P>LlrH-PI62l~;YsjNKBQapoQXMAQsbwhQo>vnsnO9Nb$hTfrrp>6aG!^W9j zr|XA`s`q}dynVyknPS_nuFSqjMB$ZNR$XEjtipkP4XnY`OIh5;?$L%z)z+Zxgi9TJ z(f8^z2wyaMn~=R4a}V`M7pP_W-}ND@8uyVTK)6}yZmB)MD17tmn;RS2zkrqiQzJpd z{IFi2$zxhd3n;V=02_Rz7lIKArElHx^_lAV-{2lPETOF**?D=cOP0)QQ~f=wVOdN|V9|MQ(ysfFhGEJN;LvQ-AxmFmqph?@~C1(0+gb(>h=dO6AcnfLdWLa;=J_a;jhW1u;bFC(ARzX^p%g 
zUj&s;WT9`-sl?W8UZSfXSC~f}BXXZCR`Z_)AS=J*bb}=e{P64`?joxK;dk)w9)^|~ zZ|>;q_=NP}bm(Tt|K{JIeU!U(9>Ur5?p9W6ogJKqqI zLXWyWyn!d1J_VH66grSjQ7D%*gXYdba?$P?-d5SOMi!3L*w|eCYP^Le616u=CPQH$ zKV#(A`b=j6p352D4CiCOs5#C8NOqY_MF2=K;>@XSxu;`$+~JJVo$KXIUhzJh%*t>w zhZI=FJ{Pd|^lOX#6PCVbX%DY`-aDWOER`So16h+s>{#(g6|WARlUStV{$citUceKQ z87^y0DY6}U3ZE_BSxg6qkD~Bfno%4c;CXL*>fZHDxo)Ci?byXrY?-(W<84`imAD>Mv%6dLPi6)K4h*a&ssX_=PR<*gGE?z`JD`p{WKDK_#{=<&_y=jbb^DiQs*NnAQu!#(G5%Y^zfSxQu5)y~0 zmXg#St8mihP(;(7u$x%yg-f8E>YK>=Yi|hw2zZZRF%Nt+vT5}oOWL)Xsz|Q>jyDp& z5re@ZP#jLeB}mW6Yq4TkWyN+TpcHicGvf%(!c$f++>ZuV@!ROL!d!E4>TiqrsiLtn z+K*x6ExZQsF^o|O#Zzcz^2}V@lY?6_;FoXpAeJ)#`HQqPDPY;3Ssbn}JOsb!uLRaR z(!8*sim1%CH+$k>EG&jHjPn^hSMBea7iRnhhgFgouN9_BMnN1}Sp$Uib;3) zX`jR9mlYmC3zp8`M74$1Vh?MOTa1(tDLFlKV9FjaAZTcTqx?nfX#*?f!Rv-<`7*M= zGF!o{(ls9Ss-lWSSB~d#A)u2Je7!J$xw^n{iM_B@#Av!CyG0?ffz z(K7Qr5x-Ui`x`(e5%eSNvDM&;9PwlmRAxb!^)r1!h_OewQ;PPun){K!5CVV{3(+ZV zGD?YizzXFNUX6>G_SNRMMR<;nkY`eA61GJe=kbW@9Sj-q$T+4VP;nR2$6$ih%Katw z#-C!we|XJh8{szqvdmXP_?}?j&Q0SQY86XMh6FZkS1=yG->!sUrJT^O+rOFC3oId& z9DG8ImKVD{K(Ctj5gCbGq(Pz9j;-B_MH^}Hd)roIo2@6)Vn-lZ>}<5a-G5ge~8GEdFWP8I0BjC^1k_jGUn9Ie4P6JB#rfu$_h4VJ5x&=P~g ze_JlJ)8h(v%YMl(CXjnBD-1-JGqGItH~9fLnX2ZtGgcY41)`Ksr%UWq%5&Dcz=<`}oCe4&%nt0w~TH6U8&S6kN z`DJbyM)5$Ar!b6A%Ssr*>p6x2V8kp9VK|D_80Cno+M{$l*Q zL*swruOFCW_Up|xqoO`RW%g<^iNS{s!4;Lbmcq4(iI!D`+Cz!^zn9e53C|k-TIw%9 zHV|aG7y@ETl)@oFeu(6^&Col#B6nkC8Kq_qmB%~9ynV~aPMA1zX{y@N5;}`#xV|Ig zYuP|Pw0e-e($=njiZD)q)Tz`o)=N3avc^)nzA~thX>Vd{ir{|S1PqpIK#%4aO=+*e zm2ql+-2>{j8aON3GaeM=*G+JQk8Jb9XtOJZfY?Ce=~bBGW=;Qz9{h<`@J%@-6CmJY zLBo=QECV*&W%J9s7FLz?0TB7YOt#|Aa{Loe%?3t&7CGSW)5DkVovx%wO(2$MbU`fD zA*xe{+f#k9YCLn?7>@?IE@NEjNTnpH4ZXv2Hkr$h2zmBV(WR>)WNjWHnzV(eo`n=6$NSmv1#2mAY$GDYGTU0(v; zAw2-HJDY~r&VkaE_ykxMBBA61gzTEUk%O4%QT`}ztvNS(7GZ{DYY4slxP#Z>$T5D* zwMXV0dx0u~b{SmQ;d|o?&~%*!uHM^dgJ!DFHf{2(H|uEVMJj65OO744)-D34cS-Hd zJ1go-Y1Wtz^9+>|J1p-}z)}m^HZ5OG3YPCsg9`YV6@{XS?TNr>3hun>cY8-CUKU>X zgj#&%(48hEbDxGb>oF5geuce@hFv3=1yI8>bPVZJw&D0w@*#%yDmSe=JCO;|-Rlj` 
zVX2Ea(>Pk@=q?rXi7DZEVAk4QVj}WK)GB?H7&$i+|6a6hto_i6@42|t9$NI@WiC<* zc}DP8ce+40*7k2vE;9*i6JN63V$%1XlMclzz_gCzx30&dDjNDGAOc!WyYU3t*NU#F z24ZasTC}DB15dU?2#7Fq_4h+%d(h@v80h8~)#2`oMMRfw!XpcU0!jOEd$m7ZSFYRt z_A}}-H2(Un?kQur%7zYIeLM5^{4T^sH94D%<%sAwd}kj|iCoGH0e!MOfX)T>XRE!y zqNUzsrb0ed@LTB-UIM*6&XGNZCDD@IF^38cUf$>(jsD1p*wF@TcM9Z9*AYk5A|wUC zT`&zCY>X)Cgc#9z+AE=@5uL*>Nb0M4T}AUPA!dU64f~lm7Q3mgrDLTj?fA|pQcyTP z)!TNL-)RU<96bSsodgSk?+RmxM^-Kpx0y(PHH*{%JDie0MP)(b{pdlQ9-(mz&NSi{ z`8daF9rqowD*pGy!>osvS7_LBb|VcF3OHO|vJmo~`1iy&^BBa~rdr{q)|$e&?;=`H zP^q@$aQ4)nV+Jk$Fpr0ib1Sk0yxQ7#%=e&UF7=NAzjNFifWU*12-|ty)?R|H-mdtx zJ{?&|h;u}K@mYK=@pbaH`?wiHqGpWYG2VXNJIc24`FOqX$>s@3**;E<>p@p$ z zQ0DjL>t?SJOjeXZ5Vv9?K9PDsX$TdJ!B!!|KnR6iO8h=aKixP9yYgt-8L4_eD?f_^F(eJ_F*4ghVu`Q4jjPks zqtU<=UGX;Hv`ER~s-Q{_fiYta3ozn8x5F^NH?=Au%lgSW;lj}JK8_I5@AZo0fu?={ z+W?EIP49_i5%*~H>Q*N~GQ*~Y)i71uXAedFjtgF(jyrmltl4{uq&P?=FmWa++OQ?N zia9FougN|_cZpIvH0{w?JPAU*rhx^tDY@a=%Th-hi}hksW@%TZyjS;=-n7MbVu&X#tXp$k#f&HyV&!D^BBg9%ItXd?iAX* z2mpulAn2Bi;$QZk2chZZj3jK03>th*cfPKo#lAnM9Av7rG`iS+z3=vPhOH_R?T9BV zn1_ZPSS^j`p$gaIY#T^lM-}(oEfCpnhSeIZw^?% zd*B7oja>h-NPt%yyZeczP=Z}1Y-I}Jp)|DL!WCou4i?gN@||ZDK9#4(jfTVmE_MM* zpISd_i+ztFTwagowrt8l#XZWXfHv7d5eN+gDM1jn;_3qiv3Z^+AxeoR__hdIZ%;6F&>Jals0u_^(~y+SK+IB zPZt*jW78y^k|DCD2LxqnS1Rjqk&L1Mx*BCctIhsSf6CAKf}NJ?|Jj}aO42*1vos`=XT_qOB?6PhM&5BUvT)#(1)4?Uv zBO2kwyo4lB(&35oal!T0awW%G1^7qkBl^mb|G=IS61g`Bn|(%AnD&WI>P@qY%CHs; zskPrKL<*oaZT=)aq{sDm+5BXrA=8V=b7^9}9W&>5l0POEA|J2yC7Q?RI+G}^z@)85 ze9Y`4&z6MRuF8s8RC?9a-H;R)Qrrx~B~IUMy}+u2OzglXwfGa-kOPXEUkT)w9Imjt zZiKr%E;mV9DM~6UCcHL^OF=0OmS;$!5j9|Az_{7?4Y0SN-P7R{cwvq>W8AU-(!{fx z19@EV+?RCAsv#b=CfAv`Qee|x%UYc&#mC=#s531ZlTqJf6Awk4VbB$3v@6=BgSsLf8n+>mg630Mm!A+~^wxd`0L>)-IQ71w)SMP7^sFB-dnBbv= zmj&V=Gh`3>p}INcV97e!Tb-Zu(QRn|8lTVf7d-x52K|Go|L#!u&pn7t41d==|D@`_ z+Y|mj%b-jQe;RfFDTDqYef|m6f2yPZBMf{Lw~GFq zfDduY*k$17T>@+5MqIk|9fz<<{HLncD&ZLj?W*_mXgzJk*B4SwkIyHzJ}ik^TGJ+B zeN0b^qlX<^QX7Hik#wbv0)0Ax%&mmBCBCn^K2P$6YyecZl&!Bz){FKJRXG-;uhoE$ 
zU6l}XxkhQ5w9oA{bvb65)2cM(206Ry1`WLMG>EQur`)}5JZUqo({-0h7G=TA4!;i- zVV8oMP3MqVe9k+FRf(Q!ddRtiU~;}xi@FA$XteqQrtfn<97Z3VZ?X?`eeX3(g>kvy zSFM#zpoSNOP%XKl~oT2&b-mC3aZ+7Sm?!T z>z6m?@`&pw*X7<6Eps^cl2VS&-hVLEaGc{yHHG$eNB5bd?B=h_>o#V2|EANv?Ja#= zhnqsnb4ah|8(5PYePJpXK0 zN!jkv<-9TvzXYErMu;zQ#G*a<;K12>GNU(kK9-g@G_)2=?QK?LLspthPHlM}+~?-S zOq%V=(=n?T_3Qql-;6k}_h`OvgM0^64|OmtW<>Sa+rzTHV6wHDr7dz^_#qQ-Pca79 zJrWY4K{Qoa^a*wjNsc&C2(qQZPmv9!STg0WOx|zPe%*?Vxe^DGXGQXbRg6922qi+d z!)!U|dYg+Tz-SW3gz%{!IHXm5Yht<`GtA8o#e2OQ#*D#~jy9ncv2SHXrGm;@oV2OB zH+m~O(k@kV363V>^J|?19aE7Yaay>>EoLMS6M_hz5=I?G8Bu>^YeGBr2tHQ>si%@{ zM42Yr(FjB3G>cPPA2i4I6dKJy>OK@QsaeZM(!!6EAX21HmnXQ|qUDpj*yt>`C>v!{ zDr0~(6^pflIu|akUq>4^(oE9ZlEL?tB$kLraqDKzxqB9h@;eNKiXa2VQ5ufSq{x3n zNWu=m-)`ltgf|@ zYHw1L8KgeXU3Cnk>mnL$bxva#Wkis2y9D2q$-i<$#~>ev&f2sEA_J08l5N)vh*rp| z1GN3mxuXLiD!6OzdZZKP&&ef30<))?X2m~LJ->}giCv&Uf~1jP>T%sDP`!52miyBo zvLBV!YV!81pg2l{LC`97TU_q5qfm=O$h#?PXA#I=h*XyjmCBuzia|UjxIJh5-k_N$ zML&+BgDQ!YZTNiJKFx)AXuZDn-8M+*`$cZIYekSw*f8TF7@%-YS#vtndDC*ZDqw}u zNDig)26A$2g~&bmyuBrJi3GOwGFvs#;;SSIw{`O>rc2M;tgiFU+xpBxsawjzMG8eOL&56a z+RaU!1%VlE$$wY=rlvac=+H!{K|h4!29b2gKGMHvZ~{Y2%F1O8(kf}#=NJzL zqL$(5+2W*%|ywf_e;KU0#67%fC6;_}Oc>sk%H0s`S)@&}{ z*ov5*>-!LCSY^g>vFc@zju&I509!^RV zNFj0Tu!9t)jxr`8HzbUPP^!CM&xQq}K`26X7<>j({Vv$M+Oe zxKN%6YoZhk?JIu~ytfh*`kY#356KQ5lU)fep?%!p%nJ5Y~{-Oe$ z*eed_DMFpw=1LWs zzL!kdZt_@2M=xU(8`Yx6(j%5w-RJs+PD+9ZIJbo#QbIpvQM9w3p?W7_vOYx1T(0F# zOF5k=&EWl`Q13v|T;o8TaovQCH&EE^7guIc)W93L;(pd_-j0XqHwU#>(VuBdtA>&B z1SO6&$PE}5LWfXT&agx@GP{h08x8B}cm$IPzyrh9#+Gz1E5zWP4sXRxZxAe&Y^WL! 
zJYn7i7218qL=FOHuI)7Nq>jYx$+V`mv-rqn@wjXC68F{?BfrBD3%BmwyM88_dx8wu zJ)jjk;)a^s-mhZwy4gxNyj$#5C)P!N4z^oJZ)T8;2XID*?Uw=vp@7C&Lh2Qw6$+99 zBayY$yr2oII40JJp1Lp~fEMe9uz;$go97Ii+4dCATimsZUKiOGitDF5TvjcU91?lzbf* zcT-uCvKgq<_`p|0<&TYjR-w=aC^3chT7VHt{zmS|LT6Lx$!<;!#}UN_1$~5FJ-GrE}yom zuvxo02B!z;K!I?9KwP|%qbMhLAx^|Gj%BIy>ajX#k@0lp9Ibd`QpUC`OU%jLC?GbJ z;}Ew7>|PUZsgj-lg5&w(VMDLld8S5?Ui4mYOh7~C-4$5eI)eJkEAiK2d1-szjWE$+ zr~W}})&w#$a-=mST1UHkV=UdAQa$amkG0+5s(cE!an5wKG^nr?HdL^Vj|i%U)|H^5 zvaYqTAhS9GR4sm|jRt*G!-Ii|m#52KclMijS(}=$A09iBT2GUDj2PKw6Bv_;6@i(O z4s?YJS3MJbSmsTXjT?>5u3<^$aQCj&P`T1}l8)X-^O?}^8g96Oo9?E(F-0B3qvcQx z0<-A$QQxhD1@CoSWhZip(~fV0PZhejhb1nHpX-n9Y&%IUh|!rVe-! zb9;&LLOgpe3D<2`e-D&X+DK?8X&t5+d8qJ;68X@69dV__aUvG!$d~sQgTdau2aWEg z>cRAAL}^9=^=|iQyH9lzH?gEdnB5(wJfqt}a@0lJs52Q7QYa?{_%7ODCCd@;|x zf`3@)KM`%bf|NZBGK)hukoZz_8(ENG8bdyXxKMWO2bqY193lB*UgC#Yw5`$pnt_)1 zg?y>qecZkD@p2T3l&9hTzG^!PnJRjRR_nIOJt*&oHlL8#U_4?=hm!ZQ7aMXhxspvB zoDGLs9#Q=(Zwk1SZAnB0cp|1`#ROn+2ucD>k9Zt}QnID7Fa-L1=jDySySbCZKHWvA z-h2a!I69_)XGo&~gcs9(S1M{7ZJ&(q9%7|}jza_7&3x+{(C-?vB+QQSNip--jvM2z z3U(0h3T6$0XhGe$OzgZDuX%(Pwl3eNN}AC})enKUV-CeANeELH%%jh)8O1{8IucA` z74r7nZnS*5I$qB{y)}<+9YeL}g166sZXfzrCq@s zleB8%7}7YJ7tEE4D-cVn&)@|1fOYLdt`h~`gc zvBd6`NnoJXq{L}5*RU=w5pIEGQ^Hv2Wkf=Ea0OsRK}p1O2!lOQ3}UoFgUXB)RB|%v zF%VZ*3H?ga>eiA)Ru7qC8hb#?PBZ~zK;Ofg7~C?n3`8j~at8N*=Pm3bBiIFzl3@(* zakc7i5X&>=N3wqDOT42TjoOGv~S(}lOs5(V?1HHP?|vrJW>?@ailnL zx}gQ4w9WdKsDfwr%W~AnS7b>BGar30?3`2_XastDmTL*{ldsPB-Tcn9hJnh%TOjAl zu)v1pN<4pYVR_a@s{6cCocK3vMdMVpq1K>LS%^@3UYRm65Q!!sN-kO%Lv7DI_WgFz z2vQ(=rBqU=y1hp1czR_#Fej|E)ozik$9775%Gdc9*n~(dsO5UP=sXLe1>|hj7@`Tt z9TEO7^HGE`cL??TA~hO&67XZC77+>{0Of}`a} ztYNz6{vgxi?Zw!!{ZHdDjz(vZeak_ThpxgqT%VZH48kdp{#ro^jV%7uuw?x|2^E^T z*Ku{9K60wLIOi4fjHh(7NZV34%kj!<1YpZP?eOcUJbxhc%ObSzm{ZJ=aK|k{eTu5D5hs7A za`O27w=0+IA539}a?tHD zd`TU<^lH&knQonCD}VIDjUxzvC1>M3V}a4pvY{jXeRYW@DX<2&0+NpgGWr zP=5w3L>PX&e_W)Wsir?H_*^8i7U77pei@=Ztzg;cui5Ly0c&8PIGkO4e<0g%7Ic%v z<4{^*-j`r>yOF8@%{d8h-&sts|N86ul6vZ$O|Y9fNbA>{9+d*roqhJv0OP6|J)}y= 
zeEw`tpk>opxo%(z0g{{oUFkv&4v?}Q?syj?cGc1riLu2pr=j8ti?#g~ostLELwBM| z40WJX37EjeH1a{Xn{q8NsuTdB#>MYF1#KF+b|KhO!E|iT4 z@F%POACd$7x1ns9zssJ){ zY;|Zp?*aFOT%l8cu?`Zndb%`Q_l49fBuqwPE#50C6|bn;EW8mKqt*yg|62BmhN7h< zh;^t=?W(lR65)#W(|&3f<5z^RsThZ3;-S7yLPdT(>Hc!O3;?bAQNMPkV@JK*YUvL3 z%Cy!J=L{mt0_(?5c<%4~(0JdkH10_ydh-xxyg6xF!vQ^-O$~|oLK&5A2j_aHDD)m@ zj}7X*UVd@oNmD8P;+SYF16XTCO#xO~2#*D~kv|j)8KtguWShnOJFNBdU|KjJ+=1Oc z^&DHTc6uk0W=hJkic5HKvn7SM8Yb*5vWb($z*!-hkhyWEIh6d4&dVnoLxuQ}9VeRk zwrh~uzMrOPiw@sdcn6TLurF)+dWxiY&Yi=_<8YthE5(SIBPN$CoO98un8tS|)m%(8 zTXn0CbdLpAK$A1NDu0OrPm+ZIOPLU-JblbzGpR;HQGwhzv0%VE;gj%}b!KNcS#B)a zGapcatXqW@^V~F+acXD6#b7r<^2n4JBlPG43PlAmcb*c1FKW!jA`g~>$dgL|-}(B5 zsQoJxZ+6NPI5QJ?f_op(I$K8UQ(JH(W`!h3vPMs29#OZ2N4?0UNaxj1x*wnbwBmp~(|xl2Vr4QL+^8E z)O{KlFe)i(Q19lN20R&90p7s}68CujeiT~CaeVbb@67EDTOo4W$r)VUcO5lnlug<- z=AZ5+_VXKi%g9%xp(b?CX7z$hz+0~_xLzYn^rlv>UXyyw$Hm18u>wfU?qcmQ2IQ## z={$O9b+7{#K-8oflad33*ihuVD0_!g&oLA4O_-IDEs>p?0W0`gXudxT)U`x&L12nE zgL0kA<5fi^<@0Xx83MNlVAFeWTE#deE)$qk#dGM1Nd?H9PST66Ib>p8#iiJE_V#x7 z^IQ4T;P}CWC0bEo6AdE9XgYVekxTup(6^|-_qwkzaTmfgvPxJ#31i)Q>k^0fHb>*p zCP<}iup_6nipN_iFbz_YS0a5rRXeoZWgRbg_%uA+aEt8RDm#iXfx{)n>U`;G=)a;k zk-v47T+X_q1vZw5(N#QsUVdaY5442u9l&qookSs)R=Gs=Tc?gxvLhHTO}#EJ<{86` z3~P)p@aF1lJ}X;!^X9v*1HROSo~_cWow<0kY26pGX1lf4HQWFbQuRVP$_@m-_~pUK zt#@i?|8l4S5knz@>w9qb^6+TW2{~JHQy?oK;PZwRBpkST6!5ht=FdP>U49w(5eC+z z)zzFE6Q~c>Ho?JH_!PSg5A}f**1kR1N~Gj@y3sY+)H81O4%lF80de{GlN5(V>+ls+hWW0v zPRTH#&{3NV9eT3Su#yL4G5=?&Z*XE&pNdfd5ax0?q)uuj9;IY&dt)?ZjK00YJX&x# z>g{opfOG?Fq@mBIKAAttS+J<>k?`rl`bg6}Lnx^!neGOJ?6rfZP zCQn?*L-Dz&8T;)}O$i{SiD4aRLVGpeg+;8DfN$d09t;Tjt=~wnAY;D`N^p^{V?yu` zv=yQNM-D^=bVO=x?*!XhphjB1)%zX_J^Hcc0hV|!I2viSth?c^+Fi1i zGE5b2E4$&hd;Plk)R!;Ye&i`3f%)uy4nyg+I1T)LPHM7sLc)}T4WEl>X;sOkGw#6_ zlRXhuINcjZheCl9dzLzKdlcj{Fedo&78butXM^hSGa6Jf#e&LPQGRI@xS~-!G|Ti2N+mxaqXP zp=W!Rw1vHyC(AnB7WycRmS-AQP#?BsaLS9_Gcbi{^vDC5?OgK^8hiSuj?fZNUT&u8 zHBaM=H*4O$4w@w0-USp8+oj+D62gTyJ9VkE5B5i{47rabNa0L-n4dq^5Fjs+^NKCp 
z$+tIW0zyV%UNnqp>Jz_dmW?r4EFd1=BYBBc&Whj3n1C^x;fj$~7O1fss>VB8rIv#c zWX2TOI1S*|>6O0)Fc9E@Wp?_z5(2r$1JO-?teCtR94=#1E}q~C7Lqc-<_Iq{1w?mk zFb0APg@=9VogEDkoVzZtF5ywZTEP_1fu`y(ou;giv!nc;F!A;hrrT#d`*%s0L|zCJ(7O0K z{s9stH5rXik4M`RE}+qrf78plr*2*EgbD|%c~S*ZX_hZ_R~NPz3U4o9F^XW7m}=Uf ziZ6xx>+Dxd>8EFdbwp7504K#UONNCW7N(z!xNk1LiYrB=)9D>f`52!PJX9SC`!*`nADntcQG|J^}dsH#pv$3 zri;G3&AAU~ZeMGf>U48`JaCcs#(?m~N^n7d+;uPcJnZQBwev}zZiQ;PY5IeeeYAsu z$E=7aGa)fnP9&e^u$D&=pG|+;-0P5ytK@@(=^m22w20Ta^0H#bMYM^+in%ULg#}L0 z$Fn0$@d~^P*VbxbQM&R+*UI3x!;X4g%h~z(2Bo2?);hxa8Yh?a_r0BH+g2;gH*<*) z?j`qVoI>C$xG6;|OTKxJh3hvNyy(+VDe=>f-ugy1=NDy<9n*YgZdZYq*PR(C-62_y zJ_qkJclAT=dzGb3S>)hU-9fB~)CAI_FIq)}1KWfT@E_B&;6U;=dGj7kVS34j8{|?v z+B>htF!u0Kx-?wy8c7^f`zAU?=k=E3W+ZGoOSx)KnZ?>eQ0#PFgQpO-aGi=FZQ*k3 zF7<$t0?)5bt?4t&^6pQ}JeC8QL8|t*G<61RLHt(kAtg-%eq>eQ_C|WN?~~r5t|}ns zAAqhUZ;|c53MpU~1`&=JR$&iFeJKc29gNp6L_NJz}S%S1WO6=yW;|s4#c%* z5U;RM*XTq*yDlQCTM8Wm&#$JNmc?-3I9y~EYa>-&M;)+RW^sGdiVdB#s|OzpdO?!g0(Y7p-mvWSfR}^nq|DL2-z+FNhk<(49VbFP2tUk zVdylr2RahZ(M)`4V#(!rPw4^)YF}8fO0}1CJk&$mHIIrUr)X6CyjrT#w(#g-1q<*7 zyV_X3yss7qkp)V2kqJnB2ATFjnDspf#$dY4a5r*q9p>RcjDr{48ZGnx@N${A26sWn z9aE?l9!_Y|C~|H+0Y5Vnr{FIHw!qno2!f1whMm4F{w787o`jhxc3AQBVCqrasP2i3 zq%9Xh1auYt-S-k8bPPpFWP^l^2)-F%-J2VS2y+7;?YnQd;RHflCFwR+yMmE-n(5>>xGL%$U{V5xcEm9Kl##diyfdso0=y*SoF$bD*{{s)V(izd{SkTd z^!~>_=(0(1(n#6RaJ#tn6`o-bZ2HjC0d%+y$#vL~BzdX~3qfdvwnAc)!sCpEt9)|zk zT1?9E5;)ASIY@Txs})C)vZnp)2681*D8vXZou_?Q0OF-DykBvj4|q#Fi`tcAr)0i! 
zZ{QYXvL_Tt^}+vsCIytICl@^SJc3VcYs4-g8uwo9ABeP7 zb}e@8pqxzKl6-eQEZY|u7?xqG7_7~28H5IjFLMNI!-H$8{S%|o!!^w>q>b=B9W61* z8$Ahtu+&*Av>%c>_j1%#3xZ?34@-d@$iG4%hiDGD<7*%+Ht92C??3-`k`Jw8W`E8g z`_8PfN$pB%w0f*)KEj*mcPd{lrlvE<8u2Bh@UcH7m6%)GHD}Va5&~cGx7;3YSbUIx zJ*lz-9p?1D;n$U}Kj#^dsdHl~F5KULdfCNI~%r zt;e`x}$P(!<8%3D~s}VF=o%+jM`!@;6(n;j&4UwjiA+sUle$H>)i@ zB;eIwf#?}5JDI6f3<{$uBURq+Z`DFvEITCG=NfYH{5#5w<$BO8g?5N1_XMI%t6e5m z*{pL)mpf!+=kWGK(_HBSx}mkHC!%qVEsc@apf(X0nC}K~`Nu`pmpDY+v)RknK?cw>^pe>l5ai-B{xl40 z{vi#J+Nck4fcG~0ldG6s`~atVFYwZ@x(DoF!TVh~2t?sRDl&&3r%@yOLW>{#rrl3Z?Te5m;ECT;w`)abrO7i-%XV>@A9PB0V&m=41sDw1RR15+-U2p~ z1xXqh_|;7+Z?$wV6R%j8+j+IHXH#;|`q_^AU?y z{1B^z!`WZ1Ky8}qh+H8;NdG<`0EXR!MMW$V@pdrsgT;pkuk1qS$v6vFLK9CNvTfwl zEax8TY>Y{Ii4HH(N`?ggd;9TkLgqk?c4oJVE}QCnYR8;momoOiF_EJ9Fl5@_$L+XV z1WGBDLVEL^6vWRav#)!3$YmJ<&BxeW_Gi5*Fj@XzrUdmAu zhoyi_BGl%?pu3^3X2ZFyA`ZE%X($n9Ub=@24i>=41b;kcX%Y&TnRA{fRtIYt}}U4Xk^)(`D}S7md6l!&q6@!0ey6FfXLO*`vw>o)A&+ag?W2m zmPR$JA_YEa;KD6pFm*}OBqI|`iQ|&0sam9Ru&xvjjgT!vusq_9TGto4kpSS$Zt_yC zAyLQ@vrXu&QCYfHnM0OFdLMVnyE7#htd)OZ_4HL>V6@1`vK0R1qKLwS&U9I7gQ=W! 
zhP-vSOcO#$ofx2@13&a3NaPJ8@O>QE>051LXkL0hUYZ6X3baOiLM`Qv8;nZ>R4Ct@qK_-bzqG1>DUJqP49Si{jf{c2klOKtfUtQg0MfmjqF_ z^%Oe-x?2#cs{oc=C?$_WIJc&iqBF;=Z8yOOLPdZ%8~Xq}pAmN|xlFmw0jP8poQJeraw(tIU;I$?;Z?6}84Z4sFQ{#nyk2`1Jt?+Ny2`H&;brfT>VR3it_rx=ro ztM`ldsO?uBRumr}jLP_i@Sq~wa#D9Pi4wzm6X!Ap=J+t#b^2m>f9DXbx#}Bjs#W0b zMLgH^0w{L^NkeG9GyVX%ahS6PLpLZyDhtz#Hzc}7untlB9uu5MCw0wcP8fA{&55LS z!5WdYn(Eu_iKY5EBXG>2;X#-{Xxr3yKuxNLb_at~dU|-RVTYxAbEWHwaRb1C$#S|> z=2+;PmNXCTEHCYvVM6nBF3Ypq*{NhW5JNa1Vv5DMekDK$?a4PC*M*s%KSx-9fqk~F;@)FS#ZAvgqTl!(+}7x-gUwCpd3y?)74H=8tt1>zwJ zS+X|^^*0OBUCuXyB;98Z9vcap%4PlHWX!`p9cnEE4=g}or}H42uDftyFvAxC2m3yv z(VNcCBM&USdF5;IFP$qxV0J<=xNGw~78G0%WJxy_EZEx$QjAKmOmUFDU$T^W!Mx0~ z0(oIp^x^gBBX?0EpVEZuU|g$9uxo=7c|VoHHXO%aosgIVlyJ#&hgi<&yOc5GrngHD z1QPn+crnUHuv4q>%9~EI-*cu#*No<_@M2>6RdQOS8+@Noc{DbUIgVoc!^39CI){=# zBEv*{(C{0Hu z4NR6!nkSdcU{J_?6iK=hK}p-1%s~bn3jgENgcbA%psL&Oe4qRtcqx=8PlP!>1%^Rn zg5&KOy7H$X z6&{QheD7GZ-X|w{Z8b|sIxzEm60W4R-wlwSBR1-t8VUQ%?vUI)R4xb z_T;*1XHjp5#3^9EPG@53NPkbGTMb7Byb3FfV!IJRKQn35|FmurL&vFi?)|{jk;mG= z`IS7*hNr{yi4-r9FU8i6c9(O6WHIU-AOV4mlQTDHV{x3&dX^OtL6^?)19XU1=V{in z^t8%%9Z)TeSPhc-!ZGJn`94c$)%MM=PGVxrFh1Bm78IdpU&Xxq<`7xj;Jqr4sa{V2 zCrhapULxvZylzO7RjgK#o)FLT6s=5rqO2({ET(VqoC%7J=hyDDjrQ-ZUAWgK0GV0q zEVPAsJR9zA@uwNC`Qb~C^Qp~>nAU>=PB@q_3p0$19h{^h;!?sn-w&Q zcQ?j+p+5{tOZ|lIl=n=DZJAoqEDud!byILG`9C9ot)Vm8^1mK;UDVi!(4R+PLKKDdAtuq5*H0%Fbk=cgz zv^Yu5;U)Yd{=uiBVB`h0y>usFXix2HqMjgrD4~@#*5g!c4!eZB-E`AcBx{_!`jNqB z)8zwAFy94Y2s>4qYYBMFB?Al<{X7%tk!m*(j2Mx7f9Hg18#P;d`M*fnAyvt)E`OHK>ADGbk<4j%BN?|G`%&~QZb za61hN=$aZhR^Bor4SG!Kfkz_sMO+Fw=&EEA$D&YVTg=pO(-={}Y{&fqG3mQz(jy)l zdznS~+Y&;ub2H)k+v4V)Ej@W_3+L8#M7u`tKm3YLV zE3FJ4bG)rFs<6N88bzDsl@(_5J8VR*6?V`}Au%!3On5z7ay<+i@;p4*D{+Pj#0#W! 
zq?CM$mnsfpo`6&FF4{=VhgKD1opgk#9kWPBa}gfj-h{{)$FS*itB-^VrCFqgmAA`# zKw2)?NU&9m6RTn@o8iLfeU(KhkJV;`BRASEVMenHjz5Rx)dk4Oo51GX6mVtJhRCw$ zN{E08Ngs|gJM)+0sWsYY-J$x?6$+K-I*TBtLkUbQEo408tC!5`U$rf|R<#|4A+tqJ zvQN%&anJ|uF?ajj8>0=Vnc4!NbINZ}|9rZPpk3ZXyQ&n0C}ujO-)ZkPZ-_N>)= zMW0f;8tKv0I*MR<8X6A^!*`QqgdO(a^Z}LKMi$IQvus2@iq>Zo?s89%%#1z2Y6^uXAzV$%?cZR|6$K4DpC9a{f26_pwTo_zQ zQ8840Ix5JIj)NJc$zDXB9YxN8I%y6JefGYdd{QPhI(G~IfNjxk6a4}E;+!1+9A*(q zEj|YQZZ+8A`{W2=;sd|<7d({t(AxmnBbiRDb6z0u$SKv}<$-b{-}rn``v9~ju+ZMo z9$?L0V_%w_=-U&-OE((-d~~)N=XyD(M0Jz~&nB|FOT9T8(A7_lw#+@{8l|U4dOh7| z;prH50PkfkAU9EcXR0ijP~qy$YQ0Z%`;ud{|BinD0DS)`sdM~gFJbyUzTzLF-#_5v zf05Mx^w$0#(C?pSm;ais_IGGY|K*>_>%Sr4zoLx)O(gt#G|AtQ@Sk3pe}#mzF|3@{ zMr_FN=lHkNy&?A1c`k0Ub$JS?V!S)0R=HHYm^*!QOB)B6Ma#TNuDkj51NQrizE~W? zM-3WYgQ3-NIBm&giN@o$_i7vsH0=(V9e?cW+3I~<35J^W)f$oWwRbKhHD~&1;hXj~ zk6E(0;TmjsXS8-T=E_vg)-97P5Zh&z$z(PuqSv5TEu5j?^ITkxb>zGsK2-C2UkwO^ zh8NNrrIP9(4T?uC#~2SBO4O$B0kSA-y&pWu zHislKQDXri>#8JNKv4DWK+!>X2~D=%32~>GIc=|YRVRBe=g|l#Vp-K0CKgHrX}0Dy zD$qw(C~&r|MOSc9H??L2E%nl`V&rlrxhxu+C>X)Kskcijk%NX_$CosX{)%3YCxjxd zAj@bHKjB(K3uX2!CX+m85U2$+2Tc==w7Tt3FW)RD18!EE3|-ST-?*sDkYy{AEkE-Q z3oaRRZ5QS?At2=Q(4G2j=VRgsGdi6XY61wwpoks0x3sTnVHpBuM~{H5P84Gp2{RJ& zlC>DAVp32SSoO#mt;#8Pj!F6?r$Z$(!0q1vxq|(Sq zOjd*6QaJ%uYyABTR3c;L)j6)|xi6l-v{!OxDhqUp;)NGs)6kXKF)!f6KcD z3A@BEV-IU1hCx5+Mm60fLq%@gt#px!JjIir^=04WS0b^uGPe4tj!h4kl`wh3DOb~F z=q}8bXMC5iAp(Xf)Pc>&XcNU z7O1E4*eI2-B`b=hD$T6Dd){<$;qpra6TcDT-qAI*@e&&zc8r& zx#67#L2E3Ikj@8A8eb+eoH9g9*+x@tQLcqw!G$(Mb;IOCFu~Tp`Wgs@wG*78&@z~4 z4-MBm$qR6hfk1=V+Uu;0DJ(E%SVazp3F-qLT}nBg#`5hd?9O`Iog8;|(s$3g>HV)%rnDPl;jZxDV zSBi~*+LH9s8{xeZ%YGAivT&ArxwUF$z?F>y?A42ebY~7w1ma4D+(bT0rW3NKS{^Dq zH|5QjvQmNb#F1uTdaS9=vnBpV(veZ1cAusVhu)1+a@H~##t>ebVF$NDToHx+NAz9t z=kFDpT1pq)T{+KPE^nn>*zKlaII#_~P7(;x#1uy9uW}c>zl=B0u!9^^ zki|AwJpJS=#X|@L$92?HK~d?t_GjnXC3a)b-jO}@r7y*gC=diQ7~-yVd5;(SAgI|- zH2!pV5wCHjp#`N|d!PV>zL;9$NqZ#j zS!XvNQ6H&?u&l;s@B8tEYLG4ljvTf|$qjoKOZ`@`FopB&G`#qJ2c+4eIuVf?5fD8D 
zI^zhrk#xj1G3NOxaDQECo*&)3$X~kU5v;Rhf{V+wneq6_MhXX+_j-M)|y=UePl^y~~)^g)5>a?{t%v8V5!U!;s;7g7jq3EOS>aLl35ni~Anu*Il}qFM z4ZsSS5EQ>2y3&{*Im6%bF9?y7>ecy{fVu`9o^#^;lk-zd}sWnBy^fO^D08Ik$kod-|*lv{CsvaE|JiRoRdXP1_b63<(7KQfC&1_vWXy z_1T-t?_0{Xk(Q-m+z>AWb&)h^r#jXS$USoF-?wG3_IwF$GxL4l-!JZ)`9+SS!@V&{ z+1rD38mT{zMSVL~A_*Wmn$MSEGHo!xe-1}O@aVwUh~26sU$B4ve8)8p$3}eeh(nIH z!Fbj%hQvMfXgRS_7KPFYh`D2dHe5$!ub^B73J>^z;@W3-foyFzSf=J9xu0jnUKBt1=3AtWR5W~ zy}+8mz0&<{5%(Xkf;N((Bq$iPAwUmEl6VL(fw#68dV(#aZ^i3Zy|IR3jysvNDPxlH z%uh?1ZHCuAQ3nJ!7d<&-wZ6Hlt;{xCLaV5lsZ$DoF(LfKW94EB>$HISY4GH)nGIR9 zM7;z;h%*$qzO(gK5Hfyy#o}7rw4}PlKw3JZIfEH}e!i{MG^l|kniJFB>tvN z78SHfU2d=WixfGUStqrZPp23a#hH6=?)d3n1;&JoK>7BKF=FaolaB z2V=@dCd@x3)ZobvMR!jkK4B89;#r zG@4J75&+_>X-y>H(>txtr7h$VlXo-VoT<9sYzSx8Xc_=`Z73`BNH1e=a}KVL{Q%13 zDEBw)SSK;Im{BDI?~sh|%$$d9O*!K@Jj-%UD4s{dM74)qg$Os=V;!z^CLADD;KkmC zQ$45sY48^)nS7C(>FUMR750(pAQfIZ4jqr=dB$-WLSAR^;eD@T49*TRzcM&c`?Asy zjs`^1qF81&zfW9q7=y=0g4GPiAt7QQj^Lt$v0Sp~!VpfXU*3yU&99-d<`7WSMFW(E zb}?%?i`^!+Hp@gN6VpT-tv zvSVQHdUtpXL5m0O**cMJZm?+@eEhtAwVsQ#KiLnX@J-v*t*aIpwo#XAX#c$m&_Hh^b~_=~~jgLVbFSu%{M!{#(^IecdpeH5(3LDuQW!t?33?iMT>Ua?JZ095z&*#D$gnHJjt5 z^hCDjZFpE|(ZG*n)+#Bf-xbtYP{6$IL5U`k6`5D?zvtiwl8|*Z?TK1)E)Xu&zpy9{ZsLai)|G2!0YfT`-5hn*N}E97?oV(I`x9YaV?|}ooL$*F z_grwV&`aCE%`|U0S&)9Aku|yo3N)%xxfu{CHX|1$(%ia0oQqn{GX@kXa+~{$V>UeT zTpSw(6re>I2KL24uSRm|{Ld2-Y2?e>hjFO66X)5o)8ZSqnKEtG0qrM^rvywi1mzRc zK?Do(m2GUT%#ho?BLGqqUYs!f4`-I9_Q36YL+B08bwK%Ed|3qws8rr6Q5W^fMwwBo zFtp4vg8Pfbc?jQD_DK>=abHsou4k=DQ0Nr-c>NNZ{iw*?Q5ywlIUfFIVkL{?H4A(2 z&hXjeLMKtEc_JcW>fUc(t#tSOSU?-j`k$>Pw>@J=GrIE^Tn`Yx_e_Rpmc9I1K8ESc zzE;u_iWbsg_{HIZ{Arb`o>;uKqm@<-TW-~nkpNRf3b^mY&Q_qs9~MI40U&qO<@=*b>lLE?!HN7!|XIS?Cj>Ei&-#!E`h}eq%hL=P?BboQop|dLXzBjj@m#d!k$} zN1u|KTOtL!EGya-okm?O8i7x;@=3TmQS*T%e&T;k(H#yo3%Zn=5gtV3eTO#rS_n41zYe%B@|FI_F58a3D!<9Swi`# zpLfB|^uFeS|9ZyJNWA3W>>yv-_oaU7=0gLBCyhR8?w=5<#yjXj(SpHpU{7qf_eQDbNbS(qRKN@6X z`8NmIqE)0~=NJ%DI3CoLh0JjU>za_+YeB#Tc!C?Wo4`o~Mr1l=8cKBr>Dpf}uidlw 
zTp#^CM>=?8?TV~r`(r0HXrU?hJBsRZ3$vQnR>JnrZkVeNzoI3KJ>QUJ&@J(g*3D^5 zffCC$FIL3+=7ove!*wtHY--NNZ5sKx>`?wDN(SPva=G6;K8?x^Id#HhEMG*f+x2?G zIF6o_aQf)iM?AVqlO2lMu|3**KAibgA%8U~(^BhPyZa>UCqGI2DB%PTL;hA{+F=Dv zaQMfN=f0O!T}Kj+^&Cz^`X0;MYh7^gf zf}mI@cH`U-xK~BR9(i(d1tnUebYZ7Y)kD6e$Sm^<6{*#?h~(4`y5;=7U+c8K5fYMB zpWd>C&aY$=*iiw&0=>gOu`DxeJsvDBe}#55!_zo;5^N#_-J6JsS=JAe+@$>W6Q30c zyD>^q)-zGL-+y;tLhkf7TwW4xl;HHO#qOQ&4)2+76#l{Th}{$$Oo=3Xc21Rvj(E>; z3c448LoE+r5y9u9Cqg_OxG9ItCo&C={*@0ImUPU4{-NVpOJ>Crie1GQ}9t(iEMM14j>jezXiG$sFq{Uau8Zm#>MT-oOE$5ol?h%cBwHjhMGUSe_BGqRH)Ipjo1SLK@<5i%JAcvu`QHa7Z%V zcL70!_1fCH-m5xnTU%HQ`M*Rd$94zj3$Q zUcBSpfM{5L5Fmc+-t`l$F2nUUy0qywNwgt7)Qvvs48>NF%$H$#4g(LtQkH6rUF@2Ueh(`10_zAez$vK>9yU|pBo<2q&oDRdSa1Wp6Rx=1ysW6mO!%Ax`VT1x9fQ{dui6Y`LeizxW6{UnQ1@riIXCXUp|1jxp72M)(BdU~TuIJ(aI&pFvDi;cWF#ATGD@M+sP4&uqQ^g4Cry$Fl6 zXWbN%y{Ov zjO)QT2D?46To;YUWS16vE}c#NbOgw{Q~+Th3nzM&!F5lWk|OgTSDGI-Qcua;(Y_`W-O1>EXz26pQxJU=q@zZ4nUk(+4B!XK+1f zVY6xHHOe6l>OhVxpJ!!|z^UnB@yVAje8N97sL<3Z6FGS9!m+HrZGDgaYG5 zZD10GA-Rb6j%>!qjLn%*zU@mA(kqxPO#KFAigQVsO*13e5@md)Xgq9&MbwnKv(O~^ z2&e*gfQB`Q()LYL2GH1A(MF++-|0!7m}xIUn*HVNl}7k7MXo(^ z-lXmT#b(^_j3;%j(DNH#Bu<6>Cj8l;sjzj^9>QP*{8Z%oK&yyM?&`NSP~RGpOzy3p zQ&{LBKjhn1S`79U=UOh-qz+(;lTGpRBG)vPMFxX~_9*h?3us(Ygb<=uAa=H?ox+tZ zz(J*o(2`>BXu?*2M^A+dN}%mVt!60lwxBfJ^iC5EGB^q8?4Q3pHYo$RP@97epM^`_ z=Wq(kjba2nlpoIo3!Huts8T4=Ut19@>K{lKWVc}r-L0!YrP}c`(ZqI7?nKKp`-ngZ!=f2{G~HJ+dkJ zOq!aKm{Kvk@jlc=uoQkN;1nDd=!CY(nkD&D_0fCB(M6`GZDEa=v(2$KtI|T^pkvOh zPDHJ5$*4-w8eHM+$z>wTAC2lDhV4lH^gOs7rvhvJCzHlek?Hf@{bUyVjV#t$qQ28E zSZ@FGaDC?BO_KcK=>s!*jL+EbLhc;B2 zNL>b=Ftwnca*`+1@M#N0@{er(l;8{@mxV}`v}F+5znW^6pS<;V!>VxDT9#;S0HTp{ zY$3T{t-raKgfJ>)ERT61I7>RszTNL6!A&M3vKpnvp%$Jm+20K}nX}cMrMdFFraqNL z&W>n|)GfqrFb47k5dLf@3tTx-rQ*gU{#0Uj32thIIE@PgNLb*=;I$JIFJ3{%gso(C zmKGMHD5O==nG=Z>^tK)TfP9Ml)-(EaoZv|S9Ej$AOjGm5j{`|9%*US;y+z)4Ke)nO z`qCXEp2NfeD@>nevPtw(hq`lrGxk%X ze+?J1{LUHvF<1Gcxc(QM@IMUN_y;)QcmMHkT!rngwI$4)e;xnMVSeW=e-GNA|MC}a z`S)BAtiL0qzhMT}KN}YR3NxImui37BK>)ouO8p4@DFnD3x##F1;Sc0W7Y*IQ090uH 
z;~{`d4N2kWm=w)D`zrj!*;E`6X_3Qh13fH^aQU_)o!QjH)R?aNl^K1Yze6m&H?=Vu zJi3FgLt~eE=Z>L)7EMFnpl5+wRpPg=)hXQ--8!9)Z$Fqdw?hw2bVa*qWyI8nOjOVF z7d%Hp6x?#6s%7Uh8dz|{AlJOoB9kxhnmg>~^j_G1b3B6wT#Z6wvh3Bl;>geg#ZnN!-&*V6(~oNsBvvwAw(E z6PM=Pz#S1KlWtN7$41*E_k+(2=PNWBjT(0I$}CEXNk>n#sOp!|hzupC?;UhbU)LLl z43v)An<{gvRNHw{S=%o&*o>HtRh_~iqac?GA2B$sT4^8#L1@Epe6=X#Emv4T zR0VzALRP-8_O9@q<`G0FkNy<%NFF6W3bmmZLIVm)M$ht~;=ZC>mtT9ua*MF98wWr9 zv0%Fw2JUN6;bzf0o1dJXUMSjZwuQL%;cGiXqJwXwAXY4W&W_K(8FZV~D-~SS(|42z zS79~&!jNc$FTMvysX8?I?mLu%Fah=IL%B~`Wime&%;D9Y$ok_f_WeU{Ow*~ar={FV zL%y}EtJUKfEcH^E$z%iOJ5Af0?4lH*C)i3=KMj2vLpX9!*YW9ogY9ZvpJ)-HAoU6* zozmihj>Czw2G6RikF$(+vC23f^!|=<(j(xDpCok?P%L|?t4An%O!ZpFC@%mC_ctkl zF6r3^lBdk0r9#++2Z}@=qgRqRUIPTBSy*4Zv6zwq7!goI&tOJ>rIFsQ$Rh7D4;zAu z)g!vZ4E+Gag$Cfx+eSBL-pPkFbg*FLc@5>_T*H(rSNKB`Fn~y>!y^T}_+F}2-wNhF zGomX`YLoSSd8~NL3F<;uGM9D8bn10T?5TBNh9w_cTfL4GPHoDhh_n3zk}x}L3N~(r zCY_8Ci*sM)E-_LcMLmA4z6^B(l{&z@XpQC~$l{=+Fu+A(_;=mrghG;8KFZ$b(IUKW zb<-h$BK5NTG*QE2>1eDdinwE%l$a@P7-XhogSj~(cWhaA0G;$L4hxMz8GH=lXp0)8 zRwFdVoJu7e-YldX7XWx8F4!Q5?pg)kIaw+vR3JI?evF_MrlF|s?O}F3$dZD!Y^Xt4 zx3qm+4BlgVf9-?|W>4*mGrA29bGs?=MCpNCQ>r^UN-JO)ZMi~&d8-Oj3d0p5>*a(4&pTKAct-LEFq*z6HHGjk z$mG%{8gk42;zEZoJWCUGc9jD?f-?t++QRV0Psb+rIk-0NXRr0_$o(I$ug!^=(My-@ z4AKrVQ5>f)$1eWG70SZl5Q}SoXh#$3`!>mXpHSNmPJySH{dLF258+5h)a z`9~G_4@S+%!ol`;O3nKFK*>KYm4DQf|57Ud>>B^Ovfe)^mA{)6|0VUA>W&+QfoD9FNqi*kmvC9ekW zV=yZHRW=1AfNw%ntoZ8z*ym41ZjDx+0(9`0R2~Vsm~4W#*m+&w4$t8*yY?az&&LeL zXj^&&sA8Ex=F6&TJ-@wC9%{_T3CE=k&5`UyMBTq>4K z+val1HVoU+kKjAHP5%=`@@SOtPz*Rlq7O4&grYD|2J9dfjld}r-DK+e3(H)b)(dAo z^e+b~47m0rAFViyB+gnkE>=y(+IisG*a2tF^R2d6@pB;)l~2(5rI?lvGUd%Mc1$Z7 zquOxNftY@4w@>{7EcU7^4H^o@m8FtqY-%|tsBrZa_5dfc{$_<2zlib>`GNZFgg0jZLpOC{w5zi8B2mHFhyXx$jv#qLEt$-CL zHOjc;(kh+a391b^SnzXz^I4VVp$+Q{c!pY6e1Yg?0Q*e4BQm*aK-!ea(tPS9S^0f< zgh<{KeUrRV3one&m;t&QEq+XU`rKCFq%=$<429DHTEyU`lIY6wwswB z8?YN>$UfOths$yB2`z6cNgMs7_6$VGashW<$&TVu#N+N*vdw!O{;MWdhWPgHbx`@H zv~%Ejy--SY_p<%?OyX&_t1;2ap$N1i$p-xG+v|=@9l>;14tQRVw^ra)$VDINGP*>& 
zhmSJ6gI`;ak!aY(?@mL(a+iN z=&&0I-WyJR#&M`Cvk2i~*Kji^mjIb|b&i#wVw4$i$)lhel^|+1Zbh+n4A(kMwirTd zt)92HQg^$}%?vm0QYznaq|DT#ifq(c-h{rYHg~|Wu?Xet?nPW?tLf)?X$QP-cT|p7 zSA3S2qo4AiW6(W6JPbbGW*&Jp1?IKBb@@Mm^`lFWJ$QYB*;h*GWM+ZeA2D;xNn)#A zAQUNMk`qu)nxoMIj8k38qXp4YOYNV(%E1`xXc2bv+~6{ZUCnT0d_*xmV(3wF_V>d^<7ecX}#+t=q(;p7j$H#@%S9~wHHGEdV;!)yh9}WYByB$Otg|Go1Ks>s$Dy=FI z2kJ3MHIQpxCCMjnuPR0yvXHyd$WzZE^~ID&S8X97Pp`}?r?)Rg!7+!cG~y|XPrpA} zrDSgiDt&+rs2TRM>ZN_Rw@%=K?mHOz4%v?OGJh)cWRB0dJ(`htnrZ2#K z$Vod8)VF}hY!;c=z1i2Eo0fJ%J?u6?J$!l4(A72JQ;n)&%?fVUx5Nwa=VJ-;KvXLk zLc}8L8I7A@%F%HHA|sJX=zbq=A}_h2(z=9Qn^UWqPkXTXeKzXgKxJl@+B)SzB+OdO zD(0ZXNN@!8YyG}ER6SIOcAC$GN|T;ScZPRGN2Ej_nsNO!Zu_UqyWW8BWj#e@_MUpY zZ)8f2L+5Ltz)`VL)98VxXT~Z$`>nB|1bS1$`5kmE{vBHlT@;A7MT;*B=ds~YhP@jC zMh-olulCOu{cm$-?RyLN`{NIv?B$$v?b#?J0GR{R*#z5CuN(x3xGmzk;;YN3u^&@j@P&1 zilcg`*^0)H7y$hUK)!fenCGI9tO$niQ1?)yo0ZxrD0{lq^(es?IA@p#em8r4`E`nV?AyoD|@<61e?>$R&*3?KfyFByt);;q z)X=EXN3NI8GG{GB`C(qjC zknx7D*NM3*po)jAvA&Z@bJnlDShyCoM_b9bR@m#8tDK_ zDj~5cRQ^+<)1o-GXYRFFuB1 zi+<~Mv;Nr<{@;V*KhE&~+h_Wx;P>yE(Z30Pe}~_H1HZpUaQ>~{Ql+|0>~0Gz*um~w zq_?wwgyHkX+K&H5yN+{Yc5AQA!$o8hK8jd+$!_!Qxapjpo?;RX_wL<8q#<8$kws+w zNLXwrz4p^?)Uwuq?U**dj`84m`{enHFao`M&l`cJ^~0)V{cAgNoqp?1)hXBIxXBim zo^t(Ogi>+{P5b^Bv|!_26)_fRQ!9y#YBKw@4>6ZY^0=SvN886%fL&^#tz?&)Zcl;$ zeGTUb8sF{4j0LwwGsTy!siQKi?C>`@JdcxbM=>z5P z=D9}~piTSswr+Rdf9+@SRW|ut7*P@K?$&2gd2Z1d3g~ZO#;cu=9WjctO!f0{cmG88 z?aBTUsUx-1b<&7F9seLbb=bSjqpx?sv&ubt+>w{7`Qk{fb+0X1F!+7TPIZ(NKD)Y% zwab!{srAxH8i5aA#jOT!G@fcrc8@c=(O2pG-c^h z_^(9w+0ErX3n=-g6f#wFW1F5DrU$`p`N#$3skRWRy<$q_1th48WqeB9g9p;Ie9 z-CGKro8Ftwm6yIn6Wo0i*kQra7L`rFLp|4np1@r61?_~1tSbAa5j(C<-TZr9?JOTVn<5d8d~2WpFEy; zKC6oJ98Q&kXjR2%Q}YumQGm0TM6t21-avN{x^Hzs;d~E;`2{TjCcbi2)AWRln09yg#Y<(x^8-Yc$wBuJv&33(Hmm{&WK@0+L*ooXBRP@r@{2zc zvHFG9q+y9GYwXQQ1yr``M9Ww+yGcZelR+i8RIljIGfM&zU582nktKU+$L|Ap`zwQWD453WosDK&E z?_@Dnc3>{fBF|rdE38m7Loj!awNt9w$(&y7>6&2+yvlvg6A;52D{F@mfgtVrh9LCh z>kZiQBsl}ki>7+mn!8Cj%2FNja~0R5esw!XUQPr{x;^Mf++_Rj 
zG%*dNF#I;2Erg}qrAxkoKObLOe!oF$uS}kAxukS)id#=Bw&+4=E2EY2Q0fX;B$qUb zzO}hnSKfZfoQY6ak(bU|gr;?Xy>oH8OlR3;d1IRf+454b%MS=z_YtBvh%#vNeB;m4WVoGgSu63g>pB`*dy1Wj2xP~+LAtrdg`yigLasft($|s zH^_UWE9j_#6zq7na2T*G$Qx-N)Foz&_ud{ooIM{reDx%9@a*K>Zmii>NHmw&b-!g& zVqq`b=2CZ8v9mhln)1Q97{bBWED?{-x`B3#B-<(5pMex7v?$A$b|?tAFgp)o39V01 zn`Yuop8KS}f+jgCV&@l>b192kQmEvnV9q>Z7w{(s)h)wr?H7z&4gzx_lqHcbVkM6W zy4Eg?UT!V3gVc{3r+ob)ym6yhc0AHEwS}YtEW12rIP1;?oFagip+I8aWPM=I%HzTK z&EV!bi9B&-k13^vrg!xb7#`&E#fAnFBldY^=I0Qd`^XVn5jwErh9W^W?5+1_q}XG1 z8un1{`MZ=KG1n|0BKthA>oj?9wx5dD?6D^k>sFr+W7-~s4MWQ84Mk6xNoQDW(u zLQ*f`tOspR?hFAF8m+yx0!-b*Ur8GYclI(3{yBkcoKy z+hl;rh^fFeKXiIpS`Vv^5sUHHW6|rcJ+c;$RvWxfX}|}c@dlpn7E*UC3{0RtkI_He z6Z$Hzk;T6sMyCUmkwX(Gt;&!SoujK6%({~`LZ{vLYz2m1XPbNW}lXC~G!-%|u>Xz&fnmpunbZ+cCe zSANfrS$YZ&fmm#f3eNTM*N6}eIYLRCvcgC7Hqcf_G9GE49US<;p^1x&>oPT|sg?cujr?W7hK{f8W&gzpdpof39ipaM zU0)N7H*q%>O=({nI>Y|zyyG-}qBMy${Fm=&Q4ob5Qn!t}1NYwFvOMd6@tuXwT^GZs zReSZ!Z@wMCMfv+Ef(jxD{o867J{7WCybHFr(Z~q-BuIGrqmw{ich4?U$sG6@MLvW= znn$bDn{cgGDoPSKOVD5+f8tA|P3J0$^`sm(9j~wZ>%)DhWmX(2XkMo~axo$WB`o)T z=+`NWd%1`7`HRu-MKZ-P3Un&fV`sr=JLGUTCaom)+d9hX&~EE>j;%kZh?GT?Hl#P^ zPH%iRS5mbBC65Zs8Mg;ALED-Hf1rh$OakZVsj2{tUjb^FKUmY|%FXz zD$n+I=E6!>8&L#MnhjXQ#Es-@W1--caij#H8;9NI*Q5bacL7?eNF&gKG|y9n)rd_# z_S9_>OGQ;>WNMwtwCK%7on!5`G#4Xyb4JQuT60OouuWqgs>aKyf#RX)t^U(aA3AoP z8iYkrCqf5U+Mz&kibI;Y(}0#I`i!OJybyQRvtl3&U*{X)Tc;mEuxsZ{AWO)@Nz1!R zG5Gq2zIZ^AE$YbY)`M`;>UZVTdN}b;O!D?(5x&)mw#wn#5@x`L831O=Q8r^5IVC2L zgao3kO6w5Oj|ihz15gNjQf7J!vXmAE%C0G8%n|>BDR0o%*%I|P%aSRfhqiCeezUE- zg4%tARRZ+zXPL?@SI5ogEhQ~5yxH)6CwAU>wnd6E#53oN#LofQC1;7}%RPAoAfS63gatr{x`7#{D zuM1ga?2?)Y?9!UF!zMW5%6aWdf@Mn84+d4eqtis}Bwba`KrHDxP6l+HqUp~=*ltcdOVrT<(klT6Tu}5my1`OZk z5^pDAQ)n(I=zFj!cPR*RiCIdEnVSVL3bP?ZJXv3+29(d(&Dwf9;HB2iFFL3 zl$6yv>#DnCNUiQ&;N@doGN4y$f;m8>CX>_-930Ke6B%OKrKkABHL$Vz)tH+(U@qHb z`a{>0NC%YR*tCm&1u>$Cl`Hr8mM4p{- z4qjY$`@C2PuJrn66ENeHDYF11!9@Ci+P1J@H2*VAu@o2SadngcQXsuJcK9(w08gYS zp2zswEde?^RtRoBW~PkxI14KL2s&K8o{b@<>a?0ipbE8EH~j#~GuU=|NZDmiKw~5i 
zpA#3;#yTj%p0xxxMFK?SqUs;0PYRlvVo~&@4@mr6^;Qi+)-dXMtZ6hLfL5y}Rnj<6 zx6|Whz&+)JOo?7gN4pWo@n6Cmn-QVd1FwO3dzaH<5LsE=ot`4Vx2NMxXqcHw4&8Z8 zPL&>COhJT8sDj@i1pq{oI1j|wkyFHDFcuryM25nbYj=sovmxr5+e%lX$n@Bctm;1y zbw!C$qt9_F%FufXg}fa|p6|xKs*wl9o4SaDfws3pXNr+bIj0b}krWBT4Fa!TGow#h zpnJxqG~oT*Cz+h|z#>jC>(1#*fLFw#h(ZAC$tcl^zRpQ_Z7>Y16Xwtz(U28m^6{G? zK!)p;d8;OcyMi|3fRBI)kXcag?z*RiX-FMIQ57?UI=gDCtw;t?j#l{frKd`O67Z5F z7R}5Ucl+->KRK5={Z9L}3Z#5FJK=b!gkr9?pH6!GjNH*Vpr@&MheW7Ep|-?7 z?lBG-`GeNqrLYLiSy647VDMxdDyt@0ATaGw?sjy^t_~x{vL9pwGa3udhi5mjFq!}g ztph>;*aivl>rYauA1mXlAtw_1Z6(=A+nH>KL^*Acc}4s3T~9)-R>r){r639=PoG0x z%;eDX82r?GW8TntWOme`kv((E8dqT7KE&+xfi`DNED$d(R7b`ZMyLg`s!OrCycT2k zjhx|Zv3cqQD z=J5mAXW~-gs1WW+;#Ye&@Lrjwq6gUQO)yu#*781|jez@&C&SZ=VgV;QyFBg~@J`+f zyWjQl>ml@s0y$%ayXn~xBWxZRV!9O*iU<^{jvajiBV#@K*2`+;@SUijx0>`ef=_p7 z>kg(3%rT=3sAF;YX;+R0TXzRf=D-hrC`2lNLxg0w&XRUn_y^f}DE&QB{?!G5gzv2y z9bZ%zZsB0+=B!uP+P$!mOd7oYOZkD|O`Rp`#(tIuoEpase{4E@yh%wj6f&SwAkd0zj{i5dp)>MPG%v1;K!8>2{FMu4jxm0T1&+Yi0mR>}W`z_wB>%9|=KHBGOdi z2u5u2KT^eJG9+fS5{z1&%4>@hU1P%s#nu4~YQA;jY7(%o*})tnpHC|@1t0aLtB4&! 
z#1cV5HF2Ms^w2XTQ-e?z!Fss--07LN5s7vUG*7uWr+&-lQrx9Q(9fFMretU#2C!>1 ze@HqLAVFk=9K{~W@go^|^!{k7SEsQ=rSrc3XtY#s^2{~PO9($4oW@DkQ+7Rn8K48w z3E5V->ZQjp@%fh8FiAI16RR*3ZFnj7w$@w>BaZ2&tzbIzKzJCshi*G*g7BeT!8fzZ zJm&B06a_M>y!VPix#*V9>PSUKC9_Wfns(jWuZv{fxl?+l?S*cxPcSaVXD|vD#!LkG zMY1F$Uv>{9rZivN*A}&JdgpxlNG}TmH>#=oKr}y^!PXGN9OOF=1m6l)2cW4bgWt6G*_4|r}ZqJ?GiLBY;uS-;?x|Ag8?e$WMQon69wG(5TVTP z(2@`zr_g2A)+FQB?KMk8UrZK)_i>erYLZ^EUoTATT6EA27G;1Ilpw2U_~EUymK{sB zOwT!J#u7~IxN!(-wM|>BCCwysnLU;w0>Bi2pDWVG&D*N781q@p!x8K1N|o~23mWW6 z)$X_z@GjwfLg_s;!i>yPV)+O#Y%;Q7m5r)z#$JF^o)?b_U)v7&gAYgsZnbhZjoJ)F zG6^$#@c}e~!$*`n(EmCb?dNer1#IpRhtR|Ba(wy;fIA|f?XPvuzZY}=o?p-UyM+9QX!|#_ zAW0aj6|1aBLxvqY05k~A?1jSei?l#i*u<@-)6D%B%j1Hp;LCZj^Fw*6$xj&{BCp+< z&=isl8LU@wg9e8?RmHmt9Qn;U$(?T`uB$B|=`HW!U`ryWnz zTzk?w$}N<>^XCSbLLJGsx}~=!wT4SpWIsM1TihL5_2g}tD@W2kTQh~ZOR1}THTvSo zM96MK+sanq(Da=+uXvdhO%xxp9srM!VrU1 z%fVrvtO69rOY^ejc;6HKuxp>KY$pLtHiYesQW8r1Ik!=yAP4%u1f>HiQ@#=8a93eZ z{S|Y0aQ*x-lm;WQK{|^sN{CXi71k)BG18bkah9b}WObWGxLF${zHuIwk6Gc@83*p~ z%KdWIEi>_pEnF7fwC<(pjE$|sAYpZLak>Z$R;`6J($?i)fDy@ASfEoBOCWd@O5(~q z=rb$0n*2e)S9on#k%UcSib;@1JZ8;Ui84r_ffqQqcUA%yg4L+=+3z~cN59^Fw>Er$ zsa~pLqKl{zl!uJ=-x*4_%}?MTqHa*prG>{o%o6GUN)oo!BA z?Tthg9^w(9aL9BxrU|A|juO8^yU*Aocrxt74J0@nK>ANxuDFXC@*x;B1;koUs1E9@ zkJ)abV9JLMCLUp*!T|~p7r$Bc&>iR5#2$bo?bC*>Udz^#)fykYt8ClNjt{m#Gtall z=U9@E(Tu*Jbv;rV44+N_?-vWtAvXN7C|5c;dJpgxQ7(=nmiJ+%17hi+$9O)Q5mRg( zvpG|G(*d1Oko5hsw_bfJD|QHM0aCE|ulwuHbapK5Wt6Qk>Y*DO%xILS75+sRXPN7? 
zm8k&DTcDJhS2ebtZ~|I$8k)>cQ&DMgd76rf3ILg1#EnNgGp_}MBYsWw8+%5ST5@jU z=-vP*Eu*7Qj{Y#LV+rp_>Lto9XezI1-YfJPMj_TXg-z{k7p zzch`Td$~SgM1W}=_XwP>)nMG39?hB*4sZi?`BhasFVV=91g@Ye38}>0u<`gtf01E{ zABOs|u#)QztPh2!or7Nvm#xLR+?5)*Dja{vKE5;1aQ>vV<^q5ETX-XDtdr}gB(^it z$X5@kq&f>6LJV&XRVd(XvfTCgbF5juvx5XzY9zBlk+GRhJ&ppe%OpTRw)J8Z}qzEPqgvQ1Yz3lukg(?h0Vb{H=5RR zUzCiSOL9~gJ?QIV>p?64iW22rKE8dzm1sed{HQa|b4~&^7%vpNKr_XYCjBCo8Cvnu zpsoy-S*#RggP(JS3e}*`IRFU;%n}X zB1{b_FS`-D^L(ysT&go12+oUiB3};ATi0nZaSBDIYCc?6LxPJKhIy!@lhj+$Aip0z za4yp>;mCpF0aJ2hj}WoC%R05e0IeiyXk1TTqzCY$L9@&cZ)|{^M1H!)`Jfq>xTlO{ zKNy)5wg(utP#DKc=G!QWAucQlzjsl+R<(hSOPWQdp2B#H04(YBKC_mzN{Um1J)Qod z%aADuC5IMlc)p1>3{d1s^{=%uhW)_4@063*6d`Xz-?$G7 z=C5q-Cr-fTJM4aycNGba#D#;40%0f|Ii2vyVw~dvN1VdLI;vVsvDK!6yhIFzYt}ou z8*(E}PAuezagHtcvS5%WN3ZRcN5q0c6SC@^_>I07YHS%LH{&tfJ?6H=#y~Ohhlu#+v-Y+KNrnyrOvRz-T9H0BRR7B+G-Qr?|@G>K%JGRxa1N?Gz zQ--UT6`DK+^~`kGG>R@N*Mih?R9RawHrh*Ief@M(_NvkCNF+A~$=K(YNScy-Fc%`r z6MfZP4}5|}*d`KIfw*HCw1-9b&ctWr`n0iSRaP^$E6*s|PBgZIPZ^18;`T$2M0n55 zNc7D-s~5SKd(kJ_#X0+s5eI9*OYhAa81h>vi4a0$YVF=k#XUO%i<;}VRG0=akr(I>Kfzvbgbct5jP;tUCyJSkU9j~x zPD>s9Q5&{pN~}5oE>nN)4R6lT-jt*s7qk7Gf*VEgK_sZB!^+F-Prx?CD)qm?#{Y~4 zF*36Kwc5e@ucJYKu-5;Djel-?{JYr5`u~j)ev1&Z{=RMe*9_pliT(VAjsG4Vvi%Oj z|A~ie|7JzOuQF8IwI;;>@buZxnKimfA8!Ltntn`>SR-S{IJ)v{YSkfGUzZK4{Ago{ z_=rGzZ_nI@ajkz?X-9)D3+dgC%Xp+`zu88|LG@1;IhH>_ufRtYRerp??^nFE#=!yY z(0uBj?k~t~EB-vwl9&&(ua@%OJPw_g+ob*QxCT$r0*g;4X2No1qF8Uq%eqkTwC%MT z@W4bd`8398QqRCbOBYcYH^3zA$QNFv1j82SGO?h8JPA#+_sFqTS2iez#6xBJ)wG13 z^j&bWjCL@W=m-+;q5q3GPF5O<4nnT)OzP);8W&iJ<`%#u?VB)2bdGpGpp3zZPX`(b3g!fcTQ_HH^%hHbn8m%Lu=9AsB?K0BL331Fst zUVip^&&g@%E16=9xn1o>a=e&pGz zn{Ul++=TJ=aZ`I4$xxeph3{oJI|-ezKnxgS$yIF>Di2zy>HI=0MtS9(eXI^8Rv&LI zNi1l~rqY5_FAN35&+kaIk9{33HY+HrI~)=SWFs7I+L{|UHf}o=%@WUMnq4js&K07j z8OU6eQMm|vwYK6k?5RTsw25dSosW;s5v9Tm5J|j>;jXM$u_)#fndUpiO*a;zesLtP zvZz45MlovUo5pYwXsfDYYj1%CideIkm8HcETQe0Md1RJ~egzx*g>5X4XfrzE%?w2} zSr#~kRN#A)dYToaHT&|dJGRUOdi7;S*3KuWwax%q 
z^xGS)w&=iV`rhgKW092HDA>_lDQ>kD(8TcVOZF?MbeG8>GtoWk58w`BBdU6vRPffm zFeQ;q&{iDjRrY{yCaYc-31ak{oR8|gP03$CiV(Y&n7_IqTL4Hh+;uRM8RIZSAaT2( z6vQC%>Knx(LdE~Y+SkCxm$L(K=Zg_k`V!KP|_u%q$f2VbcCWvz>Bv5s=P1U~6s z29Te;Z23t&n)&O=xal_PpMX6tRH2U&OABN%Q2xw?7B4@FF~E`E4J*Zx*S77Kk3^#` zFYMm`UPvQa61|D134XT@V-er1M7UQS8n*?ZlO&;%Q9orGvaO;1m}}=X6gVYg$TkeS zYwNRHZgeS-YhMZ1)7MH{dYauWOsDHUJ_Tu8yX?X`l2cK zu!pa&OmaB_IP#)dTUsmYGBez+^VY(C_a3>B0_XF=@5Rc2;Sp{OZ9x-gs4r>Q9wI&i zv8OI%bDp05hwQ7^wA{!*ISCMcQ9XJkSEO;$;C<0%wF*F+i{Vm>K zXXVftW_ip(F{IE2;IM~~Vcg`iQl(}c3)f-ncph{!8h2lnM{`D};mzd;b%s_NgRRgpF#v!~4P zR?qvKv^{_D0Gj4d1T>6f+=F*{oS>Gkv!a#Uj#e2A%n^)he0NIzgqczfNQmy+*Q}|gldMWyVzSy11fukn)*}JHOm}4-e^CcT9u}fLE0uI zsG$b<8`i~@aU+zKD0qea_tJATqp@IT*A5e4NDLMo6vB8Y4};xOJYyEqL>Aqy&VywYU?I|s&~%} zeUOuqW01CGF4kj6K4mV7_R@*>9{0}DBYhvkEMb@+Brr=$GYI#rBVR4OR$&{?eCB-) ztBN1-MX6Px)ZK2JRqe^_)EP-~31dVDhbY8IH$SJrxxIqCzpvBZ(LI00Jw~hXuGYK8-aC=dQ&kE_`?lNqvF%5xgyLRs$i)lAkI@$JegCj}N z?HLi+2XU(p_7+2!52haP2f==7F$1Rlv-;6zY(|+gk)^HlH(UYx?;;0#;pqbRmx4+h z=m;kS1oI7ya*cYJnI2o+1TMErc1x7xdwVDx^UN^_%f;=`xxN{}%Ed;ODv07-Acn~Y z6#X)CEfATHTQlX4?}L+g>4Y*jC0ZU_@5HYXQy^~HNOe^>u1$fWYIsw*(xI!7n%+Y) zU(M_2kTnU`z26Y6!dgWZ9-35mex{OfNG z1WxKqF$LV24X;i(*aul&{l3kwUWLZ94+C27Vf<5j^ZD)iT{cKtP-H-=$P6ejt!GfD zHee$h!B)b)^Ve>~kF!E8OGW(Gw}{}Sgn@FUv|cqB+;LEKSR^awNTWBIOU-RSi<3jn zevWY_%}hMEb04j4{S;txD;{T?y@ zd*k&F9R05Z&GtVm;rVBR{^NyzHeP>sTYsnNe;BX-CPDwM3;sdSf3E)g%dHp{N!wjk z1h9t`-EB=`z415?^{=wyE`fz+5yA^Rcg}GnoTSw8Kolf)zTO--FNDH6deHbn6ELBh zUbi{R0W0{LWqJJ(Xh8%0c^0-8i)^m&!xr60NMy?f?anHvFO>u&?oCGDX0}S!LZiUA zwL~ajUle*Tf*(oWOy5V>1&w4C6{dh>>GFg2!X2wO>|u*;l;yI}p4?&c=b%6@;R%L- z>xG3qJH{^7j5%};K)!nPUzp{#gapgO+Fcybm@5te6I@LYV?a7vl~~5BDO!ct%q~O(wYI~rFJ3i*?eS^K1QdB zqZ{I%EDy__0X?UeT5mfSs!zud-_K_dT%=gXuk?e_C%~cC)ZvyAl~Z7^TCXw}3I@Hu zot-^2)w0;Qn)y}I4(;JB6FyI|{FH=8OdIsspfg?k)5=a=cOmFRgVs~Y4wIDG4OK%N z!9xexqua6@B3eOnh(eq~#bKG*5pqbZT7vsC+e$h-L~}qbUeCLyr&vJMN4q5LQ%}EY z32BOJ^vOy+HxSD;Q-!X@s+}v$Y0rg-*eSO8wh<;>!%XL10>9@>i8jCLyS)%bt$Wy) 
zgcUl!3YWH(NudMjqIf|6f-+h2t1ng}YId{f0(V{?A1Nr}7ng%Fh%pvBj~&w&v!%_8U1*u#8e3kt_>>tFb=jS&A>#y<5SXA zQ3?;!i}ZB85Ku}TZMCfJXxLM5?Q(dxp^xDl3j;*Kk z*xKS%ZBSjr%wOsd#fdF@k?j+Z^=3ujZ@$bw=QL0Z^bCKQ4s5@R|NrL8{4;p`+u|YH zpF4T~d(+{M7yjnU{Kwz;UltGl%TnP#x)cAs@00!a82NvGpX`4Q+y2$}*^VKZxZ2=O zDysW}{(xp~BAmD-X=QWKckN;|m{>*@+5lb#QNC0F5dzY#{3*it-u|vbKoRcZJ>=v@ za0U2u)s7!#^Ytk&U2EOQ9;r{>zLu-Q&G~a@|CG2@UO&$GTe4mmn9| z_4S@qW_ErUZnI)r%G55a?1SSjMitL`p8IA>T?fab*N^RT!}}At*NsN|SY)xtchtky z2Un_=sgZs?q;#Yk7hu2N+jNXWhakX3#IFG7c&9$o2BfH#CYCc_G8guO55ovGHFAeQ z8p@!O^%O#Fs{UBIYU9WDca9i`ZX9b>@OG8$6J#6awjb`fw&N8u&fW%Q<25wy*O&TO ztPRU=SKBl&L>o20GkJE-J2WabHwr*g9krrhA2ne9M;`z%?5kHNgM}KUN@B+^Bbf{8 zEnnmNh%-RJ=)&O5bq9$ehqAD<#z_1zi0XeDK?;iowx7p1I8FMFcW%_R%*r(IxF$J@ zz`g_*$z$XUEF^amMbCZ3((AgL_~tY5nNL&t z9xlXB&h^2!RACssHN%ql1Jr3152Y#bdg41{4HEFkPgm`GzYN$IIsw&exXX1W#vfe% zIXriT8=E(Gkm%9o?xeIR2t4v=DE2NQ-`KR1D#aWri4x@!OI+u7{e}E2oodgsrn(ZO z+T2{w<|P3t0YSb$6c3{S@{-}dvp}l4#xyd-ylDZ5&|;(|L_r=SD|Nmv1w?<*O$QzD zAtn5rP(YaMXph^6zvLU~VlJFzk>z#U^}kkc>JKBBCeRzw?)?m{VAT1T1V5*}C^=M% z5n0JnN`ODZ`h&kSmtLOf{OX{)qr?pgc@|CQ(?eldgIvG72Dy_wtD7s z+iBArTNrSYU100?G|-EBWbbvur>?4w-xew1CplicMJ%eGgl9<4vcZ}35;-1gZl+&h zPS01Cmm}8f=3+kT*!0uZMvnUl5RhU8@>)r5(QT~~MtHpz6O>7!lzq-^&9KDt=?Q(1 zAl!fQLBJ&v*cE6Z0lATJODyMZZ!$@<_cDUWGXgCft?K`|2$MRb^0gr?zMOn)EBxp3 zyr4OT8plU|aAHx98xo2%gR>Z3(5!}yU)lD=<&48xz`g6FvhVjg^+#({O*@RD*4aY? 
zjZ#DU=#c#u{P^?aeeBM{^^2Eb$U0PGJvY z=LvLa?Y(AlIyorXvSq9^G<5}^H~=vJMpvrOC04MlW<>XW1~(XoWXdCaVJ8ym7_nbwUE`0 z%vn|KgcnCGwD9P;M3(M-I{sW@FZMeuutb%|><=LTT|C47o^Zh{#Y1X(@J#$i?PP-;s(I94 z<&&1*Qc#uN<(a&fC}77G>D(Tmy$F@f)R(_oeF+3B&+DDynn)cLIbfW`Im3X=bG+yx z8jqX_A2I*Q@Zz(OYZ%Fjw-sJ3OZd<`{<4A$Y=aZu8TzU^`doEUqqh|6YS@`%lM$k(f}kv?R@z1nFubj0X!M;gG4k6 z4F|EMgv7SlBf_s2S{AV`nNor{|FlZTfFRg8XeV$efK@)C7RdU9?#f>xqG|Yz0tGL& zuy-)Iwx{BeXRdfhgLUj2M|qh~n;3`2kx4m}?j7rgQB-S-iyW3@f1IB|0zjIY0)dkg zWoRC;On?D4qO3YVy$wpp_DQ2_OUXl_>%-&zAnVa&L)pEmNl`O_WP_nMfcT3aC{xb_ zMrg2XF^D$&-WDVmyPxM(Qv%^0I>1k?RRJp5SY{BumE6=tWB|u`d6i>^Bi*r4dOpMQEPvB(ib|Pjm%`cUfSH6Z_2UmqbM0>BTY&RZ$rS!YqL``Xl`Q`nC z;J@l0Hn?ca6g|lF$dOa@K3C!ETbs}jY~`%xT1lQ_IBQ@^*O0f51mbXV1mko89}84Dl*_aGhwZ!T4XbL631xin9&{CF^0MIK7yTa2YeHj`we>e}=+Zm98Z) z1VtfzC@GVit z%lC7qGgL>e^`9y24>0*d*fKD){39O1_IoDw?=dW&?-&^B;6S=>)<@`Op{m*N0*niJH|AXZIoPYf*lJizyjag+!0=-G8JZc=` zZ_F>HIp?P4Or4N|Pq3mtwyIiM2iM56mXXX8O}=CAjPLfkT!^kQqKX7dV^*g|i&XAc zEZ714*5UEJzk1tGxAR7Q8XcRgz-th?mwQ{SUG10NBiqQ`{6bRFrC2&K$A!muzrczL zvy=utH}KF$`0(I@NV`}uaUzFxWJtpUUQ;O5H$l zvBY9h3*lXP$vlb5dP$}2>XkNUX}!6}n7kSglSI1FZl@62lxebJbLnE{sW@V=T}){sJ>E;H3Th^--8)Ht^KuoNsM zIQ)_)wPtTaM5G~l!A)~X)oq8anAcdMo+22ev2Otv^VQ8rAAqg;$9Uj8W26c)q|LTM z4+e|h-1Um3a)I%X(o0(FBOPT{dHK{O^*YP6dKT9mBbjT2A@Op!H|4@$VsQDllH{_6 z*K#Wo$HJU;CU&I)Ynqw;20Ca`W@nr9!1mn4^)Z}sc{8l-6?_m4@T!PD?R?r24(5j` zjP;U(Az)KiAWJw(UnQ>Xd!rVfRK}0un6J6%P#Jz@xP#Wc=Z51AIwuC9U>JbsJaoB< zb988c({q|_TR}(qVOGQsSorZCW%83RNI*DnG+X0-uD@XTO54T+%c5%1zda2 zQV&R=U)n;51jI$y#WW{CKn|*&j6_js^kjDtbpiV)wqx-=Fp*EoTA4JfpzN?jzcD0TWzwF3nu~iM-HaX(}UQlPrv04Lc|` zSl$N&A-ISmnavKfoq>dEmqwHh@<7u#j{tS8y2=*ga;cUO;b+KKB(auO zfzA2o0OHAL_0&8o!@OM55q0%*t4hW7%p9Td?4ecuG=CpB7!RabAa5oBzk%Cl#v3h1 zVkl4VZA?WU1fM?@1Q}rd@GfYp#TKd8F{{DZ{mrqbqreiNf@c|4ioGCsqj60I)h@*I z%_nLknlios-`#t(N6?kd9uK~s$Vs(oS^)RgHFw^p>iHJ!8`SqP$>#+9k!OzM_ALge zkts?W)&ZBay-<9bCg(+b`Ufz=buw7ihK;0}dLEpz)l__$Z)Nq;WVfr~VWXIuzNWYt z^v=`3ue%WbPFnzXSe3MSU^Db!Vr|hD*9KLmTIjaGnKiU-&lL1ulw&SA*IBBUPoV;^ 
zsnh|at=+Zt8_bk7!+z8cHCtP;P=gq@a3g~_0pO1V-i1=C57)w!3j$$C5fma+^<2+P z)qXenVTz&G+edSazRpo6oG&-@>wf$gTZ`w6h6_Nit7kk_Md2mZV5A+Y1UK*Sx9oDD z^!C(fX;=+P!rmcI>5yn($40Pf9mu0qGd%w5BO3)*wqY@VA1O6rIFV{<#D{~3;vc(Y zALYAGA0LP4&|WoeBm*O<`F`b5ax+p}A=81vQ9B!gIW;Z&S#73m2pMH;=NsKDuK2@3 zt>Lq@+suqE3OMKa-TTRQVOw9O?3La^*DS#hWrK8JFC|7h<-kwmuVZc9PWYOA(N095 z%|$UW4L(QO%ctD$uLp;r>hQ!ML!RMVD4JbYx5}$=jW<@IZ!SSQ9QhEArI~#7x%epZXNVuwyI--m^GLpxCahT6&Q0=k+F&B z$0e;~9pf9Tm18PzRTY@Q@{qWC)bY5&k$}Ekub(}s>-KNx+JwY#=&-IeL4OXM>j`mI zC$B6hvV$8Y{j>~AKq#p&l_|N=!e^g`&hN9v+nzn=CXIN$qK80YSu3^D&)WZn*ibV! z5>*x-1uFWoz(Av!o96^k?DKNv0$6zp?J@QRoW(xgVKg)fvdEN?bSqB?3@XFXJ#{Z; zTB5IKLw7Cid$nK_7*`YFK%)^&JO;SDy!EKJM{SPtGOg2OBS5;CO3C9-^ehw4hSg(x z497D~@9q_LND~>>_H#8PK$bSd>yc%S(lkz_PlM1yeY3?YE>P`;7Iv2g03%pZgvGTY zXzqB}Mr=ogoSTXA1#Vx!9eoG1zTGb%w8=$h{4Fk$RkWaFGY?(l>Ui@gJRo8a=7nc4 zQ+43uu~ZsA9Ch;U86Qg_5bC z?V)V(f?U@w^So6ZJgILH%=bLZehfgmeL#Iylz#m~E+q0c^>_+!IHQ9{i3?JU{AH0j zcU45GaAg>Xb8R6G9T8h>AANUNkcbl2F+DB;Fvn2(-}p(UW2RAeN}8Du{WB?0VE}1_ z(^xj3!vU66q8|hVNFHpq^+I^qwRKHaMWB;w-=?mmCrguQ(aJ-K*_`RYp1{>lZl|c6 z*E-*(zsIO=3^PuBenM9u`VmHT)zh5A-J^{rZL=?v!Qf=jZDUJ)geZ$-x~jmune5lP z0x{t6??%!aR6j5`742&Qnz^9K0m{mOVugeNv)iyts(zV6&u>1UPC|xqQX@;RWF{kY z2WP&5Lh;^^&KCfxh!TjgSmkHt_kClSA-N`m4~Q{hpL8(EA)GqE=uz#K@WN~iOb&wO z+2+Oq=VCj}dLqUNUQQ+R$!G^qVpKrdZuBI73sRGx3sw>s&|y(jwbu#W2XcWdNgXItno_{5@#~wFm3I=1ARfnh9OJ_dXfjs(>e2uf;rXI z-hOeAMRE8PT__p{Ms}{SJC{vxWkQa(gHpT@9^)H}A~ABQq`_l^@P@4mlOun?_^}i& zGC?mjhy;_Wmmmazo9S6XkUOc_kb?EDI;~$Aks>QVzCFv85(inFQ}_?2Y<#n?>O@`4 zs<`$K#vLQ3x%8a2sqT{rXU0-buCtA3)-_LXmM>sEM2k zbKoAy_+3}F`+;U|MA+XJCJ)@tMC19qfWy5YztL?Hmrf@)66+LxXz+HPkD-s(iF_|5 zbKX-=dBebcc7c_0mOhGRk5S)vv?cR43 z#pXror1jc^cYLSecFdIZ*7UmUupc`6O};}gQ{wfh+=9>RHV=jnG3M!5qM1{QZXS)u znk*acc$wx0DG@&D8$-?y{e26TSvAtpBkIvh6dm46S$Hr?WLBJQs~ww@!P{sx`t4Zt z_wmH;$x>faIQZSj9RnnfCwSArrH2b-J<e7fkM<* zqFJaOE6m_y)G1nIzQV+;l(qKG*LxKV@29(y^LNV&!NZI9!B$Mg=s@G|B8jaZWM&jO zPkRyO+boTpclATKMwsYrHVVF&Ps2i4iw}Mp+^iSa-(;)K>gUSL+|#sU+2t=`0h!Lc 
z+nx$v!0OEi=j68H3m%;(lJP5&PYJSc4C-Z4jxsIwD{RnD)uhBQ+EnHX8Tu#QeOyP& z@xkH~FJ}iedCgFOZ|$t~U6F=j7fSl;DkveQ>ULuh!rdsmr;PhE=27T)(MjMiS+Ut| zS*S(s#0KCN8%Jofaak=)+XRUVZpeJqlLa#Y_`il*wI^0ak@2w{?knt{Awk*`g7{dBjqbd9E z`S$-t#{KE6|7Nzc|JjlKzn5`;yztLv`)_jX?oJY2KsU#AIObiDHr_Ot-qN(+3-OSf-F{hjHIh~D;M8f9#J$|n2 z#GyD22fJhBO*h+gO(QkVc-9pPe7djlKJ{Gf99O+z8at%s!oi;VM|m=6>!m%sq2kn=aHA}YcM*X z;e)r?&7aML=cAPozN>N~&{fPuy192yqi_v0hkf^5)Oh9`oOd zN#VM^zdX{TXan*vrquTUs!$oFPv@aNB&rNCX%I#yis5FXhC9ym@w|N|lsWBYo&g$4 z!e`2rnWl|Zz_f~?J{1_J^rVaA6r*dXtGM}Xa@?>F3aAluL zm~Vux(_Kk&f@Wv!STK zl3DVgV;ZqIHD-xz8WEN#+Y&tg=5efiABi!P0^MlbUsFUZTw) z;LSCo`L|{%t&-_>$`KYbG-|W^K}tXGdSDnCJucy1khCtQL?^2&$w9Er9b*3B;ZsYd z9PL@C3Z_xUPJC2}2|y7uUA-GqP+ER!dUAts{X@mw&Vh7sV;zGSyIp6C%9fZ)dUURF z8oc4M!t+$`i!HgWgkAP~-guxc99aacT}TbrspKu<7y35PTaH)2-G08`AHC zd8Q|ru{hy|)llBm08CxHU^pv8Oq1k8|bM}^0zqu+XS z4N6&R&gPUM;hiT}dJV@mIE_Jnr_2&&BJIv7@=~^BVylk)*xwT7!hKJp;ZGv(2Q}0Qtn<4Jk+u z{7_BO?I)*XJtyeFR&Y}l=>>6i(+HoBi}wHrn#uzgm2c^5liCT5UUVo@EgBuL$V2=m zdQk#u=2|`y#sN%}tF}YVX_D=rA$Uy*L@dY33l0ZtY9WUzpLH@r(M3ik;vC+{+58E^ zFba0^_OKC@@cQ3ji5p3MF|xk3Nda*o>)BvyilCSPc@h{BM~{B zSO1I#Hu1s-hDs@hE<*+9O+-ocP)g3lO-&z%bwEOnG3c5SXm3y830FQium=5!$ArFg zSPc~FHcYGyyijXda;yZ8vebw0($%f1;mCXf;vhm}DEj0#=ADH9V-bu=D@?5~1)c6# zx_np&c(>T+q9lMDk344H`YXcI!58;g_5e6kh$cmvfFlN=@3%*Sk#o0XF(5S11I$6* zkd5?N24s~8W0+SQQ#@rQhsX$lI$%(630v+~&#Ez?j$@_=RX6TdL_dc@+NaNmZ4Iva zU)wP|=tDR};B+Q**+avKL%X)g`aS#+lr2+x2O;~Kr&h+yv7wR;!t3*|_2_i}4{7fd zU}>AB3x;hw!?tZ38MbZPMuu(M8Mf`nux(qDUHzY`Jk!-vHQhJ+Vqbih?|Qfvt}HXM zZ$z}cO_k;4q>~>*`cFtpoGb88X1HQDag}Wh-YKz2%U$kRe{;X6Q? 
z3}c%+>#OV}%DPdE3Re9dxmKHzu-p~oLjYw;2w`@DNQNdexwub1rMJeesLA;4*`s%} zKsLn!%a#Kfc$839dMF)~uWPD|2;egO$QjZvb?y<6!}}A;NX>-GI1gbmVGp094Cmd% zF3Dl6+6WC)Wk%K1l7O>te#cYW_k)pw+f^Q3?k&rPoz-noL z-SY>Oc}z_13|)uIH88Zu)=BR0!wM*4A%W$>mT)x*hoV>pc?`r|4-=g@#&G?7r!aH_Mw#3WD{M%dmo2#t~ zLZ%Yi2{Q%x*6&3T#M#IDL`Z%QjyLqT{dJW=u$n{zV5Z!8f#16#G=*cC z$>q4UqFPq@O4gio3NNmi;ViUZRgTtWYanByod!eYyn>9rx1zs6JvnO^Ar(Pd_pp-> zzwVXeu>XW}k~MSweBiJ!AGz57Lo!xzwoc#Tk+vwK$d!U625jJ7yASUZ0%Zg^QlMur z1^Bo~FFCo|h%bOzO=|*(Z67TUx|;RAEkBkhl4Js0Fc2;=0RZn>3U(rZ=|bFsy>1B> zc8!PCgK1)wM`D3H7QHJ z(2q(V1?JJkYu-_mh-O2z^ZVM12$a{-i9ZE?7_L#3zwPO>|K3xNh^v3Ba#r1;xPtk% z)GROk>^C9s>t;z7XjNi@yoAmu@T+3NdU}ukbkI+m3bO>L28o}qHnC+L0z8xU#jP3b zL1lbYqn0)k1=t~$d{O30{=z7-2UPGcWg>maMC%|Ax1Qj)Ch{pibot4nH<;vhA7{>I zhZ@|Ez3d<^z(XNAbJ(X%9HGBa3EnCD?}Wi7XPpM??SX_(`58~J=jM~H7z~D`q#^K* zO|TK@PfOw(fRT%4cgbJ{D61N1gt-%yn(XhkcS`UAQV!Ft$nEIyd&NMYL$08>;Wnpo2J z0Yu!b^{P z3Dx?`8~aVQ82{11XaBR6{@+lozwEI;sn%anr~d`j`ZKKg56A2es`dL~<6q|4zele) z{#k*F~*_2Myw=(JT!qyd}0%x|k;kD(X`x&EX_AI~x0!ayGj@Pw3Pi;z>hE&2=`X zgkOGr7(T7>$|PiUUg+LYZ@`{T@p~%RZ(b$x^RZaS${gPe}a{F?^H4zHhhBBwSXh)^Xr@%iqUU4hmdaj z=-&K<0{4=+>%S2{dhd}*1g14nqvRrOuV`qnac6vtZuH2(bM^HD5as4JIIe{tKe6=A z#kKojq|?=DIFCI9wNAu+tQjT_2gyi6{pJU-me$bY1rZd0P-==^Ih(LOwI1Jr_}t!b zB@254lZeAeH?CEcFfVd92_rZ~+0l|g4Lgi|tWsOhLSV1{^zAZhp^%HSj*lLy$?Uj? 
z_}!2o-HzL1>YDLPm=<{`g~>o}W?0*_cqMI~SIVVX{chXIk`5-XrK=HYnt>q%1^$o* zhwzcrWwq)VfWKYez+g_0|KpSnhs(T@!~w&(p1gPKM$Q*HLXdsl9|3|%R~{ab5SC=| zm~4C|_u<^02bkC^y@YJG>{m=yp5X_-RbnV%4S!sOP{Pr`7+->6{x;plv6A*x$XZaw zLN4*Cs3lOmbAbuTo8Sb{qm&#c$ngqeF(_9H6Vw!#Yt3+)m?&4quLL=!k*xcGUn+wA zVnbP@!fm0Q=3I(Nr*_nLKb?m0`4NRmOgfWi^ZUq2R{O9uy~QhlR!?K`BJm66nJ^C| zNEycx7RDjs^xX%l(LlfsV6<+~;nm~jZ?jX^~zQa5R5(jpYv#hqK`GTOzh|NYwsJl98D za)J31*NdJ4GAhTY2oJ>|^IgRvJ{g;XXfDnJz#{R*9)#c*$h4@y8rBjIgMd-g1|GTJ z@`I%5K=o3HF~}X8$x=nslEC-0!OB$;V;_O(*a+Rh5mYPiI#oK+^Qdhy|2M9uOqGoR zB@Hf^+*M@wR78xRXY`GPoICs3f{CtZlyiq!7XI+IU61@~iuN3U1+AI+PsmaWUXtrSC?YSkvM`$~wsN@q?c-x)u;s-g*W#=Em~NNbTa{qrkjAEE^G@=6sp4ka+W*>qZuDrrq@oJvyMt zK_nX=N<1Ai@n1WQo{)k0gAfW8pH`xI%;4eaz60iq9N7Cs!^RcXEzp)nuf`ekzDSL; zFZF2)RVP`^RVHC;!G(-gwD6h00hD}sGSL!K|0-deARbmXNB;#^XuHqRV2~8m5w1ww zs*8C-Hsr?>A1bvxP86XG%Y>D08n5D zbdIWk1H{cT#;0n^1 zW}KN{7Uhy`PfgJ1+NJK{rq*3w<^nVy9u}^Ml00 z+0i8Xr`1sW)zz9%&gD+g6Va-6CvfBdHf=mENAEWF*?|*;+&TCMR{~v*Z4nE6&4S5M z1Ixh5HiN0Dr~1&<%>ced97!yV4$kL-5QZ4%(bA=7C)}N8Qj9hoh#0G}kA)YK@6v1B zEM+$W&2wzbe2MM*Jtlkafd7+gO;Q(t@`xcAn3%2H!Pw89*q|p9{Yw3zu^u&KGO%rd zNiTiz&5%(K?)_>ijR)M>+|C7me=UrWk3k@OZ1Gzt2)R7BzALwcrfyil=^^rd1QnAr z6dr7Q`{5BXPJm^=xs5e^l+3&X9mx@5RczIXI;(Ea4!O-NjmB}Um1!mrj7+Nf@PeOA}HU-A-AV~tTlzKCY4D_Mk0dzTlHWYCJWP4o1 z8Uicx-7KiC;U}`b8fZ>h7~z@xo*5(wfp{von70`vpCtYVC}kT(x5R#rOC%40I%BZV zP<~P9YKvEHM%rDG7A$JeJHt;3W3CX+bE^t>@jhRnM}rE2dRuo&;LO&VqvP4ix1Z}x zE-kc!sfQy9g%lm2tYWimU|VASKbPL;cm$_8c(iY{hiVMG-hQGpovKoc*Sf{o1=P-j zkZ{6OxGRR>fRPiM``sjLE?_)6SzZV^hN^AL5Zy4)mI=AUi5r0~IB18-V2%P}`$f&d zF9j1BtwN&bCyev~+V~rCk(-i}fu^tRr|o_Rkx$^6z76m>+&ZZnRv!E|m`GA-Efw|B zfV9_taG)-e5=x_sCn$-)yKZ3m!rH6AOh*Z+-g%NLT+Z^->!8$?hnpNElP;#alV9!E zm`ptoy4LB&n-B-tD@sp8-7iu6LlLRb;t1A{%EOlOeG*7jkzCwLWEhNvkys-lHOHcS z#9cL_`0HQ@TRKqCpnZpzdw@Lw;{>C8`M# zbn~V<8h+4Zv(!p6O`^%{Wk~$ati+T0;N}Fgw-MRgr~8bcnsn1!17G@dat?)0# z)2%|-Qp>3E7$i(P>H;KM7VX&Rt2Ev7I}pAj2cTn_(Bv0s%qX62rE_!Fi2Y`s->pAYn+tD6^m(Iqhi494o@#P=NIp9Ir-mNWt0shivH~z1H{BPLwZxJ?DR{Fn3*f{=mH|uXm_y3Ntas2=6W^w$d 
zee%b4gTH;@|HJ$EH$nKHbmISb92|cUh=1*INc=uZfeQAz+?{Wu0H}?6Q6H-WG=Y!o zGYV`*L_Yg9=X&pnv>q>Efe`z z#^wFFwsyq#mB}Wj{<>OxF|R47v<3GiTY1^&K2@>*de&>b6cFu^`;&{Q@p=rKd znXEWHE`0to3lNf0wpDR4NdCNDg z;!$**ioVwJGg8l%jlE{$uiN*nW?ujyI`tb3<$;ty>~u(U74!Nd(F?K;G{@K1oW?P; z4Jn;I-3N_EE{#{w3033W;z5@=bl$ur=u^0vpcCc)6wY>{L1Jc#iju)iB^lt zGl=6)FBGpup!t%!TvJS~!C4S{W~zRf{1AVxzUcyzKGjBmR1GkiwbT_l3{zuKv6K~( zBYpcPbs$z+da_X1tPIWq6C`Jyz=Mg^$C1Q*%bvu*Pi8Cz*yneU;axzHA#jAaEBG^Q z(@A%<(_Oc<5R&XW*bC-sEtF);g|?qq#+=vmz!mzXqJy>^J4y|&F)sXd?rWsJIWru* z_op?zGPgjfM+XtuuD|dj2sZ2IO*;Z7;EzCQjDIdEf(0VY0?P7D0JxtaJ9mo{mnKoySLOl>}GTz}{NUkZs>k{Kffq!FX2a+br z>3;Pl={{$ zEer3}dV2uhle1#Bmg`kZzQ}cQj?PwRt*Q8-8$U{=vS&!ss>&j`2^L8g(b7y*~>|8;{D=M@) z!_;?&ZCvf^gKwaDTzQ&`f+(J%G2?gk8k;|OudyX(W_YN$u)obEC1tT&^iVQ0^1x`A zIKo*P#H~*gaUq-z@Xos-AWWn5uP}yi*LD*Aq_r4%d9hTlmy`;tRZb`U@|c==MFYX) zs%?OFeO{fk>D1L$Sl(DnjD1Uz2P|EIFP<-$7t}LqY!A1Q-is7ET0%AdsnBvLph(;( z4bHu@j;{(FB%1sC#6ZVI&j);ZLbvI?WH*X?B;m`LtMHIOjqihbBRPKTBtpcsRVJJM zzz4nT9hn8x8ny%qkj!gpt)yU6{rV)0?y|vn55JNdnBVi_8NetZF@B^1dh?Pc`5Fd? z0?M(G65Sy155;2Owr67b#0g@v?gL6FXbSP2!YDnbtw-p~n<3cH1<;SWqvbcJ24iC7^ar2&OQ;5CHd$eZE>u zZopqiTKOy<03tls+IG-j<3|ON^CP@X2R57Y=G`)J- z*NYwM<&T9xfS+qSvG*J7;ICFsv_f0%b;IAFhc3?#ym>I+8CS#FP6RY4jJI@^YXimp z)>J161@E8NFvgLN0~5>FsRb=KubDl=_6;;iBVbG}V#XDEv>##4!gceRKEQU){cPTS zP1qI0q89YHrU~F%98FJ3H*M0seCQ!f3)Mj2Yk)!=Oa)v5yAU7u? 
ziR)BaZM0ra|FK_?S&N(uc_r-_x4ra2ylUeDpyXg&wFZfo>Nj-2?xfPpau<`)LJb z@1DRX2ANd>%q(w+fGMgkfW}L2kfP+e1-BFh!4-uZ%F1}1DqSiX>?Q7Q09mMb}Ss3BY|^^|v3&{emIJq$4vWY}_o;CqfilMWzJCB}YH zRVBd|oS8i@IAUbvxtkw`bi9U;J;UmJE{uBYRmA5C!{hYYRV*aW5aB6RIAXu<5zX1s(!#ZnUYVhkOzKm=UvoBwL923-ejBf0VuyfwMat$E z^KClkiS~ho6w~g*@?{N&CUbk+sBg@979dR$dtc}d2LsU*n1x)GoFWIn+U2md4s>VP3?m7~W!QeO53z@w@9{nQe6c6I zeS|99`e!LN#LlDmft%{=Gzn2B(d>G7*{RoI5An|a4X@;*!aL*;_Xsm@UKeNHYPS45 z6M%NA-~#F1zGyi%>T|VtN*!J0k-|Mjozy z>hk!W6rh5S6DVWOE&At+lrX@)G6?-!sn)wAVl`P6p*gCt6WTTN9#{~8;}30p0B5=7 z@u4Trdl2pIx8&sPj8{y;;Ng1y*1bFSA3nmpM)D>*OUStm4z6MGV13M5*iA5Ic|7w{ zBu`7y3fp&-yt=VsWKyymz7EA>(QmJqrXG!>E*A{MaNRsgP1c&%?F zUr>GTnJXTCsv=m!%2WWDroLQla5Yjk+wh%a+PzD>;hrP%PNZWZCvtus-d664uXx1~ ziSCx&*Jq9CK=Z}YiJ8tE$J#2G+YfaT)0^CG9I$cCzD#Uvp)-~#K z6gB7D3r!N&om431kC)^3w>g%nX$eWqTIJMeN$3(FYjl8Erdg|`28)#62fhvgBAltM zZnD;7^&Fv@oasrerG=`y#)fuIb$*nY2Ny2{d+E`vIjO9m7MriD|5_|_-oPIsZ)E4p zRD;p5LJXm>ym2uivbim72<8O9uA^RNxmJ`A;Fte}w3YB8`KlJ&H&7_+ox~(!9gV&q zVuOIO+b+bvoZ?gks52JV<5of)?L{toe$}}M5J5t#R*2-WRtAuJ>FtDeq3547XkY+D zxI)b~U1Q#+MuttgL93&)WRK%bi1;EZvcIxkWPqQ$6ZeEeruLK--}bU1bHCCa$VRqu zt;{dt`l#GnHfU$8uI9^HKbq?yH)_q*yCMk;e72!1t|5e32x21~A0p5jP6M?RKVRL& zi6ippG5-1B2{+Z}ay61zr}Iq>e6^f`<)WSgn1H>lEzxbGZ%G!-TBgH@rxuIGUuFxY zBp|eYn*cCp;=!LwMq-g%U2$r=T@ZvY>@#;)fW)63T*_gUXiS2J`RS9Wt&LOJl<6{9 zN#Tj)h#6N{Yu*`x%MBP!qj&@rT|8>coBBY1$E+&U%9$@}-EMck6|7IiyES@Z)ZTma z&Yj(gNa87TVu{M^drvaN!J-GQx5d8Q61o@xrL?I;Ffm*RxCgVBET97W7Uz%U(*TG! 
z{_wJo7JBn+dz!(gT?r8p6&Z4IjWm~P7*N}9@?Ts^j*|;)hhTJG_bRHohegSDMrar{ zFs`6c0g>p|-4zuvR=|XG#$~6=3_WlZG0y!XkyeFHFR~i60&37P;J@>%EEcjjR=f$R zg1>r~zhVnLugJwz=sleYb*QS^pwz?ey28W&ap*!OCkO44(-XH_8h6)v?OB$Nl0u9{QV*?IHpzCGIscX($GNZ@Lc7N*pJMs zVf^ebey~wpOUTb`lDLOJnJV^NSm(uHRZ5ntk@2fyZQZ@GNjU7R8k@N9(Na{(%v;wl-bMi`&@yk2X8&5r#?2p2zBH zTrPzu_M%Yk$KuTsUf%7sdZ+~J4&1hF#$+8SdYWFpiF0xCqlb3%00C{F^0vkfj@00` zo(8(>_6k4aTrhqpxT-_Y(6B>L(;I%OH2MJCTPIEuVuh2!>*Bl-G9;F_3jXMxLjj=$ z0yDov4q{x3cZA>3w7u_HkhO(WJ5XPvZ;YuJ)>Em>4_ zFNb!;ialknjmCa{NugGiE8$urv>7KG^+I?QpQj;2v*^x$fn@172WBy3JvB7gin_#0ZF3v zqzvF$;+!oOQ*Bt`0F0>gwgOCo$%804Dhcjd-O#*8wD@^8>Ac+eq+q{i$&GW=MJke# zA)w$raGS!h=UnCwJmbqyAhwHiS4zm`TED=_wIaJoJFb7;P^E$yPpo3NqWnbe?|YJl zzb95`0tT6=1I_-V;Wf*>e<563Azvm~N?IAHBx!mo3&BnexZ8BrEwQ3yvL^s580(VS zyN1EiT&A2Z@9S$;UJLpVR*FBts8n|mbw`WOXiBF+fT68UT_2K#C~A? zExXjew=*hGT0S+JZ9(fq!#wkPnjE8dDbk8+x(PNfo414K16E0NLFd1W;s2aY{~_8j zurmF9I|wuVpSv^u=@|acS@-`E?f%OC{x8PxpMBT=`569lnE!nY|7G$2%Q37nT3^A6 zu(4HhjZ|(`oeHBdhHv%zwG+4d&~mp;`tI_Qz8^Rr*yC3EJtEjcv0||w@kF#I#iqVo zRyf&~k^nhweDccI8)xXpbTvSccwubDjT|HH*E3DMP{+6=qV^s$a-4(g$_?5Vj_=EP zy@heNlyTC6NZX1rW1O>0#yoSlYZ1b%02?LkxY;e|=D`~!mYnc;L1ZK}BCJ@+R3$?; zYNvVQLz}Vrk<&806ScYQ8193M{ZT;nlPU`PEuFuuVoOty-(4X!@!$$9Z3r|39CWy{ z+FEYJJSSH6#Ql@P8iUpBrPUB3k?5n{p0YNr9jfzjf3nUX(mX{2d0H0pYk;f;rncOw zV(czK4`4yBlmSu=SVL;>#yiG1x1xP|F(Pi1prLywCli*ZFzG&a(WG4~ha-3NM0QBm z;uKSX)mwNP|EmvS9eR{%s~fyv!AAyy(2&7#65V{CIp}_5jL2@@1o<;pH33^T=H8e7 zuEo$ieYC%ODNYzAC2x6RC&SI;vdefOD-P}KmaDDMLN+HQ)qEH&R0W5SH!~-}6zj)= z;3|-Bm=dBs|A?9_6jP_aMMFa?M<3qQ!v+h}*&4TT?LJojMtx<>)T)53ChbM5W|+>f)dWJgh?7tsu;~ zPY?=qb)H6(T65=;j9IS}n!6pCCe=;3BfSr(S*hFAQ9Z@zk_lr&E z?G*FFO{BPaEvmiLZ+)vUQ~Gv+G9!bu04m8O#%-p0lR&k*wMVT+jpKPY-4H+InOCyY zK?Y?VvvL<`ED^KqwEK}Yrmlr0qacEz?p#42;4)HV0n&*|(OcYq)T#!Kj*0XZL=HfO zJH&w#uhz%F>0sAvLqsph;89nX_=huX=j93_ur(I9-&FhA))@O&V(@Vy7voatOfo5s zmV^Mo%7Rc_#>(vq9oNy^h1PbnW$HR$DTdy6o|L=C2&~T0XKnq2UlQ=Vhlf<_fm+E- z2)-?7S}pS#H}IW}hQzr5bb>pcYlxBs>%!dbJrEU8OI`dT!XKgImWBcd$`=UH%MAOy 
zy5{A841gs@)hw^0!w?s98n>xQmzz5P48kW&alPRN>j-GCb$v^pJ7p1fNTDG!BEmmJ zEQAj()(-Ev?!Yx=@se*5e3Ali&K#O%DpdKd%3gI|+Qw%Py`-kOCzgH*^PT*eg8Oqw z@okYMnz{VaTKNv5z=0ccss8wdhKy%T!b3#wWEZM<`X0Fxw)9G^S4e>mUZm6M@vI$8 zslH81VRA<2yx%L(jAu*ar^{tt(@F9q;>L21Q^#u=WHkm~0Nc~h*(W56&Yfp|EA5Rd z>a>8Qf^v6Mj;;roFGDx<^%}Sc!#GoC{q{^O6fiS9bb$vHzWT)zz?rdPL;%pt;|mHv zDndfKGksB2?~?x)!6`~pV1}12G2`^8oOa%+(2s9lK+Wq-um9u`|BE$2F);k&WH>YZ zpSAh_<`MskIQ@f5G1LE*F!~=q;{W-D|LVN_BmeZril={*SN%B_{?CKX@RvpOuO9SZ z3~gKNRz&fwTdMm7MZcnXAF1PnJ!15&6@l$%J9z7FrP`7Po$cQxY*4;p{$rDLO_Ebn zKwqpYI}?t>$cqu9dXFw+*_=e$!5y`_%<1Jt`*1U#_r$nFTWX3ir~5j>lHnbjbHn_b z*}GFR8nm*^*Uj^|+_#2B;>n}MdV?YLu5`MDsO6fol1-U}V9v(kmGr55hHI@2Hkfm( zuqnQV()>)~$QO$`K?Tfe9T^i5)@g}V>&&ToVucL^5Do12;xV}fN&jaXYYeMYJXg+` zSa~-yU|&yk*QTuqEms2xWka@kqq5BfA+9mI_l@SZZXkLZU0M5nPMeVAmGzKr{ZdQh zKGbsdG~+ufWoZ)$R=Qi%vc#oYPW6z7$<30LEAa+7)^ev`YTf%tN#qWV^N5lL4&9=1VOBk@{+34Y)2Qsavw^|%~=|?;W@I3%q z7Kw<;gpq?j+(6mwIw2ih!W%hV!wWO}Nxy0{zHSqzo2s>`q&CkyOdpuFOKr&)8>>*y zq{iJLry#ZN40>%IPCh^TfhWe3DR-s;S^B941q_SL8OH2AooIv$40lpcM^sLK-(P2N z^|yW?tXe38jc2%KW46JfpWt+J3xwc$`K1gnz z&B{)tcKIov69VTAWB;x5te_|iqM+s{i#LsyZpWI(27}1Cb%$M#7wPyS%@3PFL75gk zJEs7HMp?V)6p}J=#JfYR#Zf!LX$~Mj z%d_|8z*~y&Uu6lI`1#%PzJ{Pnk390;Nl|iCg@-?Wsy^$Zk;J_<*5-p|L$EiQcG;p z;v2mzE;&82*@=w;PPNZ*+LzwB3wm$?x_*VSJBtYsk@(Hyy^*0ZAq80SO4Bqt%F4;> zNh+d+96F5-Wt{q5$zv>^rGpRWBZ3Z>wf$woib0VgD`p!Dh6UcEsAroQ|6=u8GbvWF z8|Nc1wQ==w+^-G-Vnq@=+F&d0o}ppbB@37_uz5d)|4q^Qozi^AlRT zA08(jfszQ0>PK-Syhbni>5Vb><7R$XX+3x7?EQq)(x3-@xtipidMRa>r&*jXX43=R zi3qi_1MuqlAnu_M*PLE;+S>og?> zj^$c--dR41 z5|RO&7b%25Q@9gI>%J<##TiK|K)th2H_IGccc~)v}GXoE-*{a%8Dp`3(R`RNPw>U{3T1 zn$c0j3>jmMO06pjff59kGgcK9$0?`k{Pn*FVRghWC4$6Dd>;|L0tZMJYsr`rtamq= zaE8q*FYo~tWt&M7fPma9J)5C6W^ArbdIivCsB$som;<<+-ivwxt%~7E`EIY-9&U(S z47zH$)^RxlaxD}#_q?^@^E`#WP&0cmJmu@fvkg6XYKM_$_>G-a7TST|i0{E5au)nc z7dtg6N%ClvTmSldc#^{m1iwMvcGz`Yct4=|aOEaMj3OiFP&rN~d~IsI$%%pgqhbJ} zw(T(}qC`4hRlx-4Oyn>aCED7zU-1Z{LmIaz>%=y=mS`}N4ygbxNLU~0i-{776E_4E 
zh`i{f-hE-Ha43*LeI(zD@p$EWFKUcI%ebqF>mQM#YzcDteIFG~T;g9Ss~t-KE{b+O z&@;gS?hcZ>pVJ2+>&SmhHeN@0>{Jl&Ko%nu#yl$a1((2Ns!Nw`-p5}0 zA}gji!Sq$wThFpPF@H;aI%)fc#389H2UppQ<2=vDLPqTPJ5T=bT1;ev+zfailAwp3 zrUQKHoia6us+DHkbGEJ_1zWC`_E-6|>V6ntVumT-$Sxb#c@%TnNZz0SKn zQQEeNAGTNKZ6Fu7kRS{08Y+bc4&jB(-|)6R;q#pzZ~iN^{~OBv4}FG_jp^@aTFb+J?{F6#?jnNH{!OCSxQwo+Ae-s7iF8$>FE)$2to zgw1VNorK#}-D|8O5+RiI%4|{2LdU;U@o^%!xvbMgazpq-4)I+If33uAo0--07bBqT zLXe8D?`abKO#TqB6c`POzO@gnVam9vkb|f46%Djl&OOKiE0|P~IBFo|Zz3E-7$1^yav+jrUZ1owx%GPZpZ_Q1G?Q)5@hyEggd>CR=Y z-22Ba&j82M|b4rYNLXqw^rAb)}LM;mok(ZLGM2KHL z^T=KRaJkgk0KYKlH;@cxfCK~8vr(pR0GCzDZDx>gFSFwG@>a1bhP02wLAaOHerB&k zu*AFiP&J(Q0D}PScy661<0QW}l%oFoZP5k*UESEqU5Z z*iIsuj=EImc3l_>$FcJ?gCVgNdp~5&tH4a8Coi`rK&an_>-!SxF<4MqRBVQ!f7oC; zY(|c9d^BPZXRC)PYB}zFV!nlLC)Ja&@lObN^;huq9iX^u;zE@co`InTlGrF@ewRme zbJ^F_2j}6q1kinwhr$H2sp4>ov)z7YZVLq)OZ(i#{Ju^pEj<*^*O5(3(?>3q>m=-c z==|}+2E9igXV0`4yHqvX1@nJAg@*y(ilkTwomOL&D2Q&g4@E)NQXW5zVl@^N*up z()p&2_*h64#Brcs7KG0#u;7|2$lEWNq_gWUh=VfydglzVLLk0*ai$W8*#D zU_Aq&XV`>2n4-x8LG3#(Ph;?LSYzc=yFLK7ZsrmbIaLj^i_*=f#`X@`$Op45K zW#Ebg^~M9AQ;Go3&~UxM*{LiamEsZ-sAaz^WTeHyr}!-z)<_Rz8t^(9h0+;9N)jZsyKm;eKG z`g2o6uf|~L7hlK^#IzyiYVwM%1Fh%vX#I(rFVe-3Xv6jxj4@os=gshYGIHu_2YL^D z*5oe54&&rSgL6vX(Nutwj3-)dB0K+hc!(=bl$mpNQP_d&qbc{YVUk$^m3^TBehJW!<-Iob@ctwqnpNmZjSpAU$OR<##q5g>#b)o9-1tsxOEX-Q;WV3 zilvP%b|I^b*UW{g5AB29{!Mi8Ie1`u47eZJ?lJLe-JU_)SOP~&fZ&Ucp#`rN!?Vz6 z*rnsy0bGupO#Ys#=8auonpo`{&lA;;D5JgOi&xn*?gteEr04kWlWJ()J&fCclFdp? 
zNEwJoBgFe=^}xpgG3na=0ea_VWV!&%qt9KK>|MsGHBgLG_)I_=>C?e=L7!UeUhLh{ z&|P1{eC1U|hKjasGkumgZ5r| z;{OE9{iVo4F*30K1I#h}S@itpVD4{d=>Ia?n17#crpN#N*Z&;M{p}0?Rj2(sQ2V{8 zl>wjOZ&`Qze-pI*2@n4c+WyV7e3jao!(IzwReHyy>Tz6oMC(A-LdMYJx|K{Zwe11H zD6q&Zp$oa?+2xqweZ-QPr%At= z=jnL8WkWvQ7iV{VJm}d>qr$Zho*3Zc7iM@EF{FQT4~mpp?fO}{%N+B(=91-`%-Tuy zm6LXBn)fOT=eQ;lVXu=6px@?7cg{0&_rTrf&m5b z9oNDI_UXVR+VPIGJ97TPx_eB&o=+MZE=BQ* zs*KL1P;ZSLJhswTOigAZW(-A;tGQqPG6D5db92t~idW^{I?guBhvo^WkIQ!JsRKk9 z?H+#iMyO+gcFSeTzoT}KIt>7Bri2FzKnA@;F8jHM)Um__WQNEcQrLDmQ`8E~EpT;r zbz7*dkzf=+nO@h5AabCt=8oiVXeC$F7Ik7W1QwYidRdFn)h;&5dChAb4s8{UFRWw= z@)@BYr~7Xn87UVeL8(EsKW;LY9zJJWpJxts9UyFa zGvk7i5Ss8IIr3-`NvvazXF)`Z-nbNJ4@B!}<928o0cHeWAZm$H0r2|SCk$-{EKG62J_0;^h>F7RJ zWCWugarG0lC1r555nX)BwSd;Et^%YgMlfv>3OLKZgv#2z|C!@Lroz}Vy=Kea470H? z)&v9?15UR`+%gIXi@(}RRD?%@*v-bFP}sv!*j_vlV`Z<{b~|el zX?G8cvw7_T;flV|3#TsOy}Ha5^fAR}ZUt&VW%XHC^Ua!X+W{L9@c|K$D^NR^kEB2K zmVj!>*g93e1b4ho_Dm_U$7h_9CP#CFHp*8#teOMd(|RtN*4dIVpQ{r=A7wOdP$gLp zN9R}1%&x;89=Sbu;8Y4Pm7~-vy;^|!n$!THJ2|)oRFz(oLGj{7 zgRjeGlUeskJXa%OxbaBu@>IkG@4>oGZo8jsPWuMhNTk~oO)%tRQ6USf{=JUu;Hy7W*5}3p4{Hw^lEgleW?E#_{GFdrG7SkwlJCG2TGK zWfO?|l*YTl)`__)R|zHP7CQ|(I(H%keGnLqdcg_wOaVY>-y4KL1#)B##{P}W-aD$1 zcG3V$wZ5Jq7(V^>zF!8`oZl?NY_|QktT?xJ>T7tjmwS@z17e{mqj7FA4n3Q5`ZuCs zri#6v4Rp*-z^tdClSUJvD0JM(S{=A?an%B=Z9`%efw2mfZqH98$W_r2sTc< zHFOS8+u#_}N2_WQ357`!UU!F+TH`drgmFoBcz5jKlnoeJk&9`S`qmbVEnkY8TuRSV z+glg{l4PmQed=)lP%Bfae9H)OoBB8UCpLmx0vi}geWl-;Xv8GhT9A?cANIZko~myB zpJc8OG9^cbgdEOsj#%5(NqSw03y&#o3F zNEl5`RaQF>HCJL4BE!8InZC2f4;+I>ALzZo!7Y$~fZyxU=)ROn_6z6T4h8%qjZtP& zsW_s*eSn(g2U&1(?U3ks%C}^Lr1z5^v|V6M3|QbiAaUMMa+I?=A(e?^Vr5^h&w&Gs zhGUn!3&tK(c1mWFWTCOPQlFmrk#}>{Xo`O`NO-JH5z!-`{AO>@J5)KgNQEUQLV7j2 zk=t;%ZDwYMSkP~OwG~gCkw)q@4VS}F;pef~4nteTxGCD>1*+lX%Po>Go04343W9E2 zK2G2<)Vs#>lY5c>XYMgoc(hZh&xQKDiI^*mB5@CeWQ9kFIWDYnN#=W{$nlehTzoZT zFu_esDO@N`H}Sc{xQR$}U(6%*BAN)3C+Ae;GQVF>-^&cQRKEUrk$(R!Y>( zg<W zW%EGAb9;xn9&q2FDJeM{xO!y#N(###2w@wxGa!T^x0YxW7W#+4V`2R29Xo-?!b1Ns 
z;IXg}-YB;nriicMZ-ubGv%3DaLG7Ot$Phb4%T0j{@lUQH;uTx((j!Q%curt}-bhQP z5&NB5aW?VA1d$g_7fGRAkp^cb_k&A(hKdUu%l-Ql>)f?S&uEO8>}w3TNJRdIjWh8` zB2(RY<+`yUY{2|vIN1Lb)xg=)qx?Ll^oH8-zM`8Gp?r!jvg6OMxGLwi-fsM;XiQw8 zUuJO7=_8$U@6+%^wS#5Is`8N#j)Car(7;Dd0dDZRp@Bm5{lXr>c+K|38`_D%D4DMq!h%S4UrH8-@0WuHxa@YWZOoY8(O*ew^zJ^Q1> zLs^=wModH3$38PuAjM>1{_DXq&$Eu&8Y8l!PZgKB{MjBY)#r_?(XyqC&2)V%5G%g+ ziijpg>9DxS$O@WxcQ=3Ah1SGJ)hh$vp5OT_YSz*NYxJzT6(&Oevg5OD53H5_oxGRe z7mMdc)2p8qovk=EY<4qHy?<_@<)t?G;ps`g%%kJH*kwfgIS!7zQ~Fi6r>szthaa5d zjc2=}u9bX@*mh}`1+o8OtIG!tmrhGcv-_3TD3;f7NV5;prF1%521?Zskq61Wu759N z%9+YRj%hzio=t5}J@m9|$=kN4V!3dAnDX^S4c&v`NtXG8jI;S>cUXf+%q1&3TuuyY zD>#|%Mt1p)o|_X>xl7k*p6MA`dK7U-`u5;q1aFGv+ndqZ0dKqKHOU=hB>i=JvfFw) z`mSCWK30}_@$~hBPu^-DuMUb)Jnfgf*6FH&(mi~QD(-Sj%##zrKCI}wpT|uv_fnRX$+|#29}kdo%s1t$UplTbJScA;qnS7tJ*-7NtfU-0G3+X!t3faJ@^pc7cmw zDEVQadTn-)1LqKv^Ur!8Vy;@2$s!`EI9}PPB~#O?u5rc)ue!7>v9zw)Ap(Ke9P^Xay_h(K4#Pt3pQ`a zY}ifz{ceiur+Ica`*@0o)3W^1?+_3C;`!&Q?c6k&<{n4*^h5+-N^Xt#+Yb)zeS=K+ zqKIv!53w^;;E$C)+{^o`(uequg+>3t_XbzP-+pg)D1HCIZwx|oCpZV+6UZ|_0oV4j z^z?JXz~B&3hys4X9_!&LZ)bsp2~xmSEjG@D5F!+CeS2$9I}eyJV#B~MXW@atxfg_y zn57F3Bk}6*jxQ;$EW~DGm>3!fLxPP_pd%W5N_QTi^FK;p5h(;N z^JalR%JlbWxWc$Un>n^AP<>F^rfg;0CW)X$VNlwpY-K{C!LBP%<3y22ToMulk@mPh z5N|{P%nM4;UFb2QAUYjNn+<_#6$8QWqQHiL7J=wO;3xzW1^f{a2HWllqQpQ2pdj$# zh(Q&ggkeI$U_uB93IYn)aa9x)0~gRAP*?$Akog#9{Jk+J<2K1OzF#xa2`DPkUDv1q)A%I7~etdkQ= z3o79O1I|7S)*E9j0FZz%YkMnCm?g&6-UYmS;Pp{RfD zwsarTh)k94Bo7)!7_D+5q8C;#+ zygV^jn3}6K2J7;BR(%YfVUR7%%lY@a%AOWZ_ExekwoVw>F(F|AL9j;=u6OnxC+vMO z)*AMnR(3EO3nvc@jt;)yzp(-09+qqSd5D&ZfXHOZC=~h1KwgV zz8EWt?)R=%66YOmzPOkjq?q3PfEZ4`T$?k|O2$LqInN9i*(;7?IS$E;MX_a@q>k(JS^E* z>*Gsv(|~*W4XF3Z-=;7uVLsHtg?q$$BIO)7b)*-8_!M)4>p8t=sNJOy5ZAk4fYLYWSiH_Fw>?d1bkw5=No^DyfB_YedapSZTVcmda~rg071ajlw&ccIXiB=5j=VJ zBg5_KX)S}wpi>C#>#^baBp%Bh%5UBCLt%PP#muQHhVP9sDl~+973j~O!FqGkJ_z{s z$ZS{6*<=2!!r1ovj5)&J7K?{2TxU66b^cxU)2zdf<3crJZ|>%~Lq)Eq69Q{_sh%a* z5#;DxG-F#pU{C(u3SoSw`O6n~xIS$uqevmM>5*tjUEY+o;Q8i;d(68jI$f+2)bEN& 
zBfLH`)`tqpy7(>jYwi8O6kf&h+ON7!5~ zd=Tp3S#Y@>#q&8cw)CqJ1?%ABEJ7lQH6LNh@7W@`BG_7{+4+$4Zp_*#7Vo5F%ZFC0 z@%L!u_XdRV6YQHu1$hv*JLr)Lb1`*09Su0k9RIz7Arb4ZLp;UsFtji7Ip#9a$V)8o zrPnMU=_@a$7aOI1zqBJ9Ad4~!fU2`nUPL{z)PAPzQ<3II};y+ay z6#jPw{}Mc~a>7_(JzbqGu#S+Ru^|zFGe{y8#z_~@8A1@XZF9hh7!XxN1i3ApY?>!T zP(s_p4H=sPh1^=|6fYV>#eex#+A4#Skil)3q$CAg*24;XwqYVDAbQJNxShoRV4_8l zpfHfCp?l!Y*6-Yw*!|mYClKmAG0u8GPNaYvY*Ye7aePVWM+IgK`~7DHW(saQS=a*c z9y}X}|JoQJ+x+q?brfF#fc0R2ScSK#Y)keC(G8a1dV>3D#kKS=X&A_jXzU#t#)e!6 zb^aF(V^h3?ZH^+YEW~hQAnT%G2oQD-9Z`VOJCD%$A0;3?BXOBG3;a>0zemFr#{JpM zu}y*MgVHu-E8{i^EezBUkhUpXnIHz^E<}Kg4<#{SVO)ar3lz{DpajvOhlrp>aA~s{ zP@SSs-GCb?&_Pk4sh}V=0R>6#LNGC)o1lQ2AObW`Q9)1^YA6B?ScirR0$oBF6oQl# za1#yAQKD!V8dQV^@()l>KuK^S0#p$ZG^FX^Te$PM*)w4gAf@j}n`{vVnh%tq@!F(8 zWuSB17(%TB3lE5bqcG_CX5E`T+*v2Cx4(_=wq^ko9YNUUE4OKGLvHA_TbczBKXIPh zEzJV?i|zp$i^S_5z{d9vGz&0p|Ac0N5ElaK$NyQ)LiBfq;KaYUW+94K9R5GnEP&PK zm>76J@r>(z z_VyDia!Er^xufg+R3170taX!%n+s-WOq0_5NU(63`$yuDJI%p&Cu1*uS+)Dd`XRHt z^2+eO_m=l=U$UAU5w}hYWqOoMHrrwMT0>K4X|AQpRv$4@)PLOPX`t*aH5qG7?&zi0 z^QrH2lG67ldT6u_^`Bb7WHj8%6P9r6Zzyh|9FLOEt(7GbjPOPmS}bSp?<6>>P{+WC z^?z`s+O5>!vGhpket-6+HqUfX^0A)pe6b|0EVKOTpK641c|C7QmbW(O3D$U0(pWq+ z;+mjpyW!oFJ;>y~D}?)=MS--&;Jm1a+Inu=FzFla`+8TKgnd5G2>F*uHMTON#0j;z zxGm|JY~J{|h9reVr~6Xeoqwh!cesE`Dpizh(28kV%tQPq35U^FZseZCcS6sXzT3VO z34Ag&^bB$7v~-`i!|oA_!~s&FB6< zffFSl0TdyK6xkN^H|&`}jo9`t;A9FYE0DLgb8g_XepMqj6^@;1#P(7UzyP}~)BK0k zhz;5L@2C-?Vt=AW{QevAAJrpp!k8AtwjLoQv;n{Suh1hvG&4?!+twq1`5Z_2U5@}Z zc8I>+4q;qbydHrDGA$%`Lb9d^5c_blCy+%UDH})IaYO?NdQ%$5l>y}-`4tH?rcJqa z=Mj==A=wq8ZPo{!ivfebke~?u0Z;{-f-zJnlwq?$5Peg4#?e6|aSsF@g3U4z{hvut z{>=n+3hvIaC8=HLW?8C#D)LwY7bifGs?q;>)oMb&V2)!99Ikx37i3wH+3-BL}F$#@>4e zK^O-20Pd7O^KF3NzhekcH~7#2+XYJS&omGb@R|6F&JA8dIop>}>QBqtN+`p+ZV)2o6ipxkMQC^&Bhb6=iH+bI@ZU$&Bqe`U$Gc`NmTP2bvwKi zHbqtRqJ421MR>Zkzdk$GFr(_uT=i6{29JWyIB73CL4~pRvx=niXc2=GiB7tgEn4j6tun?3Zg-$fUEXzc z+P!ynva^V|dXi6=?P~h9q*sMMR&s8`k2fPO>>4oB48WI6KC`*N#HLW`$ZtinHh@J+E?sTt&!8tHIm$>+H??zQ>|0 
zg${&Krr+B2wCc;3K07S4&z20?HUFH^v@dR!I{9SIAjh)H_Nt|GP5OcsJ^!u3 zcglW=M!2R(ZS<{3fU955oQVs%rKs};M_b=-T5`sa7FAYG}JvjLe$RR*8i%8f%c z1@^@A;_JFoyF9*zhS6V&DQGzA7&+M6v|nRQzkVN)Nzexgg6Nspy&`vYVg#zx$zPrS zk@4`XKx%05Jhyn7LVi&BWb|^L@Ke=;3I%#O)D?_#D9f>{thOWfWz}Qb9*}-$;qJUX z@nSWRFXDAfR%KD#+t3&38H2|e8Yas3Oir6 z&iV{p`f!P}GQ+cwX(8|3P^Rb2+#zl^UvbKYk1G5$5z}fHG5KFUA0deER1B6qtj@5Y z8ePAB{Id0l=Xpv!*(KUpGi)pJYZ3;e-(2GK3sjBC5bg+fo)ME3kKh-l>m&x&vwwbj zj!vB6eS58AG{eXd-a*{fANxYudhz-Dz?13yb=;RuQJLJ!x_Zk#Bxj}K(-D8`JfR;H zN0eR`$UV{_$RM(9ZcQA~OpL%>VYFTxh%Uh-@m*+ip?!7nN<{Mz;ov9M%)SzJVFM{X zUFnm1A66tP(Z0Ts%KXtV^~JG;NGV%^Q6uAh1FN|e6l1L3@N z9&SEKe8Tf>!l8}}W$E4)%HlH6mBL33lxLt#nTbml_oAF>?gf%OyO!*L9}y#ZhznBJrg; zwCDbZ4goj$6)it5hP>pE9LoFNzkh!8tdt7ujnj*z;mI^>ce&{6vn{qI-bO#^HeAWoOD^w?(QZ(#Ir6;_2yZm=*wqo zu&2AnNj^7g$tEe@xuZ=)OVt(}5YCg%+rOXtDEwsaYr;rRdq+Kv(W0(e^z?MW^ArK^ z-ERx5+oEQlKc)=HL$e9oN;G(>k}DtM5N@$FM)8<_52GcIXVe8l6APA`R^!Y^<@jrS zJ*hfn=vV#BDhXT|ZNd(cA7>I6o>%J&85=^mqExG~L9Q>gc|AEG{`Mtvj}xIkB?R-Y8~4cCMj<{y@`_`FkY!5jU9~B1cAu zu*uQNPw0YWj^+~NkP^B#qchv22i^SE^wOqs8@!&-%h#P$&+k3=JgrDJMc0_qntAci zvwl|}0fqO^FipXTb@oIj2C3_-cjBeIm(e!bUv9;Iy&n~8rS6yaX;O2!I-#@|aU{Eu z@`Gt91DC|;`;rJM+s?O(bUeL$OH^Nu-`i8Blr_;$cT97z@CQktJsH!sh-Sm?_$b4XQ6m+GhN z-;+`7K?i%T{h@dN$0!j34~_uujt`+Qv&5j#U1tiO1FCA{2u@ z-{%ZZ^UU=y;Fk~MpGg`%FIamoA)Gl@?UO;Jc5B>KCx-gW$$d(_4fm=$^!)^hO>=F^ z0_mzT`*!z?lm?P;lEIgHy^N>P{Cf&Sn4+;XCB`EprXdasck`^x&e! zATR14dvK2lK{vOZxSJM5c}RfA3+*^T9TLue zap~d|+npv5+#24SJ^N33bOC}1r~YnRVt{!9NBP|n0~``~y?D!`iz^H9-xzo)L}3V! zpd&y${c?n8f0S^jL@4uSfj`Rh_h`7nxIddYwkex+Zd0~0Zj;c!R05@K%2p;z^MwERRBsCN;r=$@Ta0cg6O!01COl;ls0+^)hPlHq-Yel4*`P#S`-D)k0QXj zfr6|bU@AlA7zj-YfkDy00wRO}xHeH>F@Vf1Xplrfw1g;NFB&**ahZXQ1`SLjV!+od z0*+|l@fE?@bMOt^Y$)!y(J{!fu`_LQ1qnI=C1|)dX;7n~bBKlmoC4hsIzmw!n{{t? 
zaA%#k&Te#k^ZpKt%ueIyrbPzSjGJCt4qennDDIX+7ud3P!1m&7@3O->^GEPe$Z7*H%5Y17*9FiW z$m#--z{?JI!gzRK+`RxAd6NoWIXFPAy87TT%$sCj^#m0F=>MPDYyb@Pm>3$o0YU(6 zDk6gVGn)KoBvHfY8%^^g5aAqQdXZEKd35eFnF8@Khk0&O=-Dk@oj{{)W*A| zg->%#wxa#hkPoBv59-rb%Zz=eSuY>H#5G^3o$32EJcuQrv(Al_q4n}qWa`(z)8rIo zta4R-&2P@v48gC5D3!9R9;_Wk+O0Cz@~Owu*$Va}cRd*^V5)Z*(S4jJbm`|OQs#R> z-!m;7Z@-LXRhag7&nl8y(q)J|D{$&Xg5hjN(HqM5Mf5_n$J@K*uP>*Nt2dB@Bh42H#pN#cuYJFAZ6x^xDrZE{8;#)El_&S9^mt9jX z!ki(N79Exdc$hu!NDlrb#7G*%aW|a zVC{w&kDIx2;=)S<4T)#(iw>HnhlzwG&8;%)Ux{fHW2?z@VZMP$M|>StY|MB{7-W#W$*DIvAkP54njWq=N=v5Xl%G5L*aJ~VXnOiwmCR>=WwPQ zdX8^fb$Fj;r&s;7>@6Fi)(PU+H}#=FuP4enNpzZ(ak0b?zV5=$I!P=K=_e&E`C4%`NNP z?%ufOU4H1akw5p0X!YLEFYap(#?tRK3voz@1YJlrK{#eK9D5a)Y4P2;RXFGVnz~Cs z&Bu!M{i>`awXqIg%!(p0F(mmHZK^Yz^QbHvWe(Y%!3f#D=8yN&|L{!JOo-y_lla35 z81}bkTW+T$1U4M`sl1pi)0|UlYF20;CW_&;vA1p#zoXQ3CX((8M_K!a<0{St@6o$F z8J?cgH_0~$x0La_9Djs>_f&Q62X~P!#kVprIYK6u%cVaTQ70DttMtM{Icb`E0rBXN~?YRG^@s#QHBDR6IIng5abXE2F~A*2s&N|(2-NI{MqgJ0v_ z;`2^@9&~joZ)ZpJE=pe!wsc3Ugf2^Y7qT-Y(hzS2hoY}5w1AVYlN&Tl`#+#bjP7M`==xpk! 
zqNqR7QGb^f{*I1{lW%@O8*d0Q+rrBZfd!ILepOw6M*#z0(YBNJKdHI`B@Cx0Y^$z7 zox)LmS6u-f7NYL}5#Y)~<6?|M&EkCa5J(mR5deR=C=|B``OzVr41i|QK>3AqGBnOl z4(Vhfz}*d8-T-DH3@L(;t_1WpqzI&^0i_OBAc#Z-pArg#-7yyHyA%z;i2T%YREDW9^4B8Fx5($DT(LnM>fqamn zjRHzJ3h2zjfZs^)wnN*rph3Sj`LeCI0tReP(k;Dp!%(uVx8mnEd`oWyZT?kn1q{Pa zyFW(~s3}8G0>Gs|3UWllRJ@#EfZZU-5rq^-{ci_3f@b~>2>u@na|Dd~{n`J0nyWBg z<@^hv0xczVJg5K~*ndU670gSxhm(Z|KE4ssM!_cvF2ALPg3toE;?@L zW&27L23Oy@0%j~2{Lg^}AO;x^ED!>3^`8O@z$XWMi2tIJder0Kt*rA6r5B#vk`S2c zm)PHqec7agIrN3*0*!P3)#S$0B!h%E}ELXbVpMYwMl2KrcBQ4 zs!geWP33&&%6K>1h}%oP*$*6FKYEuE=c!HziKH>jgzwJoe8093_-LZ6)3+%|fty8mf z-SUmA^3&I&W4x<+y`L{B812uNt56wZY*}Za$!FaYJ1l@X}g$f@u_%s-1rAkfzZAu`Q&|8+Welw zrDv*i@6r_)IXq#uno?!VbrFzyD)Xms(P(x@+@MBhSKgb^;ZGg|skOK3lRJJuuj{wX2-w}3mO zp|B1wZiN^U`I6<8COh>~+TkP&@tMaZ#Ht8o%Ux$`l3D2u(d&rrFNfu|zhpR}3})F~ z9iOD{E?m+ookmwGQ0h9&PA}>d1{P&$2h>wfy{1+|ieI^cmc4T!u={IrAuK<=yHI5> zG7jme`r%ZjB8!U}t=md>$-|dl+=~x7o$UU2$jCL<_qrZ=zvRhwe_QfEowErHIie}8 z3bAdN_b>)V-qPZlWcMSFWZiGRppGG&oqbQ09((j^*WIT37g>hSsS~3kRQuGs2^Sk4 z(3$S>H!=^g2#@c0a(pGltv9`R&z_`M`)pSYnzQ;VD~)gG44b-GG+w1*6k|qzrcID6 zl)t#hQfo5xFzm!>?sDbny>4Gj-wxmUQFyCzfICi1c~`h&Q1n3d^5M3go)XdzEC;6eqL(*q+3zBCX`LIkkY+}j`UIx%yHk`h~OOA+YG9vIp5)~3hwJWZS6OdP$3BAr(9Oi5gjNo32^CmJ4#u$!8lM&sXw$1MFd~Vk6ZBt-SUVp$V z+(A=PQ{wSY!crO9Hs$z4n}czin9D237Pm5xcuq~-_LZ|_ekSmQl7 zE>p;`YT_@e&i{VC*)fN0WNhy)s!xvZ_)ke2DtESVua{%CNk3!zX#P19Wf;9jsrOZLXKvrym~;`b?9!X_ zQXO}CojY%1=RT&ns@NyD7rj{6Z}y?M&YIa*Sd`O!NpIT1!ZKLr7TbQX;WC4$)4uI!EbWeaQ_|5Gi$O7Y}md+ zcpCdOOL)`XuNhB1g_thilH03eS$n{yHmBgZM*L3-np1jdsytK~Lf$^neV)9+W!~-u zhgf=3cNHu3U@mHYCGdMmmT>amlBH5_iCZxv|AWKlp3tjYSe!vQa{jD$UeCUo#gRfm zW^L|U{pylbMt0Ot*+Uf{nGoZ^<3W+b&wjSYR=4zNhCZ_(P-I^`FFTd2_af0{xN`Bk zo^6iB$NoOoD5lYY367fKASDs6A7;W0ndLRZvt+OWuGQNdXoWVBHDY(8msauFW`uPZ zTJH<}J~y2k4p;c6HIQTsePJ!J>uSltGM2=}eJgKxdgh){b>+yz+o5~~3C1R8WSR~( z9$)Hv{=t-UDD7;|xo`Hhnt^O`T8HMD7o>QMOU=yZQyt}N-+r@-3Uhld%){oLQ~Kga z^HCfTQIMe;OU2|`8 ztLez9gU;}B|Hn7ncV8?pk;hr(2M|{@hubH)B9IQAYXv02ZdTu0be#KHx(XA{!8jw4 
zzDjvi7DuF#Q)9X!?x~85R+qmXw4MGU|JacdNnili{(OD@kj=yKmE8(*D{|Gmgm2fA zS?+oco?4ee!CzK)1b>bSxpH9NpU@XUx*j>AI(T0<)Nk6l-6|4dF@o;gUjG&yj?ZL!8Z%HKR0~cdA{x z%oK-M&q$SHs~IBEdA?M=x8I+&#~?OZYuPQ`EthDP%K3!c2ToMv$X!jJ=#qypOof+p z!8KDyU!eg(?OyJY=Q<>AL$2DFyHKk+ZbTafglHBPh~?CFjDiZdQX0=?}YQ46U+O?yv}BnYgOR z=TqtFoE&dlS}#)_2szGl=R-qF0r5v;GRgOn^(tNYxyYVSlB2}?Q%r4bPD!4cVkWc6 zVtq?`-^xcoYUq`W^=$A;QOuh%5{bD7dC!8#F={W0e8Q(AD%%&xQU1Ya9KM$v4u7(~ z+e2R2RD_<~R`KBw84~G6yQ@-&+Ud)uzRcl+A0+ASX;e7QE_pDc1S@J@-xn|{zjq=z z^XrkQM{P2>|{7q`gh|R&fJ4DuI*I0 zw#z_PA8>o8ZS6k-0NJ!A{T=g~i0Gf0*M8G;{*HMKr-i}g;fio2xC&ey{wweUXNTBn zw%X*)e?*aCjw%0i3>)-zyH!660!3IK1(0@fB_ zUD<*}Y(gWz5d}q}Y``a=GEjmL2S{Mu*+e4jJVNzAXak70Ss!#R26kB#5(Fk62yy^b zunA>=Dugm@HV9Yw2HXKx$wpq}#)Bc+Kbtn610A8~ZpO9aU=L7-q1bw;vw%5hQDDjg zCMsxwFQ^kp$aJ>RUWj30kVy(E1L}eD;_3j_Gl)63I-q+X;g;*dWds)x)?%}xJCB>^ zP+=UmHjZ0d1=mm>+%>S}LFYI|0XnYp&=Js)z}AR_Y-507xKV;YE6}KcBa}8r0*8#* z$+d068CaKK`;FT)oPh<{aMNnba0ZMGI|z`0^cT|_WNF+q8~?fCObgf$0Nz7T?vD&- z2-pdXC0GFr{0CMuaTMZz8}T71_q*}xf6Q#ePTR4`jsq z6+8moUAUD!*2>G-#tAHQwhbNub=mH?1Z~+yoB*^4h$zC{{dEB|=vM&0&=~?a!8W3S zz#4;M=s#VLKm=Iq|Dw_EUpg9SiT*l`RWTb;F}zc}tmZw-|Ek!4eT@rk+MhXMhU7D|%TVdMCpZVT zpL!H|WSPu=eG(S`T`aBmR7^1EK*($$Rg7n9q@Kx9BVzWwuN*iSjH(ion(6$7_ATDl zWtI)1IP7*j_Gf^XOrojm*-Q@#`(f%82f3p^f=*jfQdL$30SiBL zO=f;j?wX%K-hhdbM%z^_8nqv-YC5|D%eeOU(YWrm>X*^V%F%uyn-+9#-SI3h&8M_ z>li;Nu1r32;HEX{!#lCMPVIAKoU{5#b_6ea#q-Wn9gPTb-z2 zk*~ovaKa_n`A>BcM!KhFsV2tG5X9VD#po{%cu_0mb;1&+(+LZ5?P#bHMB+!!&6Ch6 zYkoBL+KosPTuXoMWhDB1{3h?i3J#LH%(s4gXT0NH(6bn)Q8;;f{#9UEO+&o$wT74o zW(MjPZkV$Z9GQkam-OiZ=qOH-c<_u5FTXj2W{)NnziKs7VcTwlWbccbt(w;^B65w9r8C)u(V7&Nm2$)`inb z&y~cUUFOQMxwX{7FsZ6Ctsqn8{D9zA{~E#)6>nl??Z+M(@x^|+)}Qa-)f^|^7o!&W zq&G|XYU&Wrv}L@$DYHx%wtPdKkao9C)(SqY=<0Kd_xt)nSG5E@;?_EqN1?*gCi+ZT zr9t*vY&&b~v;F|GppC8=B%nbWwPzOraamnpY*^Y4iXr4S*cP`wiM zv{m=e47^53Njp1CHIhL_Bc*{*(R%Rf%k~>hDZ!|Z;(NpoA9m_OEg!5cm{2)rU|oAjv0_Eg4EwD>N2BMIROU7Wk~OVkSuZ(tJef3n zpUKZGy&QbWP(JSXb$3Rj%d7c;g{81l2W1n2ugMy=cwnpV&zumv7tZgw%u0~x6_~o8 
zHY3^6@vv!Ezm2N-N)GRvd_U_P^{SDh)0%IW-Yye5rQR0e%#1o7-ZohD$>5wi{k_Z0 z6d{Vu(yW#Hte7Gr-q>_}O3{;Q{AuiLucRxj8a-&^Zq=kfzT5L=f>E;z-u9{nHX| z#&_5n)KV6;+S>SDO)@fQA8w&p;;?jE{63XuF4S%>_~q$^4u6_~Z&wAp^_a6B5PRaN5l@c>3V{E~4;WcJ9E$I=gG1_nF2sbuvsfDk#L6DXFv)D4)tG zIC6k=rs9UcHGh`+le||r=`>BxA12F=lqDy|OouQs?kX?8;2B0_{>C`}fwwm;k~?WR z$M&o_>Q(&Y9&7*0d_*xny{g{roiI$ko)SK0MR>4h^tJc!J+~3pOOgpU><%#V*Y6f& zR+Jq|73_U-Faq)Qv+D6Nm+w5WKBvwW^pueKvB#5}rfA|Ffhh;NHA_4Ipk z;$EM~)f;avlxj%5Hi(mqJ_ot3SM&+kz?t-0FKIKJ-sT1Qr|=bYS8RwacmXn@Po;|smCy>29cK*$9{d@mi)C+TAYD|~N&FVDG zrwG~)?@dW?uqhGi(#_~6i|~9xN>Nf*L$DC7y>Lum%~x@{#rO4MWEvH7tMJgB#7_$! z#q|Sxl5`GIvmhjHzkb~F?){lTj-i86M?Y!3=(f4@zMj?7&eg#@i`JXZmn-d!9?9$1 z!IN^ul`rbEybScs5l2^wdK(;()&!iT=;IcdaeX=`2UNX_>mMbNo(Lo}Z<&aGpx9re zJ19ITE0{r!JUyei{L%1W{%O%<|EdXea<^nPtr6!zb!v)mvX#Vf{||XZoQhZ}gQyu^ ziK#}XoYRp{5aT@|RY_a|%^XXTsi&}y3B_0>B_%^DZIuk-J;eQl7G7&MI`Mp4II9%S zUv>X&>(JoTG?CRvU*{{rGpZ4uw;wmlV$_sbToxKj-P8Oy4ss7Nb`skr^;b)H-~A?4 zVtb7V`?yAA^i8f)6d; zn;GstAT^=3!t=S<**(Y*r9iG%7b9!Rj$_GS^1Qz#vQU-pcteyT7`l zRM(p^sQs3`u5YrO#uk|<;mFF+_f$R)gS5Mt`fYO#w8UOn&Ql(#n&7*XTh=MJTdR<> zR_D4K%kJXZ^2uZ7v4>4sF8B0_+nf-4cpdI_ky?Znk;nL)0QFpWUQDNR+s%Z!jm&jlVogM zwlq~}MV_d-Z#O0aA9XgF+dO3Q;5q(1d*%RGP!{1lx_*=;LA&(Yp*@)~>*LyvO)F`d?p_Xi1onkYKQ=FU{^tsf(OrV@aF@7uD@8fzlXtWRI77zXUtAeP z`J|n{s=DU=gWvfa`{(cs83m@!Y}uvSBKheT!ld3Y&n0it#Gn(prH^#m%AuQds@En` zvONUeNcu6p9OUozv&D9QY_MPywY7NERy8U*ol7)5L`hT@*O_$tDoee;Fpqx|d~&SV z`7#kgrKwxv2EqNBB(Ixe-pADHr{3S$8{W}^M4Gat+mMi+xTg@Kb?!D{Zv)MsM}vk$M1jk*E~L@;a6KcBrnq*?hs<;l2A(=Eb)JM$J!xt(~8~r$oCS_#!Q8 zNH8m?{>+c`#gkUiqe*VBXNaA`EF^+PSOf*?UVY+hx4JiHL;5T{q2T;cHigVav$}Pv zBMOYlj%yfC`t_!ZYCRPkuNGU_e~|9|fXrHulQHvJmJx&7GNd1hW(a+j7Tnio#O+%aLUj*7VnGWX$Uh${UKX#*3ZE)^zfl&LAqH1MJ z#t!P@j{UOV2j{%@a>cl5V-o{UPEC}oGp%`zjHX>t+w-+V=SY~-?0hjFi5xt^YR zVehScZpl67@_P@`J@EL+jyjc|oYooH96{vUlaohh7U%YTm`!9R4zN@5Y4Kkz#%M0^ zJLzQH-Fy9p!nrSB_J1|IG<`uMwEtnFwy_(N*;rpvZ5Mx9>f2N**ORHuqof>KX+g|3 zU7~MnitX;GU$8y%Xg6uyy|ca^<C`lx-2`i6 
z_dfovqIa0cG-Ku&pQkvuKV;T+&_}pmV5HS)6Y^X9fmxPuMj^R)3mF&Z9Kc27j*;}FeNNvI^l*A^iN)2 zZIzp}nUDTTBy}_F=e1yZF@Ag4;}C z9P*L-ZgNaI9U<21g;&TWHGiL&V&&|I{EFSdNjDBF{MeUwl?Jgm zPf^_d?0iVkaF&fhs}RlLe%%3woNtL#7C}W*b+X=&d~D;-ltg-RIGFo@QPsYX98T5j)Rm+1|7R73_+o zc6ux=V+uV#C~qt8S)hFEY&aUulb#_3B>jQ?*l|7~|kWSIs2lhrWRj&t7M%slv zy^K~x4X1q}`YBwLzITW)Pq6KAI_vX};jV>ss)T9Q23lj*bJ6vyib1`X%hk?JpS3g} zWcIycyf%#&(77_L-f*PHV6DMqqs>XSq*GkZ z0DPzYBC5@O_~q*nwYcV}#;au(qctR-HDZ09D2I6IJIv@zrRmUBlO8$??(J&7Q zG640Vcj4Hlv@WO9T&Z89n;uutbUrbl+|9q1r*z7rQ|Td)Kl^wq?~7fHE2{H5SYZ~p z|Fy!Q_pTQUUPoWdX%TpE+;GwYr*I|?Msi5GY8BhP2y%&rW}pmn=cwtJ~z zx!7t?S@gN6hOWH{L4hI+c5aWdtnR0;<^=B2I$}v!7GfwKcH}^P(vhl_HyQ^G8H>)( z_MEx{v9bW&~;e|7LG0Z$ct-!(YIA>MBK$l4t}P4d4y!3{fg_xCbSoGd zTCWIa?ZjI+cUU#yws0r7>n|8{oYP^amkjdEY`Y=wAoIc!1@$ z1-JHr5Q1B~dRkaPi+O`EY`BdR+y-l51#LJ2xAStbwZM8gJ6U*n!d3A;J16|X)fNPB zJHoNhmJ>odDieZRVlki-`1n8}v?>3sAAUm#Zhn13cCV1NU&Vu)@H>wiZqn&iEVFmQHXNFK0_=a}rw@ zP^puX1s3jx!Q#4S;SA0^EL^PdnQ^x3ZR7UNq1oIF13yMquFlRDa63OYJB$ldnZ2tu z9!L7?=;79%g4?)yVYlMh;U0h)E^yqs>G*-Qa&-bc@W43R|ZU8rg z8-Wq9uyVuzcG`G?zyvEt{D^H(@twBtu(G$u+sxr!U~>?xhm|XKgSohJ7H~_r72Fz* zf!lzoX9u^3JHQ>m6m*8Wz+K^PaCbNs?g96Nd%?ZoK5$>SAKV}JT+GH(vG}VUPxG{Q zvc`ajZ;`Dr6#|30J)D+c^CmF+T)~vu8cQ4vS#{cK^8t*7Ab;~D}g#kUwAv9OH~T z;CcY}#9CN`-E^>yaF5@fgD>g|I*HoRNt77e(Aplnd7ukmWxO5l3R-Am?pnM0xWI8U z6xxHu2~2J{epzEHKe!*5K9*QlM~n-IF#+=i|7LDh2)EkYVh8SuwYCAX3i6e}p{_e& zoS|0=%mF7?TWISauTQpwHS?DbPG;FzdWP?{>6rgJmv| zLkA1ZeNe>C!pR2z$PJ>$)*FFilq~*3Ad7E@?2btwiytA`E$+xd-zT`NB3yp6Ns9Qw ziul5cI|?gql~Ts%QO4&{-jPRnE03b59b6s%xYPLTr}5cO@5p|7Bg0lHXD=sDdp9RP z_-Xt*sf#bFi!Z9Xqp0py8AJR{9XnU73mnXUXYl>9bn<{htOwsi3;b=19c95zPvBL9 zTWnQffn$RO1`fXR_>lpl4Hd@V3uAT^1{v^k6>g8uV~@|X@wI?FY;gOnJQ%7ZC?!!1?31kRWh*@a+bTJa*dB3EIVINA;Tvx%^ikP(bX>ug`@d zcW{*75hx()2%_(Z-@=uJBq4x60Wm%}zz4MJ0q}u}fHsPPm?jZW3PPOV*TjJ0nLxZ1 zaO0wYY=#0Gv*XsW0LT+)zW`ydqzZ&OK!T7u6o}2jl>;afC{_r%j{>3*6i0*tA{Oo% zst3eDflxsJ^Z;NFfRhl0>IHE}P`nhB7m3@20orp7piF-I17R?>a2tOCgaQFZAPC0h z5sG2L<3s>v1L6mS-9Q3x3GhUNy{T~gfELb0f&SsPn*g8_G0-Pb&_@xdoq%W1I*uY> 
zaa(Ae5fl_V1)+67g0{23^$7yCfNKaJvYEEK1+5f<0zrk)zHY)03rEgaSgqwxz#K5DHk)1vj&{Kq#O&I|$%8pp82SpfR9Q{G|JH z5Q>^I4ut|Z@kc0>-@~{5x7Qs3&HNn+@;`<`fjIU1!~a`gD7bCv{(AftfK@A~DS%z- za6|jA2u6W08x~G(b{5;H4g^>r1mn3)0IwJvr~{VPJ4hgpv)5J;u*1xTz_LXG_(EJ< z?=}I{g8OyarhrN|1oUkhSe-;}`wG;nuzmGsv19<4qP&$8yrS?^+gAYZqPBGfKF{#& zjsWZ*z`ktV6avV*Ul1*z7cSd(!N&u>joA4oV$?tk?grcitiS}ms(*hW6(~jxzZ>FT z*nZ<*I!3MImedb&!K1!5gN+nINsesY2FYS7+Tvsf(d~M7Z@~zdrPyy7NWUsLrS_R@ zxaFwWF#D}Nt;(Ti<4@`<>waT-khyF%WGU;I3ss2ny#+)Fa3;QDx0+mU?*VA-KEbbnsnTohi~d=gCG-e+vOT| zLkqi@aZlOfC;KU7BC4I=9UlyQ^<52SAl4B=_x}E6j&20Sg!r=a&Aa?+Mtn~SYNAaP zeF?-2D_tZ9)8-~C%Oa8xXHLi)_nQ~%bn?|cdS0HcK>O&@Zqu5A6DQI+3&>o%hUFO8 zrD+dJ&{NBrjOf7Zu60+m`bJILl?3ibtt_)X6i44_{UI^h6t7$JawZGS8bmxA#^QKQ zU|NSaK=d8VHNe$Aqfp4PlQaO9@7PT(=kn~o2&Pi0?p_L~+!X!egMOa>)}`b{NuyZMVBGC8{@_;M+W~vA z;joen8ZQnhgB)(@GN!!OK`KF{%_;d|y5A-}M+Ngeh2+27clS7E*+qqVoYGpeHg}Hr zd@yYXxv*?vwNiZ+>(XgOq266Z9}eeE9xp1A38HAwDyiN}m=;Uga5`ox;#>PU>7@Gx z9j8Tagx4N{DeHCYx-qBYqVXdyv!|K-2FKUw_J_#Gr_?>Fxim=sE7!V{Tu+M0yan+R zmnY}yr)bDkXv;mCNf^m&cz56DyPJdh&kcF>^+Z}T(d>)YoRYhN5b<0&hhA7u5B1|k@Z^nrq^B??<}ics+(SZcJbw>$=*jb zoJV-HQ288g_cP00zYhwhpztvw=&kg?R_JENMFiYR67t<$b80p8>Tby+pJ&*8V)M)7nrwr$(CZ5y4WJL%Z!IO*87jgH-j z&5qIGowL{Kwb$8apL_1XpZCc#o~rQ`N7WoPN9OmAU#-35e!y`yfna9H*h?!>y`r4g zt^3SV<>1myR4eZmYnKn!UdI1qJMMi_R0g9xaAIy^i@J0!23;?Itu|Iyhu3eHOc9)! 
zBRnldx^z_>&0?2U08-yUtJn(tqQLK;z~`bih@{*j6b$LW-QPmvD_&XeylfW&9>+hN zkxdw43g$)iC@@!9wr#3vlQtpLKevD*p10a_P;&D|eoz;rfzK>9;ClNLbzi^!G3rl& zVnhOY!e{>R5BvM}j$n%pZ&8jG%8C7-2}?{+r*Qh9p0tzirX@QJ-#=()oJVo7Ub~J~ z7Qf5zC;YVZZB}J!XUpXs31*`g%T+5?S}bvHSLV8EKN#X+rRMPMDq<<6)KW?Qg-fkl zT`bzdY&`uMk6Vdsz6A>su0_cjJgtD%!NF{iD;oF0BOFB>`(TAKqP8xlxad)Q-H3~t zV~lOAmtpW#M(-mYgXkb{Q zpK*v`qeJ%n5fVsen3Ob+MWiy+8st{fx&|965sB4MC*`B^3;V)**_OrXlKe+U>m2^|;r$iU@Rji_TV2{G+?IJaEni z`m?_#rS{4^l`O0n69_raKaG>TiAJh_ zo{r$SIA-6gqNl-pq>h*QZeoKiTrk+sl?Sai{q-2Pqq6P?TfT}Flo#|N)QOTq+PU>I zB>bE3w^+KqUx?x7x+!XVs^U+K37TlhzL$L?jlG@?!5kKE9Q#z+LqX^!mVgyrPDBV* z!{t|oDF)|#?dMKg`JT7qyWZkiQ3~Ev1yOC7&oX$~tDlSUecDM%qu^L%G}?WdygvFE zys?(wqyLPchCN*|&CTdN290{3zO!6qoF{2nrw{;i$v8?fqAqq5d z1P8S%DD`Fkldv$ARnjDBw`eoI19M(|4$q6iOFoO`Pnac7SoM0dFYg>%we~+zBZ_mZ zOg;H`bLv7A{D2eJx8G?Ktmx7TDbm64_kE*2yPU4>AxBr|&})^^c?B8@aOifFZ+$i{ zq2!SAxhu@T;u~9cMMj2os1x?F&xg5=Rig?q%WJ|Lb#;shz3*g+6B{)WXHJt&m-$R9 zpWVLYHhLI!)Fw_i(Uz1ZfBO!X--FF;0$3crS4irGNuKKHp;jt5S zDZ<5o--M01D5$af?A+_a zPsb+NMnOg6%Wa7SZ;JXkQ5nOu;*y%`R-%rywLBw1&A2$Kn|qkQEubo-G;X_lq_^-Q zd-i-b%OVzg9g=};WJX6C$G&tM(SMd;1n0dmZd@h5{5-4mJu&!R4e`A$#x8^@wbCUc zXAb2?RtSRnM?waT1X_Ze-mC27bs=`IRgD{8mFPt-Qj4UtNw_0astnr08i>u9|neiEvo^SZF~}w#i0y%>Dk0E@^}0 z)`KqV_X)a9L#oz;!VJQ=wj$e-L)`P^M{?yjtMeHt(wYmvs08Josza92Mv z%t@wVWyo;~em*&$DufF(WmD8c3?co+w8X~JL8lQF=_Cv&A>*~>;wCh{3|-$MbYU^1 zJBLBm9F&ku98=??!SNW&x^Ay}&P7F?DSBTT<>HIVs_XBs2R`{(Le>~&^mQ}tZ@KdD z-!1b;#nf|GHinsh;PS0i;v|!42{bq*QPW_6$2DXR@-FZ3i7Al_6uZ|HJI$BVUi9 zmFXhkTl73<9PQwnjNA4h1|G2l6rau35iA$kT7tdJEj&} z9c5^B4h8a}F98?;~1Hf?Vv&P7OEz4^TK zWr?CVd9@GMJMJZKej%@{L+r4542e*N?AhQ#F14uHaX%_VsFhKnfz0Vc*1}i^zFNVL z4re*o-AfpF69&d0_D+Z`|6(kI(4Wa3k$9pH!4c zl}=eRQ0@Dde#~LOr5u^c2q?KVjcSrR*=t;PXg<8veBlL%c zr*-Nx4~ezlZ{KGDmz~Dmk!HW3d<*b>!R8_ijb$iswUsgJ zD5r`Z7LCWv_Lo;E#fm1g(e!!ujPF*%fgzC8gNft6kJ5oG2EAZyvq3D?IW0+0-;^v} z*vl2O_;R-{xy^i*M(yb%;;WO{xoOM(rlx&$_SW~+GF6J6jE}<25@%=n1)63-Ri@ks zX40bYJQn>(i+3|Z28X;zXW{3Wb1UdN6+Kv5XnGcB&0+SD5LyuKS6`N$7a9<}$Q*Cy 
zc|QD%+tH6ED!s9n_}T>YvWFLQ7oc4}2AeG<-<0Zbh`c(&Tc!%G80&)#LUogAe`5Rk zF0^M~f5-ghl2BpSrVUXkqrfuk2E#1WA*18*B>~P&GWYQgRP@H<#_{N};_3a&5CKraflsmlt$DaK7 z$NZlb2p#>g<5e-dXpnh%#NNCW8Zw!(;pc1M)2O9vL1|mC-&_)FY*Y=5Fv&#@3S^3wbmtf{+8*!pRj9{0GuvO2VV~=7E>(NB->4a z%BfCt&6ouzhZdAgm``%kRaP`?*oG?7CX?0#5q(s50<5l9Lnq8sH~wqq${HnjgJM5A>WiNC|Xt|+Xz!=u2gA6`Ykv=duYgT zak6F}Auk$K<^`NO;6?f_+vN~TkH*hmZc1YAKCtdL+hJ>+{#i^U z*1^Q+>Jfo8>m6j}Kb2mhKEz@>KuRed9{vHoRm1cNwqXPbLm{s4?UQ25r;M`IhwS$k z?Q*TszA8NJjkd9=q2~w_BXu`m9e1FL1*ooNFZo61k{^#|_|{~!QnSqh!V^dI@Z_(? zh{>C#9J9hpO&zEp8FXGA5gP~lTBHrC_mFNucwYz$AuLW@+$V`37{2#*G@y4U>Iz&; z#s!`lHzr4?ucjt&?BLl)JSG=^Xn`39F@A>bu0*jk)vF+dM32o(X4&D#ofoMQ-Lud2 zSDzO=6z7dvLdLIPuquB zG&Wx|80Q7J#__S$l2?$VtZZc>wkItD=#C8JzR26^#;Xk3WCX(>ay=s@1{&jMmuBPX z=8W&=J~EqWay~qW;hK|rQ9XEAN^00yUu~ym{eM*J|D#&}IjZ%`H3W1ea&qweLvY}i ze+Xy={JQ)PInZBDBA^xEF#HFr^~+NPwEkWB^Or9ZXk_{~{)E3^tzSM(K>B|h*80;| z`gd6CZ{nX^tp68vf`9&n`#+$yez_QaxfT9&CH$ft{@{Lo@j8Dvu7C47qQCeYDFB-z z2jFr{0L%;kLISWc|5nuCmwWEt_;UYWB|-mh9&2?Q2MafbKgQy3e&S!7#s41U|EV`z z)XCobZ*>D)$p7#&{txT^d$oTug8!+9{XZX$f9ml6Ud5_T0Q%x@eFpkc|9hh>Y2)hV zE@lmowr7E3k_X^szv8SM0O?vS8*_JSH-H295BUkEzwiWh?mu0%{|*!Q?Xmq!sNz>y z;7zDU4Agx1Ek3~lkRSk_|CUq$#(@VwsR5Ylm%Ifq4TuN;EkFk|5JC7?f53DWz|9QY zE)f`K2Y91?t0Vl@p#a){AHUK7E&R?84F4-A05I>b{D3-uJ^dw_3g{;fz~cv$Yxrxs z4qzN`4S#7$0G;N*egNF}zw!anfP0$!(whLJ{mut04~zrL{!RlT0KelvJFp&LAAljy zSr5FXUvWS_VEngK#P9L@JsyAc{ZFU;U-$yx{CA$;ef)Kef7*d<|4#pfZ2-p`*bWf! 
z__aL_aAtrdf4BY%O!!>{nDB=Ia9zNY-+Q_Ip3h%(0_Peq7r;WlG6ORLINd*32S6Oy z31ERg!rzVkZUYzsy95k@X)>j7X?{zh^D?qUGU@;AdD zOv&F2fC&L0GXDwd!OTmnXyah&;Raay6rFhF8B{H-Jb>yW9RCjc(6TTm7Pl}1V7mbA z5gwj@t2?5hszpObM=$o9SNxTQ*wlxZ`i~SqNg%RIObLj1TG%@|I{;`xKox%`|JMtu z0rrA*x3(a*w6V7!R@M-emk}eT7O^)mH@9&lmIUq;tLy>5yv&H@Z2;h+n*|NArIRZ$ zzyLfUHgj?`w*f-9Zj69Vh`14(5W6`8I61(cB>>*o!Wo!EPYmEA9c%#fvXdh*0PX~E zYybe#oY>uo*v8S!-UEQUx?2P6mH~kOrts*~!h_&CJ!t*_{~BsjO%0;~p*>TY7==tk^r;SDf45t~{No7=cK z+ne|R+6GkS450mh-S7ayum9))Ju!g#G;uWtKEME~0n`l~oqt>>u;}lrH*p3)ioZ%b z{mS)MA8g#+EbJ{A0Rr%VHqG1tZClwm{;_$OjH9I!U;+8V4%F8DGtCpoVgm+<8u*#{ zXAI^ygY(~;Wk%xvwMqV8nBxDH3I1Q1-v8O;{_`CD8De>2;6ekS_y1iP69*f6pZ}=TKj+r^h29fW{|A-*n|~sB82?~e znEnIpYbFgKDGvmg0eA1O#etXs@QwM0Y|me5>ee=9b^!3%4X~X5N&-OSfLqVR+{w}2 z=a0O==eJ)Mz@(#~B&V)U_wQF&1~q%YVqye*VEfk~mX?AOV14Z`R~_vIS(uQ+kescuc!7ucCTgvmQ^rucLnT>t;Y-yl>$C>0z+nC+kY8|4gRT` zzn;R?+yOu`U|spk@Ly8FNy|)kuf@t8o)FjT?_~iW{@`5FpkA0CRPEFgm=?alQ(^p6 z^MSE(iua|>XYt7Sc}JFZceEGNo~kF+;>Rcaw`4SZ`KYUu9b9y~@0kos5YvyfqW#Vm z%s$q~VRWK2g1o6MI69_G zI=H)~8XOxL9_qP(?AyVk%ZY=f;4{GrNI4=&cK0!My>2jjfqd}fa;sd-YSF4jP15l4 z#d}=xAR@ns-9TeWKV>7ddKCfPW)@yDjg#~G5f%y`%UYNyNKf=xY+7B`uS=J*Te zx08B7-ZZ<+FFdg}@OzC>OHyfMg&b=ysG8Km(t`_`x}gc?(wfZNDCsIXno$DNTMaw5`E9Zv4LP86rHN|$J)_BSN*rs56zzIW@UUg_B z;*FNcrMU9HN-=Ed(d+6?li+xdI4@*8z2Dy`SG)=1(=e}{Gt}5fuNQTozfpmg)h?Qv*%QgRmCQ$wNql?8j=4qYDWvf!^)q6qccdNCeC$x| z8`&|?8zUwwX8S|gYHiqhV!jUDC~Q5;`uk6j5)NQ-re8J#<)0>3pv4}+Gy}nsE4)_n z+MJ^(DV(=q{XIQ17vuACt1!s6y9gJbLp*w${8B)%bY_e6og31fSEfCovc9LrTNj$z zjha;xXr)4cqNz^`cBwSU;+1*ldU~tdt=-CJsf;^W$Wv%V^U3+s^;AijlT~OB1xnW! 
z$!CjKaX6qjybCQg`ruvXripUv=^gBh&T^Bl%q9Y&Ox()jift5gUwdV9TQHde1>Zkb54|Yd3HzP-H8%*}iXc3>wxbIei zeYSM4f+s44CMe9Wn+tuPBpd~+An`!Q6b*6?0*qY*=z z&*)7vl-1{E#4KlU9N^<|Iy7e?U(pU+gPs3kb$GTW(;vJl{>)Y@S9mme`mSMr{;oG$ zQ`h`WCG%74yY6t3GH`5VtxT|~7Zd$he^Qf@0Jj@%M(t`$5V4}QrxGcoV!HSZO62mJ zGGABq+5}o`-zl0(+x5b%w@w_B6-iaB8Z2C#eEH`q?a#swaSvWn$C*t8lb|$sy>4t6 zEZer zO^)v(o4~USyPTD;FQubPsj1DRT;#$_<~mudZYv)PG} zQROz=C(`)|g_ri+0GNgHkLgMUmZoveQM4p;W+PtcyVsyUV;_Cbx=AjUTi)F{672{r z{s@xq6&kjY3WCy8*vVQFcxNWKVp1jclG*oxDLkdR)44#BC*o9vmYD2I&!M{G9v#NA zgj##RwsgwO6sAd;9z()#^U1-Iw;hYko<0TI+nz@0!$ktl98sx0$j1Z~GH=|cj zZGxjGaD+AIFOBT@K}-{DFJb6%i0E|IpK4Q+DsJX4nton(QQ@61J7S|{e4nHXtZDnw zOBUKPl@_rl!1u+8n{aM0$S02`JpNi1-!zLefXXS668lwQ+`>Z?7s92Ou`_LcP&YA= zt|vXVSHU?c(>kEcVqhgp?sQFr3+6k~!R2f@`;eK`X|IKOI{}!It?1dJtCI}TbK*M} z|4(wnR<)0p63b0I@2HediEgcO7lR<^PNTl0%ksvc%EdH*$ba0HpX9sT^`k7BA-6N& zb`={foC<#z)3xsw%*y;h#L(ZjASV8p{sx|J9N*;?YV=527v)N}(Q7i%B#zcuGTB!Y z8m2klN7%DWNBq=19N+%&YXwr`hJx@1E%){I;fR_wr!Ue7n&E_^@@zrN$eW-zMI*$6 z-FUir`l_15 zVVGJ!;SM@F`t9f%FvT+tSeGd_lm>h%6cE=`b64iRQx{lTxjs6KDo?QCL%{(V4Hc6o zQI0LP6LON`J#G5JD?W<+UDl;n5Px(-fP1TpQab}l+rx&@FmxU?&OxLKEgpvfbzOMB zyaz`6>4gXR_6ZJklwZ-;ctYl&xdic~Gv`4`Z*t_afO=_p z{oGx35|-R}(`7V^zKurnLUa^Mqh0Hf;2zj%54ixNBRs-er^#^SQNW^Nk*a zsa_Qboxd9VDRbl$lf!-rNT2Q5cJU(&vnHYHqmfBUH4S73-7tY@*=YvN@I<}0Q0e!) 
zd4meOJ1um5Us@J{?pV=gpp zu;m>jW>-+XBq5J3<5@05_VdUWHT%>jj5fjzWsJtO*GlE>ZApGFl5e6CB9l<2 zUbQ1h+eGDmj%5ow3{xzzjDGV2Kj#rVz6PW(x8ga^eUDWCvaGeWg>3g)ai~VD`=08e zP0_Fp+@p!=4I(dNquL=0#_rAg4Se?+0%mhMY^cd%-E3Y-QR%#wT<#X;H4mk-v> zB}MSi^X-CMbN(V96X@EohA>8!yQA@H5bd#9J|y(zxQ+YBYmFzzK4oRDL_b2}e;PH| zTcx2d#4m$ao2d~Gr~1(~1QthSsc8SKX544)bmMlX698Ske7l2t=CYOFOxugAeidqF zvBw^8R7!_J&$QOL$JT=*)8qD%ta};;RTrT2CX_d3qOWKSoRHVE9|ZyK8#WI?1)jdQ#Bt zXAo^j2+D@-P*;ITYu?>GGeLpA(2Be`<p%0`J>-B)xh1H{iTdung`;$lX|D{`Ta^S%A1buI<&iw?iLtmK%cgzg6H6t zaxBIiaK`WD$4w@ps4{{-%lXL^1qrG6p()lXZ9flGC)?a@G~X?5Fd-14dtInrKYZ6H zX*J59oEUdG3OWQ0Z2#2&lpQZ>Yzx8Dy2I&GBo%XB!89|o7)#w_>jRn>G z_t@egu}6Y{Zw07{2k91*vY)`2hd=WxGVV0IQhZZ`Mxg5uPpDc+IXDyErWKXV#UO+T zyV3k&yA-tI#XEJtBC&FQd@SgTlXu*khOu=xx!{z=1_}!D{`Ti0BAFuG2`1OuB^>iO zQYuf)Epg^{Cov=s?&1Od*Q+l=sW!?d(Lcp|??T0l6D`&pi}@R(m?w3V0zj8oR92yD zAb+AX_Y2|se&*50y*zHC%<(ZELm4k3h`m1{XPE4TGM1tc5G~<(=*O67ZxuocFHMzs z5R9Rcs~9jBobkqk<}$9LoW*18Mv3B-_l?AWi!7Ye_u6_v$~IZk6eUIrcBVBo=={q(`$A>g;+3kV zVjkUVw6S2LEpnA)#GUZSDpyvgQsu$x#OTBf{p)U|g|UZHsFF|7X>w3CCWoLh2r;)$ z${8XNIvM5(V4~6Ew)}ZTOXF!hDhl5?p!#CEC+~JhUS!T5>3@1E(G1k4VjO`bsgvd? 
z5X$LAFrDk(nkfZoTS8%kNWjhdjfY6uM1L9bHtE%@zY4-4+m{Tax^#bbyt}t_hFoC% zA=tM3OmJ4wZ=g6xQr3Ocv|Zjs@zlqr%XQ-qcaGQxKN(iA>#1|FccZjFm;97pu?1Ok0C{`E=o(y45e~)e^;{j_M!aNY zVZsXD$wAF)vqq)vi}4EHOC--2Q|p=EeQ=NK2X*PRF8pFxxhGK-hJG}5BNQ#fBw^v6 zo2W0A3P6h<6Rhi8RF2}bQLydS0ylCG<9;?wuM6Vr9P?M2&Ozc&^cHv+70FqwG6X4U z3pMWa%}#{sbBhqZ0mpsiP?qs0w%B_lgs*|}+DMUmAlRWMVVW}_JHe>ZvOBL`<$<>6 z@uVFIHAXl;g*3_tXSC=l@96bb`DU+V_xv}(g>VoQP~RbVH6&{gX5<( zR7VU^Y!;VHNfP|(6D0kyo*b5ptfdP=NKX+`q>*t4<_FFzOJZ^KLq?p3wNo|t&UiKC zQM$H+_LrZQD14wO>7%D}ZC|A0=-;h&5YCWw*n1=}e^6zAe;KPqi!W?Q6l&SN6f-K) zavO|j<#Iu3^T@r6H9N$LU4uOy_#Qu^{xGq$lh@9-$TWu~Q9+U5p#yvUC%u21xvn;1 zmDS3&I}%71*u{mtacm-mvFP?e9MQ%I70r%Pz~VXPPMF!Z^z}d{bmI~;Prf-Z6#|wv z-}PLaRL}dZ#EUR-jpI)o@Nc<<0zuO=H?8xEb*Fi~sw?3P4E*vYhvkn8YwpJ`Keh)T zKImj_rbaixm=?sGh`gXCs6N9NzzX)J&T0F7C=bg61?;?)FbNk)bqrC8a7aOhOK_X4 z$RlX)z_JTb_a)y!K0v=1w_vFF!UQjQB=7^d%Q0HsHE6@z^v>)dIuypKXe$H$tPi*cV0(xM_z9EMtxtsT|`PXm62Ww=y;8~ccZ@kv0n1#`;VK+VD zIp_7J4}BYkYep~Q3@Lq_6zy`obyZ6!=v@XlUi3GIzPIBn+jb%mf*NMxo}(R)zcOlk zWOH#;fx;A_^D{#qke%1(NxI_+gSTe!r7pkE>jWtzH&wKeFq;b@I0`p^7NVLSk89I$ z#-6eDcOSY$fAXA)fq_D-7XSuU1Tc^m$C&j}jtQu)2Cm>12y zLN{cFDE`Q_iZw`t8faY00euDJ!qjQ78$ocM=BE* zmRf?D-U87A?HKVv3l^H9-?cC1CW6a4^^0gFG1`cf&CbW0OSO-jsF$DZi8+|o7mzmL zXZZxhLx)R3=T>jL7R?z|I1LUrUJByh%{g^}aKgXE|D3P*~;dP)0Iiusjd_KxxOg5o*?VvZ`l zbLQf}ZpUg*G6->&9|DYMtgsdp9IwI$)@RCnnENYEhufPFNxZ!Go~%ppQhDfj5xtfld1S{Cez>dretml(>S*~fjM z@Vl&qq?&rOmL0vCggm!$L6#_JqSbO-ofoZL2sr%i8dAGMl#bc95XtuAwHAB&_QGOH zNiZzbhGy0tRs0btE+%2?0)KMy!h4R)tYY-gMult%p>)v4AsD3>R?6+?>9;Nfm4$sia(k5+ z?bgpJ4j|?Zmp)2s4XWVAQ}Vj-p;)=b8$&7#S?^q;=|9kZ#n7YU(L<9+WS<2`qm9_? 
zJ$pKE6-~qZNeL-)*YGoGx!K}+JiZP;;nY9Y_jH!Tw+%YF#&eKe?rV;E5%~gJTl-$h z&GVWvk)qk!#glMYBGfF8$i#4O{RgF^d)T_VYRyAUyvCf~D@o(iz$ubG!|iywls>K7{{o7?H1LXjCeNCsTq^$0sovX7+T|#}|2Up*6sk?>fuLRUtaO6?a{a0ZR*aUHTDshuRYf-|N84&Sq-lnx+Pm z2E_P35T752-I653-_CP!DRPTmY?ne!yTS^+IjbSr`Q%hCZD6YfR$>iMeq zqK&r*f?Yzu=P{T!hll3e=m({v$WsuWr}+mhrH7$_oVuz*u?)P0mNUS9YBozr)s~$`-p=8Kl*H zpc$Q)g#5gnMWDc;?Xr$?a#_7M{1_N2p2mDM-)?)RE3hfPAlH0s72pALLdy|1`iu>& zZ!?WT9`+jVs`aUNL?Z0sc;fysW7$)hK$O*&lB(z|)t0omAvc1Ca($;Bv7Uexn!tzO zLj)B<(O57oMVMvD$?t;Qg6^E%*+ev`|9fI41TtUTaweO9lF@EikMcHci6ZH z?VUjbw|5kkTu6tilXdvlY6$Gh4^5Dw_|Pv438chgC3@-Rxt!T#8B?kUoObTLOjx8B zYv;SPspunQ4C`ptdPzBi=c5ZX(s za$~U>YKD5s8GL2$nLSz1zliQ}UgKhz_rW1(CKT3?yrvIV>SK{ELsn_+G$>lCHo(xS z!GyQZ8k*Nt5aO^|4O_MVL2L}O#6@32)$EMzVk^^QceK0em|JW>OR=K)rh=QQg_FLV zXH2!!OYWwKnBRq)5{o9--1`t_q!5t=uP34b7j~M{V%yVXkqYXVmgL#P<9u{30Ok+o zu0d|Vr`V0YD49Bud3In~X>byE+lS4*vQ>S+nN*rT{`umS;3sI;>c(b0hF;(JXF2$H z66-Fg3v>_@=$$iatk`1L^D!6rBjF*=)u1DqLlKt%A45SZlK}Dbd291ckq`2CXhHG! zF*vuMbc5ndC zEYX$V2I+0!&AVHkWSW+mWpD_bZ_}fMhPm46^X5zG_#$Y7uKy0&|ro)p*w+1zq%=V5jfPG=Km)c1HPdX;@ z?kCF>Bc84Ek8um)w)&V-gojXUT0$J9=yahP($kM^;&7SG%Vy>SU1~ibrspF-uoI5r zBDPl*>P?T_OjKtJS~%T(557R)pcI0hA`$5scy4U*xrcB^F^M5vGq4a&ex9+_`Kwr% zoL>6!(W>1+q#ND@yGri24lbDlmR*bQNQ@B@UcsnRCed;@2Q_wFiXJ5``v|DzdU5C) z`J8PkB=p}5wyOA))5yVI8RthRN@F#1M#@hS`bZSfqGmSQ&#L5T5v?)ALipr ziBgfdAt^DkzQ!n=_Y|sT8o@I!y!Q6M6sHD!#1S^4Kj}ma5GR?}3-6d+un2^2jNj;x zPt095w-b|^RXxP&iw~>VVJh8O!d@b&>RB>e;MgPuYNA9dj8`<{g#t5&g z3rC{+K!o=ZwmY$WD6aY*Dr||HzsNgo7&d#QzECX9yTv3)fma2b=*f@uxYSYRV}6_4 zw?{@U`TELKJbdDnYu}(jc_=~Q4o!`&5E*fex(9ZCp$Wc)nXUIWukZP=%W4EW)wIv$WebS)cLk*S>YahY=u z(9~aO@nRPrl0GKxb|bY%P9YYS>f@3Wnu>|c&80dQf};+pziU|X8k#>}y&~Z%pQpzq z-AS`LCXdg{@B+Vj#8I@o6x9YP7*S*P#sNQkb?Qs&7vuD!#F_kw!czB9a?-A)TfesJ z>b=UwP(gGTdat3z=+fgvL%MpL6xGSlQ&d^0h0WulyyDp+3;7aMqS8m!SeX=UOV?WT zi3T$gWY*8S_Ucq0TrNx_y(?P?#b;CTsP<%XXD$MF5oA+)?QWk0_p2k`tT7VN@ei>N z!<$?*qUgc-g3$7^!V;m8{9y>dQQb)Um zcIZ4GRS?#b&gRf(I@$CJ`}V{5J*NPbVUq{@niEn{RPwqdFU@)gbW%u!ZheJVo_?ANwT#;BrEItN8>#d)Ub}9 
zsYsgISIg9nGMhUt^SI*lix~*Y=>!k^if6CF&ObMNgpH|cqQ^Bc8S}zmcs{IJ`;Zpe z&11i(bVkk#8a9ecJe7-p5@4`UL0P$5+`jw7kk-?K;pGbJ9BGrmF~bnb+DHjDC9wcT zXmSRjtYyCwy%D@+RaP981m?ckDxB$CUJE@*Hu4h-`$f&w>$9`CiYBw*i(FC2PCpGb zfj~qwOM>`Mnhi*h0M2A;9$$kHS6)QB^tAUGlHx3`tX$YKKZ0>~1dfbq72hBegE$c~ zOOX|64fb9n*=U>aiM)wI+%MWRo3DhrMqGM*E8d<6K}gTGo&rvn{^1UDtMn4uiC@XT zTF?b<`PG8)ty9`^GlsG>g0PhN^Q4djY)uK4kH)gAR$)Qa-FwxF@}Uy^EP7sLPYj6J^34qq#dhlf&5fOYh8T>^;7ZIFk{fEiSC6^;8ZgzLtHy;|vV&%=rHxbvn zXRUHTv9F$}8>3J%trK`>=w@>~&L6DIIl6ukoJV4uo#WV3bJWLRX7@2; zMA2{R6Qz!XW6w0eUd;##kdIvou8?NvU!KSstd89IdeZ0ih2cUIk)%!iY&S5ceYr80 zzrT<(psY7cqVlur47a0`NA7nC#s`ps5A!wEUOwfeEi-w&)Dc@R*;9aLQr}$sRuS@v zdbw`)DnBhojRy&&8;8p}ZvzGariZoQ#z|zGD587RpFNi7pE+92?(;QC3c(igA7A6y zlLjJ^9m0Af^NSOs;>D9i90e|BP;{@`jBM&tiMxA1pQEfQhE+fFy$$F(c~h{&931Be z5?T5UE-ll*o{6*OJ}!4CTdCOHAjJw1w|=)6xk)501odPbS^tYxl~9xlhmefWPQR-o zLCi+Fh(2Xovt6U)!eY9`HqD|0BmA6(Z=W}*!C6GlK5K41m39Cr1zc=)N z@h9Lpda!qoLLDEqLOV)5J@Om{02}`3nb}O6z3rtaacW2@#!XODxO)NNS>Y1k@}BO8 zXrQ%|Rj{Eq8I=tv!G5ZYQ>ubLV#3PVWks+@c^W?kQ4!~f;C&~#X~0sssgB#+bVm9{ z3cFD8(W<)NkDs}Y2qUOo+rr}Un>>6g%o-2RRk@vVu|Bh;0spMBcr?;(HF*H$}vvj494|VPnL?Wck8th5`i%S%u~qy{g|yh zmB|#8*41;5iQ9rpMnyxS4;7WlL|}n%n4A(v`x*vWZ<0c}>GqPH!pY41w|=&Xz7QWkyUr z6yXkDs4|Eo(@QCM&gL)25i17P_F>i3f=+y9{aJ7u2pm{aP4q{CQhp!>s=oDdETEgd z9RjGA2g+iaABc9ZSs80b_K?8Nw&xd#Ols)fFuvE=eWHk%M{tp7emF6scaV?7zaf2);$h|_?c}CawX~;eg#DVtd-1Q(=U!3}k5HiyH z-7}q-zZq8;u~_g-YlQe8&Z>sU`Q0a;@mfECuo!$M+j7yTYnucy}*5!C_=J`D`ff zskUX&KbD;d9AR$d*;N&OnCJpC^0&{&`%F-|q%wU0BI>=o=1svTN}j6k#C8*SQ$<^m zg*0!sqpneXCitC(t}W2wCS7Yj^Id0W3}@Du zNPMGxY;(Hhpg+zWR>$^-(Z9{=VU%SJt2m70St78=*t54uPEHE5vPK_l^R7np^zlo* z!stW06cWzG^nqt>mLp1kdq~AnsCas+d6OfJjHIzixw(?YPU=zYu|WHl&eQw?c7~nw z5%WY_EBdhU^_2H~o3&hL*wN4=??Li8)+rBS15tw3AEW27L>&>6kaeJF( z)tbmbz@%o}rkoE>nC6WUH|d z6>F=bU6a)*>{P$5+kGm9v^TBZ?fb`8&FsXA8Yw6aD-o&fsk|)`*yv6{$N`a_QcT zZJq5OEy%h{rXVuNQT+%l5@bVv4su47Gn|I5jFRXt<=i>SW-}c$)F)kEu+sMNSwuL( z(3ahiGtwM|@@iNPTHV(Tg?@6d^vCPW8Q3scoYR0H=x?{b>lc;z=5!M`BudMkNFv*z3n 
zP1ZVciu!Bzn+Z!jaNTYHyaOMqu@F|tP2X_S@RbTwuU_j50zFJV_aMpCIwj;QuZ+w>TopSI!q&jO7Yhf;B)*Etsx*V%m z*%yd&vW-!0mYb^@rREqsxW*X)B9r6SSFPmMhvgGWxnSV~9@4{XTT>U=;PVfVe zUE0KB2`x|pxm8)S?lZ$}+6z5C?;@>9ay|r~vei`2%q)c@dPWB>20fWMeKjaVR~2L} z{@7VH%+#Pf2h%97+==fr3tO+PR)I|Mq{}BacQc5xP%D2ixZzJVTQ% z9+$PrZYHvQsq$|5wX?7wq84A5xgBC%C{scIEJ^&V z|7}&)=F?X!qa$S(`C61}tXwnkwD+hZ-7jOMjLhXFV2X~=bWwf_))tprdxwe@moZ?A z(xek-fek~LUsu26K7n>a&>Tb2Oz>f;&ID5>LVdr+p!t~_25qA;^vE+d5czHxyD`xC zK~}L6D%VDST#lUwWm+q&jvDqo2w6bG zgh(l=-6BEEg|?RSZJv8qm%fSCQdA6GY}%8P$%nUQIMdM`JDS>+hA=ydSQbc-IKFw# zNJd%DDA0#RiPz~Ufo+VXwi>8D?xySrde%WpD9icaU%}_#*g=_$IEgVspgh+S{3YH1 zT)ap@;z6V@R+my0gxg5tRj(bhGqyc6JMBW;7&q3&(YCQ1Y(0l|%!)G09fS>17Ah+T zI6sRtb(dIXd4uR%n$-%OLcd3C{}83I$=turTvD~me&}M`8dn;lX|L@2N;TUZ676@; z+@`aINqRj3F}p2vBTLg|_Uhqh{@y5i-O)CX#YcUA&~<${d%GZ~lLuR@&_&BZIv)Is z@xJfZ^JGT2SN)3ro^jj20_X9%e0-c+yAG%Cy&c9`(DbHlq69k(q?_1=XtkB z+N83y+LEnBi;_x`NXb%`iPB6V>kK7S$TATc4WTA&hLjMkV#xMY4Swf7?>naF-tX`G zotfXiKmR ztBmz9DfQL%nSW_i_j$vEH(%ZEHuwE1Z3T}z6&V|IMe!0=GxN6&!)f1EHqD;u=4b66 zbK&mV^6krCJ$Zcd#DbCRsxL330ouXGRKCwOlT@tO5$JzDsLuii6LqIrI?H$9^UWea0Jq_F5nrbDux+1@ zDz;*Xc4D7Yhm-oZd(}@k+wYv4@9_@>pAI)ziS?r*?bDv0>y;JWn)Pk^3b*%7t{zd( zqqbQH`Z+7r=u}9(g3d0!U7x?~opN#wcKDYOtvCX9>f!*>42HeSCa&g%B z-(OsIPkujM-}>rcl@*ae$5xj4+B{9VGxzn0-@S*5BHr$5*;6a*$Z;rgf2e9Z&`|q$ zpu)Fjk{^w}8g)Re{`&<#^wUz+9vbd@iCHmlKXa)$-Zp6Gio_V{h+a)SJBAzWn55uk zQRz80YHxyI^{=iIw>myL#j=dowz6aDxdYTjL6Auz;a zu=&yg!=8Nu=L}49H!pqZ`|Z@Cmp{c14>dk8q0h0_&RMC2UXC9Y|Mt0i=&EP8YffC< zV>>QjdfWZy!=|`R*15jYN-gF5GIh~M)zsGqyN8(ST>Q|hLH|P%<@;1HOsQqqy-^hv zE=M;cW**l+qh@(jE7fap(o`L_lSV@eUR=Cl6Oq*W=K3YelUGmk+!AG;Q*Gr`x?;!8 zd$}hBwUtTj7J+pqyCvjj#p(6Fs9|$Ty6VP-_nv!y(RCL5Ht^nBJJaI&P0Hu*zq#KM zXyU5q=H~T4`RMFxEeB%8)a<#w_s3Y9?@u@~udcj(@;>OkwE^VM=$Cb*&%#xU9b4-y zG?I=M%{BK{JU2VI^?YTpu}~{A^L*!U_50U1i+=n0^^HZ$vZJpc5y*?=C=q zI`+QS4}K#im@TomVwrY+l|{~oiZlBU|NLWIl;+Z3-xPn^5~}j*+NO}g*9y93F+0~- zx&NgUB8SQ{SzMDET2$uMHr@E3{O<-66@CJXDYGv z`a17*DiTiYn!U;M?Ajw*P0LLy)=qWndwj=J`_Y!gE=FzN_jZ^R>b_0*K&0Oy9?`Nk 
zvgc!^giPz-t6uup6*;f^!Fj~^4?RBQRm8;#{PRvJ>ZOznKDqqJd&>>&nQul(Jwsy7 z-aK(N{jsWRcDbFNU5$oIT8HK`v6Z3w2KSx8K@O=w8PyZ2`X}qgT)1Rw5x#27^ooQ` zuX}^`Zkg%WyI}Sry_E}e*9K;03^&@NT+#Q3%jrw5)7$)Re`uAaMoCPg-9V)| z4ZHuf<#gbR@6;+gG$)+~3Yr(7-dSsj7qKCIg0joN*0S6C>j8TeRNor`IkaUgX;w!yII z?Z22+&0d=N(5F!vb>%&KY2m?@-CLW+tVx?RG)J)ZqOZZzt?LgR4qvBmN@<;o&TxfI zi;653j5KkK-`!X32h-3p=Go2bV;3}p-5S&7X~P%W$2QNLa%Wj}yXI?Cn3go@+^|z) z%`Dr`)^@NhStFx!cN*>WzR3hmT{u4S`L3zDK{kU%j@})yrt`Fh-4ap7^DxaG*Yg|t zx5X4rDzl$A%Jfv0Brsr;t3vN%)0OixwC{}`?>(+Vx3ft(`(a9W^E+cdji^zbc9xGO z_D~u6%rz!4rt`CN`nZfa!qOQF>Zs8*#xvi)Q3`I>eS3C+hE^}ZE7SA)`!@9PQQiOe z(~<|?-3&gJ_bt5Upnhm*Nn`sRx;?8=>vni#q*%H_t41Q+oiexW*ybYxdVH4^HO{=7 z)1u`snH~{3C6L0d`98Hjd4A{3^GfXs^<}98)CW(FZ+K@kT{}Tzy{(mVWn$xx+h2xO zWT~r{?rm81@JVz~xbQ0--7onY-TWtwA42DC%dpi?Dpqjv)t&JrYU}*DZnNi>sI4m- zcu#!&%#`c}^%i4y6^*Kvw%dI7uanas)@0u)vL7D1CDrzLc+a)QS?8{HhVH+Ta{r3! z8eP+IipxU8yRLU^+1P1*E&a?h_K{&lYo33n=;W$!Ay(7JUt4|uQ~kCblV3&;)~`CP^(c)kK4y`5 zqwG^K|Z(BoDX!#iWnDQhd~z14VI;jg<;+3ZkUEFXc@sBP`xU`Vac>vx-%L ztU!f3&qI)2H8-cYXf>?95Zcdvx3m5IYvu~UhYy~vYWlXh z*8~mM6x*m zEu-5VA2W>*Gc}w}Jsnq+>N+dLBVtBMDpEEMJsd4$pW2bV+Vpvk_q_||WOJza$O1m$N#>t6` z*9-_W(w%+Sb5rY}x=(?nnL%|<4z`-VG@nwcbggw-w0fql*@C?V#YZ|mU$mWXbi+cJ zVdhzN=mbRxgk_RKzaE9Ro_%hL3|d`%NpW4$`k9VR%F|BP(yW%wmzJi#=v;fEMwzkd4BI{1wb5eoA&2tz z*3|q+Z{y0HKW9$-;6Bp+L*pQ)sxi%D9!>c?Y?h@>TTiX}hYbd|pHKU#B2sy&zNYD{ z-Z#!gMU4o`U9EBQ{oum`+El%o-|tQNuA?#Hn@2OxO1mA|9{Z$i>D_V@=Z-{!i{7dy z3sx=OnX{r%DI-q(;Mnxn8UFD>;m2LJm|E%Ya?O`|TbrM}eBj8P=s6y9TIYKW9pW?3 zPxZ4O-Q;*5>f~0OoW%N9FJ0%eq%^2i>rH>VBX8<1&9Lp56Z|1dX+~?&F;Pl2W&F-~ zTdZ2K=C9*F@96U&ZDn%u;i^!z>_$t2O;?;F0!KBsUD|dlEaidjqN0bHP0>BKSlv+3 zYL}+o*3Y)-)v>zQnew$K%X)b<=01F8vVC=Xb@sFV^^aekzS|j;S@>hU^O(cC5*`?z zEw`yIk7-yNvv61>dp+!J;m1uApQW5lH|XxQaGKdC#k?uqHoK&l4ye&pdYjk#*6e_! 
zBZf(dB_(x_ua%p<$T?hVLcMfJKh2n}I(+x#`=Cf0zfy-$)wd`5s08hq+&*We!}E6i z$o>vZ4+h-GpI7Mc@lI8;!M0^*frj4?ZK@54;9qKvfCnN9OOr@AQSPa0QgZ@Ty0 zyUS@;Z9X|%w%1%#xct*&QQaI9?H=kMJ@z~?wT`}~zG&F#8?%C1R5g^qUn@)qW}+xZ46XR>Bn7JT!)(vh-f;p_CaOZJZ(LWSn;Q7u2_Yg1E47}BswsdM#uW|e+OYt8(Fect)^(y>3Z(k(jC_(>Go8Z_>*r{eyDK7)U^ zGIvzHdce}e>~28b=e&mxC6TJme%1>!;;fnA$>~{&rFnuO+wSE~ZYlbueL{eJNm9|J z^tKVFO#IS4X3ifOcc3yL&HT5NI{UlEZ`XB9Q-8QqB_}gvVyFHRt>mZ|ZXB95z{q8oUCpd4Pw(l&g=rVlD)(AuoBZPSliD3g zC;VNSwepVO%EddvrN)hkiTjdYyXgJm>2_eARbo{0#M*z+nk$Dm#wHZ!L|Q!xs`L5D z`?_;bhnaKc;C+3CJ3HRpTJC=Qip%y*S4Y?>=(O8E*e5zY{^F8@k2M=M`xM*0m8e7n z40KBn=mbPB8@K+$j!2`)!8xur7pI$z)Xhy-{ZX-}yJ5Liu$%ikEiYD~{8^*vw%Z{t z%bWLX_8;%rYjMccR_zy2g`#glf2f`0=hs2KK3K09YdQ3XO6Tif{WiP+v{C^g+|IcFf-?Nx;Nl#FwBT6C?|Cz>&OMSwD#lk=9P(slP zE^Uf3C1P0NpDD@Yd7OB8%(!GrsE5DuNBVzu)ipVm)k;Vv5~e4XNR=m8dxlJv{x5)31o&V6 z|1a6icm)h)MF^qL;GYHmWaa+;DE1ffwb0+{32S)uC%GInT@#wgiTi&_;5F4JFx)2$ z%GGtrvelKOvV3FCo_;f1BhL*t!`~Z8__Z`WmVc`N?WIw{D( zBC{4)*qbb=+X`0I&1*w~45=F~-N|QXL{{~O7LFGK9y7mIF9_2K^Z9c#B~Y@@nm0_K473S9rqiup~=349zq4` z{<0u(z5gM-43~utRtP2RxXvNzjqEF5DOB6ye#4{fdj1RAPe#wLgF<~hu2Z?< zW8|8Tk*hvNru#q{lPmo@Jms%c&QnItQ;G|j7ozW^p$wrBEE`=T+cA_tq(4roI~7(;E#e@ z{)bUmV3AB;%bg6az$3?R8OQj&p`FI>&Dt}}6AJO7xq|Nktz}c~QreRznbxQlG-PBm zp6xRaye{X^VDD8+eBgJ8w=aOk3C2^5H~a^O2kF(OK;>vX<0%s%6(c0>6+oR&5mcBo z>EcC2g!=d@Lk>e$r2J=O1w}kXpyGY?{(wPKJ?_8tg1*AUEG!58>jD!9fQA@Mj8fco zh0#*TjEKVkg7iBNPb^`eC?_8#6-$uL=!yqxlR{!j940~PfR86&k=5kGC>R)rF+wP0 zis3N~a&WM=EV5O680b}OE+Eb{K0km_B>jK{1f7MbWGFrcDsYOyghHHF0)a@%68sQJ zP!PnQvrvpOKJsBup_Je+ge(Le@RFE|h(&VD?@x>h!(uS0kRj#*wQ32P!t_98^ZS$1 zPz8|(Lo`Ks%DUo#tVs#FPyz|k0bTK6dx)V(EFUJ2iikM_o`4*ekb7DLIo5DmflG`W zEdIDOi#$6%4CEB&8wJb7#uZSI85CQu06b#kvhe$3XefM(!63;eF&8Q9Bpgq|B9D{b zpNN9Oxfo0$MJ`iUJfHzZ(2oWsgtw&y3`qkig`CW;{RsdMrvc=LMJ04GJVrq5eVS&G z4}$fF3Iby=nnjLLS3Hm*l=zkh6VM_C0&5(``K(>r3TTFNrui@t=ScHmKzm{x5-IX3 z_;?~zE*OJJq{!9b;Q_3S3Ky`pD2p)$V?@My8AgbFbgV5ZnvB61A)&7r zi4a9D{I(L#;`3o(0&p5I63$)c;Rzv0Hcl4?UJVd)VWlXj;k5-v5w|@^B5}E=g(5C9 
zZr8RnXct01!}cKfCWI}B^9X!JLQeu7zCQqhidggK3}X=bN(dR2ab5z9(Akgyn9vab zL5Cn+*Cqfw!Ug~yipMZKu@IjhR0Mz$))*ek;&vajfr#L*NGc}m0*E~dRCwb8$8lOg ze_(70Jc7jeXND4E=@Stos^746Yp+q=g&&8A!*H>VY!RQffVZk_z z1@DJ|Nl-MwAD0yqdk_jY5cW;PqU`JZwqk)4d8xeqAW1d~_AwZ^jW|pQjsyV{5_?}v zllnz0L`CDVF`%YAF&B{tohI_<0&Wg5E{G)27W^xmZ(@mn(34_V20<5z08)?h#)W-O zqlk__7oa^c28%pr3{S$4>t#hySe}muQ^swmKtf5$wE=%|y#{!Mu9GkVf(D>s2^vUf ziqMIGN7#6YNJ6$1ku;DnG{H+yv4s4A%RuNWFl9nqHo&3~HVW`)oX%3%(j<%`@W8AR z`cFzzxV{3OgYm|A1lwGK%Op~#1dL{gJq>sat~aC-F*z3?7O@VIfY@(f^$A@9-XGZ? z=y9SyiImvqpqdE1!3r3H_N;)yeQyD~MfPe;v0Zj=Q z15Cp47%|~Xu{0TfunY(mzb)`whz{$~zprsEE0hqn0xSig4_HP@#HOrJMCcdTzNmNt ze+<~H1PtT~*Et}4xGxGRow*C5uKfY*-_Q!=3hNKy;d_P>kb56sQrsp(TOwXXcsT7T zfk=q!7l1MNeuE7N9rEb{c-&Dee{CS+7~kM@Lx}51XiMxPgoo=%;JFCb#V#6ALR4jH?#IRhTPk0^-f z2>!x#DDGdu7{oq;wlwbhz?4z-5T1+!jK=jJv<1(O#KUboECUn*jt9pK1Pn|g(H~31 zu)uRTmBIR>X^83w7=_y{L{fs6H268VOu`8O9`{gSp-4RNM#;9^i6T$#;j#jqgkmrW zA%C<`g8PmX9FE|=C>$w>aUTR=gdYnIAq3v|xS$mAIWurYgX6)`13GT!&lwaTPG`Ww zZ4?k5q8F?!6mI!9V-UUow1u#p-xhcdmY)x!7$~SjtOLSd0>%(=H?$SwaWVy$4TOGX zAOyqr0wZM69SM&vfQQ>2N(jdwI3ApM5OT#xA?U>V13cW;!o?}Palr9}+*Jf`E)azh zYXi3(xBGD11P3s9e-c!EiN7`>oG}tGUpDSp2L24weLXOahT4i!SQ< z>yUEyiaZ$L;d%{ZgE$)ol?cuc)*n!hkQdm|xU7Nd8Ru z*${yfLd=il?%w$0N+{fy0%759Rd{$}(r2Z_;8Wv!Rm6(Wl^WI-G$0NWk-9`I0*4L9 z106xOCFNcWdkfbG=t!D?iQoVl9|MlPF&~(QTLZ!`f#?#C_bE7m!s94-RRaQujRAPX z+9a@La62PmhfyWuo|EeZZV+^V zV;{n|kTNuWeg!iImlt0LoHO9^2W}#vpQW%DaGC-hVQYcRgbyc$ikk!t;EaM87p@KP zF~DoW&nVz376^^4gB1wyI0f)XUk_G>pAEy|3q-qke>5B*;4nDi!Pf!NIUX~?$p~>4 zNwIK%MYaWF#vcQC4g+H_xG2H>4RGtoScnDnLbhday#dd12z~$zE-Q&~K{??th)ppb z!8Hku?`iJpgy;{h8!`Dqc)0A+0x)ox-US$;ub?gWVxjAPLwL9k0w+c^e$EOo2(R$9 zNu{{X20VgqG#GT;HvMH9(VyNg9A}fy*=8_TV}ZUFYHPCGcE``|aR6N(ngy zDInw&jzw`@0&U^)7?T5#K7yv8IdOjjE-Q$43J8YZy#tJRTY+E%j{ruhVjUuI6Y;r-z<}U;23#cEp2MR~ z;;sXXG@M4_^8*>eX%BeB9t0R32cZ6N{}f;hNh>(GCDs9FOSt@jmrcwCV8mRIJC55y z01^8GJ)ObFMNgw}8h{sppQ+L)Y{cmTr_Y2wfSW~vM*t)C47zf~WdmRYP2trDA%Ae^ zK=?Uu+DX`Ra0Lka3sExevw|Kc;yfDAy2dGCp`K8{HB@=hB={o;|Fu5c1GWh>gM;C@ 
oKeA=$AvITe6pmTAU*_h@E5kfP!?^cu&>IEsAPo#`oNSf<2Ol6m(EtDd diff --git a/forester/archive/RIO/others/hmmer/config.guess b/forester/archive/RIO/others/hmmer/config.guess deleted file mode 100755 index 2960d6e..0000000 --- a/forester/archive/RIO/others/hmmer/config.guess +++ /dev/null @@ -1,951 +0,0 @@ -#! /bin/sh -# Attempt to guess a canonical system name. -# Copyright (C) 1992, 93, 94, 95, 96, 97, 1998 Free Software Foundation, Inc. -# -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Written by Per Bothner . -# The master version of this file is at the FSF in /home/gd/gnu/lib. -# -# This script attempts to guess a canonical system name similar to -# config.sub. If it succeeds, it prints the system name on stdout, and -# exits with 0. Otherwise, it exits with 1. -# -# The plan is that this can be called by configure scripts if you -# don't specify an explicit system type (host/target name). -# -# Only a few systems have been added to this list; please add others -# (but try to keep the structure clean). 
-# - -# This is needed to find uname on a Pyramid OSx when run in the BSD universe. -# (ghazi@noc.rutgers.edu 8/24/94.) -if (test -f /.attbin/uname) >/dev/null 2>&1 ; then - PATH=$PATH:/.attbin ; export PATH -fi - -UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown -UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown -UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown - -trap 'rm -f dummy.c dummy.o dummy; exit 1' 1 2 15 - -# Note: order is significant - the case branches are not exclusive. - -case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in - alpha:OSF1:*:*) - if test $UNAME_RELEASE = "V4.0"; then - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` - fi - # A Vn.n version is a released version. - # A Tn.n version is a released field test version. - # A Xn.n version is an unreleased experimental baselevel. - # 1.2 uses "1.2" for uname -r. - cat <dummy.s - .globl main - .ent main -main: - .frame \$30,0,\$26,0 - .prologue 0 - .long 0x47e03d80 # implver $0 - lda \$2,259 - .long 0x47e20c21 # amask $2,$1 - srl \$1,8,\$2 - sll \$2,2,\$2 - sll \$0,3,\$0 - addl \$1,\$0,\$0 - addl \$2,\$0,\$0 - ret \$31,(\$26),1 - .end main -EOF - ${CC-cc} dummy.s -o dummy 2>/dev/null - if test "$?" = 0 ; then - ./dummy - case "$?" 
in - 7) - UNAME_MACHINE="alpha" - ;; - 15) - UNAME_MACHINE="alphaev5" - ;; - 14) - UNAME_MACHINE="alphaev56" - ;; - 10) - UNAME_MACHINE="alphapca56" - ;; - 16) - UNAME_MACHINE="alphaev6" - ;; - esac - fi - rm -f dummy.s dummy - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr [[A-Z]] [[a-z]]` - exit 0 ;; - 21064:Windows_NT:50:3) - echo alpha-dec-winnt3.5 - exit 0 ;; - Amiga*:UNIX_System_V:4.0:*) - echo m68k-cbm-sysv4 - exit 0;; - amiga:NetBSD:*:*) - echo m68k-cbm-netbsd${UNAME_RELEASE} - exit 0 ;; - amiga:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - *:[Aa]miga[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-amigaos - exit 0 ;; - arc64:OpenBSD:*:*) - echo mips64el-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - arc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - hkmips:OpenBSD:*:*) - echo mips-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - pmax:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sgi:OpenBSD:*:*) - echo mips-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - wgrisc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix${UNAME_RELEASE} - exit 0;; - arm32:NetBSD:*:*) - echo arm-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - exit 0 ;; - SR2?01:HI-UX/MPP:*:*) - echo hppa1.1-hitachi-hiuxmpp - exit 0;; - Pyramid*:OSx*:*:*|MIS*:OSx*:*:*|MIS*:SMP_DC-OSx*:*:*) - # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
- if test "`(/bin/universe) 2>/dev/null`" = att ; then - echo pyramid-pyramid-sysv3 - else - echo pyramid-pyramid-bsd - fi - exit 0 ;; - NILE:*:*:dcosx) - echo pyramid-pyramid-svr4 - exit 0 ;; - sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - i86pc:SunOS:5.*:*) - echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:6*:*) - # According to config.sub, this is the proper way to canonicalize - # SunOS6. Hard to guess exactly what SunOS6 will be like, but - # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:*:*) - case "`/usr/bin/arch -k`" in - Series*|S4*) - UNAME_RELEASE=`uname -v` - ;; - esac - # Japanese Language versions have a version number like `4.1.3-JL'. - echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` - exit 0 ;; - sun3*:SunOS:*:*) - echo m68k-sun-sunos${UNAME_RELEASE} - exit 0 ;; - sun*:*:4.2BSD:*) - UNAME_RELEASE=`(head -1 /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 - case "`/bin/arch`" in - sun3) - echo m68k-sun-sunos${UNAME_RELEASE} - ;; - sun4) - echo sparc-sun-sunos${UNAME_RELEASE} - ;; - esac - exit 0 ;; - aushp:SunOS:*:*) - echo sparc-auspex-sunos${UNAME_RELEASE} - exit 0 ;; - atari*:NetBSD:*:*) - echo m68k-atari-netbsd${UNAME_RELEASE} - exit 0 ;; - atari*:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sun3*:NetBSD:*:*) - echo m68k-sun-netbsd${UNAME_RELEASE} - exit 0 ;; - sun3*:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mac68k:NetBSD:*:*) - echo m68k-apple-netbsd${UNAME_RELEASE} - exit 0 ;; - mac68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme68k:OpenBSD:*:*) - echo 
m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme88k:OpenBSD:*:*) - echo m88k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - powerpc:machten:*:*) - echo powerpc-apple-machten${UNAME_RELEASE} - exit 0 ;; - macppc:NetBSD:*:*) - echo powerpc-apple-netbsd${UNAME_RELEASE} - exit 0 ;; - RISC*:Mach:*:*) - echo mips-dec-mach_bsd4.3 - exit 0 ;; - RISC*:ULTRIX:*:*) - echo mips-dec-ultrix${UNAME_RELEASE} - exit 0 ;; - VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix${UNAME_RELEASE} - exit 0 ;; - 2020:CLIX:*:*) - echo clipper-intergraph-clix${UNAME_RELEASE} - exit 0 ;; - mips:*:*:UMIPS | mips:*:*:RISCos) - sed 's/^ //' << EOF >dummy.c - int main (argc, argv) int argc; char **argv; { - #if defined (host_mips) && defined (MIPSEB) - #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); - #endif - #endif - exit (-1); - } -EOF - ${CC-cc} dummy.c -o dummy \ - && ./dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \ - && rm dummy.c dummy && exit 0 - rm -f dummy.c dummy - echo mips-mips-riscos${UNAME_RELEASE} - exit 0 ;; - Night_Hawk:Power_UNIX:*:*) - echo powerpc-harris-powerunix - exit 0 ;; - m88k:CX/UX:7*:*) - echo m88k-harris-cxux7 - exit 0 ;; - m88k:*:4*:R4*) - echo m88k-motorola-sysv4 - exit 0 ;; - m88k:*:3*:R3*) - echo m88k-motorola-sysv3 - exit 0 ;; - AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 -o $UNAME_PROCESSOR = mc88110 ] ; then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx \ - -o ${TARGET_BINARY_INTERFACE}x = x ] ; then - echo m88k-dg-dgux${UNAME_RELEASE} - else - echo m88k-dg-dguxbcs${UNAME_RELEASE} - fi - else echo i586-dg-dgux${UNAME_RELEASE} - fi - exit 0 ;; - M88*:DolphinOS:*:*) # DolphinOS (SVR3) - echo 
m88k-dolphin-sysv3 - exit 0 ;; - M88*:*:R3*:*) - # Delta 88k system running SVR3 - echo m88k-motorola-sysv3 - exit 0 ;; - XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) - echo m88k-tektronix-sysv3 - exit 0 ;; - Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) - echo m68k-tektronix-bsd - exit 0 ;; - *:IRIX*:*:*) - echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` - exit 0 ;; - ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. - echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX ' - i?86:AIX:*:*) - echo i386-ibm-aix - exit 0 ;; - *:AIX:2:3) - if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - sed 's/^ //' << EOF >dummy.c - #include - - main() - { - if (!__power_pc()) - exit(1); - puts("powerpc-ibm-aix3.2.5"); - exit(0); - } -EOF - ${CC-cc} dummy.c -o dummy && ./dummy && rm dummy.c dummy && exit 0 - rm -f dummy.c dummy - echo rs6000-ibm-aix3.2.5 - elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then - echo rs6000-ibm-aix3.2.4 - else - echo rs6000-ibm-aix3.2 - fi - exit 0 ;; - *:AIX:*:4) - IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | head -1 | awk '{ print $1 }'` - if /usr/sbin/lsattr -EHl ${IBM_CPU_ID} | grep POWER >/dev/null 2>&1; then - IBM_ARCH=rs6000 - else - IBM_ARCH=powerpc - fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=4.${UNAME_RELEASE} - fi - echo ${IBM_ARCH}-ibm-aix${IBM_REV} - exit 0 ;; - *:AIX:*:*) - echo rs6000-ibm-aix - exit 0 ;; - ibmrt:4.4BSD:*|romp-ibm:BSD:*) - echo romp-ibm-bsd4.4 - exit 0 ;; - ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC NetBSD and - echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to - exit 0 ;; # report: romp-ibm BSD 4.3 - *:BOSX:*:*) - echo rs6000-bull-bosx - exit 0 ;; - DPX/2?00:B.O.S.:*:*) - echo m68k-bull-sysv3 - exit 0 ;; - 9000/[34]??:4.3bsd:1.*:*) - echo m68k-hp-bsd - exit 0 ;; - hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) - echo 
m68k-hp-bsd4.4 - exit 0 ;; - 9000/[34678]??:HP-UX:*:*) - case "${UNAME_MACHINE}" in - 9000/31? ) HP_ARCH=m68000 ;; - 9000/[34]?? ) HP_ARCH=m68k ;; - 9000/6?? | 9000/7?? | 9000/80[24] | 9000/8?[13679] | 9000/892 ) - sed 's/^ //' << EOF >dummy.c - #include - #include - - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); - - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } -EOF - (${CC-cc} dummy.c -o dummy 2>/dev/null ) && HP_ARCH=`./dummy` - rm -f dummy.c dummy - esac - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - echo ${HP_ARCH}-hp-hpux${HPUX_REV} - exit 0 ;; - 3050*:HI-UX:*:*) - sed 's/^ //' << EOF >dummy.c - #include - int - main () - { - long cpu = sysconf (_SC_CPU_VERSION); - /* The order matters, because CPU_IS_HP_MC68K erroneously returns - true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct - results, however. 
*/ - if (CPU_IS_PA_RISC (cpu)) - { - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; - case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; - default: puts ("hppa-hitachi-hiuxwe2"); break; - } - } - else if (CPU_IS_HP_MC68K (cpu)) - puts ("m68k-hitachi-hiuxwe2"); - else puts ("unknown-hitachi-hiuxwe2"); - exit (0); - } -EOF - ${CC-cc} dummy.c -o dummy && ./dummy && rm dummy.c dummy && exit 0 - rm -f dummy.c dummy - echo unknown-hitachi-hiuxwe2 - exit 0 ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) - echo hppa1.1-hp-bsd - exit 0 ;; - 9000/8??:4.3bsd:*:*) - echo hppa1.0-hp-bsd - exit 0 ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) - echo hppa1.1-hp-osf - exit 0 ;; - hp8??:OSF1:*:*) - echo hppa1.0-hp-osf - exit 0 ;; - i?86:OSF1:*:*) - if [ -x /usr/sbin/sysversion ] ; then - echo ${UNAME_MACHINE}-unknown-osf1mk - else - echo ${UNAME_MACHINE}-unknown-osf1 - fi - exit 0 ;; - parisc*:Lites*:*:*) - echo hppa1.1-hp-lites - exit 0 ;; - C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) - echo c1-convex-bsd - exit 0 ;; - C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit 0 ;; - C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) - echo c34-convex-bsd - exit 0 ;; - C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) - echo c38-convex-bsd - exit 0 ;; - C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) - echo c4-convex-bsd - exit 0 ;; - CRAY*X-MP:*:*:*) - echo xmp-cray-unicos - exit 0 ;; - CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} - exit 0 ;; - CRAY*[A-Z]90:*:*:*) - echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ - | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ - -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ - exit 0 ;; - CRAY*TS:*:*:*) - echo t90-cray-unicos${UNAME_RELEASE} - exit 0 ;; - CRAY-2:*:*:*) - echo cray2-cray-unicos - exit 0 ;; - F300:UNIX_System_V:*:*) - FUJITSU_SYS=`uname -p | tr [A-Z] 
[a-z] | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "f300-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; - F301:UNIX_System_V:*:*) - echo f301-fujitsu-uxpv`echo $UNAME_RELEASE | sed 's/ .*//'` - exit 0 ;; - hp3[0-9][05]:NetBSD:*:*) - echo m68k-hp-netbsd${UNAME_RELEASE} - exit 0 ;; - hp300:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; - i?86:BSD/386:*:* | *:BSD/OS:*:*) - echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} - exit 0 ;; - *:FreeBSD:*:*) - echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` - exit 0 ;; - *:NetBSD:*:*) - echo ${UNAME_MACHINE}-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - exit 0 ;; - *:OpenBSD:*:*) - echo ${UNAME_MACHINE}-unknown-openbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - exit 0 ;; - i*:CYGWIN*:*) - echo ${UNAME_MACHINE}-pc-cygwin - exit 0 ;; - i*:MINGW*:*) - echo ${UNAME_MACHINE}-pc-mingw32 - exit 0 ;; - p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin - exit 0 ;; - prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - *:GNU:*:*) - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` - exit 0 ;; - *:Linux:*:*) - # uname on the ARM produces all sorts of strangeness, and we need to - # filter it out. - case "$UNAME_MACHINE" in - arm* | sa110*) UNAME_MACHINE="arm" ;; - esac - - # The BFD linker knows what the default object file format is, so - # first see if it will tell us. 
- ld_help_string=`ld --help 2>&1` - ld_supported_emulations=`echo $ld_help_string \ - | sed -ne '/supported emulations:/!d - s/[ ][ ]*/ /g - s/.*supported emulations: *// - s/ .*// - p'` - case "$ld_supported_emulations" in - i?86linux) echo "${UNAME_MACHINE}-pc-linux-gnuaout" ; exit 0 ;; - i?86coff) echo "${UNAME_MACHINE}-pc-linux-gnucoff" ; exit 0 ;; - sparclinux) echo "${UNAME_MACHINE}-unknown-linux-gnuaout" ; exit 0 ;; - armlinux) echo "${UNAME_MACHINE}-unknown-linux-gnuaout" ; exit 0 ;; - m68klinux) echo "${UNAME_MACHINE}-unknown-linux-gnuaout" ; exit 0 ;; - elf32ppc) echo "powerpc-unknown-linux-gnu" ; exit 0 ;; - esac - - if test "${UNAME_MACHINE}" = "alpha" ; then - sed 's/^ //' <dummy.s - .globl main - .ent main - main: - .frame \$30,0,\$26,0 - .prologue 0 - .long 0x47e03d80 # implver $0 - lda \$2,259 - .long 0x47e20c21 # amask $2,$1 - srl \$1,8,\$2 - sll \$2,2,\$2 - sll \$0,3,\$0 - addl \$1,\$0,\$0 - addl \$2,\$0,\$0 - ret \$31,(\$26),1 - .end main -EOF - LIBC="" - ${CC-cc} dummy.s -o dummy 2>/dev/null - if test "$?" = 0 ; then - ./dummy - case "$?" in - 7) - UNAME_MACHINE="alpha" - ;; - 15) - UNAME_MACHINE="alphaev5" - ;; - 14) - UNAME_MACHINE="alphaev56" - ;; - 10) - UNAME_MACHINE="alphapca56" - ;; - 16) - UNAME_MACHINE="alphaev6" - ;; - esac - - objdump --private-headers dummy | \ - grep ld.so.1 > /dev/null - if test "$?" = 0 ; then - LIBC="libc1" - fi - fi - rm -f dummy.s dummy - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ; exit 0 - elif test "${UNAME_MACHINE}" = "mips" ; then - cat >dummy.c </dev/null && ./dummy "${UNAME_MACHINE}" && rm dummy.c dummy && exit 0 - rm -f dummy.c dummy - else - # Either a pre-BFD a.out linker (linux-gnuoldld) - # or one that does not give us useful --help. - # GCC wants to distinguish between linux-gnuoldld and linux-gnuaout. - # If ld does not provide *any* "supported emulations:" - # that means it is gnuoldld. - echo "$ld_help_string" | grep >/dev/null 2>&1 "supported emulations:" - test $? 
!= 0 && echo "${UNAME_MACHINE}-pc-linux-gnuoldld" && exit 0 - - case "${UNAME_MACHINE}" in - i?86) - VENDOR=pc; - ;; - *) - VENDOR=unknown; - ;; - esac - # Determine whether the default compiler is a.out or elf - cat >dummy.c < -main(argc, argv) - int argc; - char *argv[]; -{ -#ifdef __ELF__ -# ifdef __GLIBC__ -# if __GLIBC__ >= 2 - printf ("%s-${VENDOR}-linux-gnu\n", argv[1]); -# else - printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]); -# endif -# else - printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]); -# endif -#else - printf ("%s-${VENDOR}-linux-gnuaout\n", argv[1]); -#endif - return 0; -} -EOF - ${CC-cc} dummy.c -o dummy 2>/dev/null && ./dummy "${UNAME_MACHINE}" && rm dummy.c dummy && exit 0 - rm -f dummy.c dummy - fi ;; -# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. earlier versions -# are messed up and put the nodename in both sysname and nodename. - i?86:DYNIX/ptx:4*:*) - echo i386-sequent-sysv4 - exit 0 ;; - i?86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, - # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. 
- echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} - exit 0 ;; - i?86:*:4.*:* | i?86:SYSTEM_V:4.*:*) - if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_RELEASE} - else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_RELEASE} - fi - exit 0 ;; - i?86:*:3.2:*) - if test -f /usr/options/cb.name; then - UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|egrep Release|sed -e 's/.*= //')` - (/bin/uname -X|egrep i80486 >/dev/null) && UNAME_MACHINE=i486 - (/bin/uname -X|egrep '^Machine.*Pentium' >/dev/null) \ - && UNAME_MACHINE=i586 - echo ${UNAME_MACHINE}-pc-sco$UNAME_REL - else - echo ${UNAME_MACHINE}-pc-sysv32 - fi - exit 0 ;; - i?86:UnixWare:*:*) - if /bin/uname -X 2>/dev/null >/dev/null ; then - (/bin/uname -X|egrep '^Machine.*Pentium' >/dev/null) \ - && UNAME_MACHINE=i586 - fi - echo ${UNAME_MACHINE}-unixware-${UNAME_RELEASE}-${UNAME_VERSION} - exit 0 ;; - pc:*:*:*) - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i386. - echo i386-pc-msdosdjgpp - exit 0 ;; - Intel:Mach:3*:*) - echo i386-pc-mach3 - exit 0 ;; - paragon:*:*:*) - echo i860-intel-osf1 - exit 0 ;; - i860:*:4.*:*) # i860-SVR4 - if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 - else # Add other i860-SVR4 vendors below as they are discovered. 
- echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 - fi - exit 0 ;; - mini*:CTIX:SYS*5:*) - # "miniframe" - echo m68010-convergent-sysv - exit 0 ;; - M68*:*:R3V[567]*:*) - test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; - 3[34]??:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 4850:*:4.0:3.0) - OS_REL='' - test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4.3${OS_REL} && exit 0 - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;; - 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4 && exit 0 ;; - m68*:LynxOS:2.*:*) - echo m68k-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - mc68030:UNIX_System_V:4.*:*) - echo m68k-atari-sysv4 - exit 0 ;; - i?86:LynxOS:2.*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - rs6000:LynxOS:2.*:* | PowerPC:LynxOS:2.*:*) - echo rs6000-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv${UNAME_RELEASE} - exit 0 ;; - RM*:SINIX-*:*:*) - echo mips-sni-sysv4 - exit 0 ;; - *:SINIX-*:*:*) - if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=`(uname -p) 2>/dev/null` - echo ${UNAME_MACHINE}-sni-sysv4 - else - echo ns32k-sni-sysv - fi - exit 0 ;; - PENTIUM:CPunix:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says - echo i586-unisys-sysv4 - exit 0 ;; - *:UNIX_System_V:4*:FTX*) - # From Gerald Hewes . - # How about differentiating between stratus architectures? -djm - echo hppa1.1-stratus-sysv4 - exit 0 ;; - *:*:*:FTX*) - # From seanf@swdc.stratus.com. 
- echo i860-stratus-sysv4 - exit 0 ;; - mc68*:A/UX:*:*) - echo m68k-apple-aux${UNAME_RELEASE} - exit 0 ;; - news*:NEWS-OS:*:6*) - echo mips-sony-newsos6 - exit 0 ;; - R3000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R4000:UNIX_SV:*:*) - if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} - else - echo mips-unknown-sysv${UNAME_RELEASE} - fi - exit 0 ;; - BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. - echo powerpc-be-beos - exit 0 ;; - BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. - echo powerpc-apple-beos - exit 0 ;; - BePC:BeOS:*:*) # BeOS running on Intel PC compatible. - echo i586-pc-beos - exit 0 ;; -esac - -#echo '(No uname command or uname output not recognized.)' 1>&2 -#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 - -cat >dummy.c < -# include -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, - I don't know.... */ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix"); exit (0); -#endif - -#if defined (hp300) && !defined (hpux) - printf ("m68k-hp-bsd\n"); exit (0); -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) 
- printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif - -#if defined (_SEQUENT_) - struct utsname un; - - uname(&un); - - if (strncmp(un.version, "V2", 2) == 0) { - printf ("i386-sequent-ptx2\n"); exit (0); - } - if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */ - printf ("i386-sequent-ptx1\n"); exit (0); - } - printf ("i386-sequent-ptx\n"); exit (0); - -#endif - -#if defined (vax) -#if !defined (ultrix) - printf ("vax-dec-bsd\n"); exit (0); -#else - printf ("vax-dec-ultrix\n"); exit (0); -#endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} -EOF - -${CC-cc} dummy.c -o dummy 2>/dev/null && ./dummy && rm dummy.c dummy && exit 0 -rm -f dummy.c dummy - -# Apollos put the system type in the environment. - -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; } - -# Convex versions that predate uname can use getsysinfo(1) - -if [ -x /usr/convex/getsysinfo ] -then - case `getsysinfo -f cpu_type` in - c1*) - echo c1-convex-bsd - exit 0 ;; - c2*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit 0 ;; - c34*) - echo c34-convex-bsd - exit 0 ;; - c38*) - echo c38-convex-bsd - exit 0 ;; - c4*) - echo c4-convex-bsd - exit 0 ;; - esac -fi - -#echo '(Unable to guess system type)' 1>&2 - -exit 1 diff --git a/forester/archive/RIO/others/hmmer/config.sub b/forester/archive/RIO/others/hmmer/config.sub deleted file mode 100755 index 00bea6e..0000000 --- a/forester/archive/RIO/others/hmmer/config.sub +++ /dev/null @@ -1,955 +0,0 @@ -#! /bin/sh -# Configuration validation subroutine script, version 1.1. -# Copyright (C) 1991, 92-97, 1998 Free Software Foundation, Inc. -# This file is (in principle) common to ALL GNU software. -# The presence of a machine in this file suggests that SOME GNU software -# can handle that machine. 
It does not imply ALL GNU software can. -# -# This file is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Configuration subroutine to validate and canonicalize a configuration type. -# Supply the specified configuration type as an argument. -# If it is invalid, we print an error message on stderr and exit with code 1. -# Otherwise, we print the canonical config type on stdout and succeed. - -# This file is supposed to be the same for all GNU packages -# and recognize all the CPU types, system types and aliases -# that are meaningful with *any* GNU software. -# Each package is responsible for reporting which valid configurations -# it does not support. The user should be able to distinguish -# a failure to support a valid configuration from a meaningless -# configuration. 
- -# The goal of this file is to map all the various variations of a given -# machine specification into a single specification in the form: -# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM -# or in some cases, the newer four-part form: -# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM -# It is wrong to echo any other type of specification. - -if [ x$1 = x ] -then - echo Configuration name missing. 1>&2 - echo "Usage: $0 CPU-MFR-OPSYS" 1>&2 - echo "or $0 ALIAS" 1>&2 - echo where ALIAS is a recognized configuration type. 1>&2 - exit 1 -fi - -# First pass through any local machine types. -case $1 in - *local*) - echo $1 - exit 0 - ;; - *) - ;; -esac - -# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). -# Here we must recognize all the valid KERNEL-OS combinations. -maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` -case $maybe_os in - linux-gnu*) - os=-$maybe_os - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` - ;; - *) - basic_machine=`echo $1 | sed 's/-[^-]*$//'` - if [ $basic_machine != $1 ] - then os=`echo $1 | sed 's/.*-/-/'` - else os=; fi - ;; -esac - -### Let's recognize common machines as not being operating systems so -### that things like config.sub decstation-3100 work. We also -### recognize some manufacturers as not being operating systems, so we -### can provide default operating systems below. -case $os in - -sun*os*) - # Prevent following clause from handling this invalid input. 
- ;; - -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ - -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ - -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ - -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ - -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ - -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple) - os= - basic_machine=$1 - ;; - -hiux*) - os=-hiuxwe2 - ;; - -sco5) - os=sco3.2v5 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco4) - os=-sco3.2v4 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2.[4-9]*) - os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2v[4-9]*) - # Don't forget version if it is 3.2v4 or newer. - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco*) - os=-sco3.2v2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -isc) - os=-isc2.2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -clix*) - basic_machine=clipper-intergraph - ;; - -isc*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -lynx*) - os=-lynxos - ;; - -ptx*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` - ;; - -windowsnt*) - os=`echo $os | sed -e 's/windowsnt/winnt/'` - ;; - -psos*) - os=-psos - ;; -esac - -# Decode aliases for certain CPU-COMPANY combinations. -case $basic_machine in - # Recognize the basic CPU types without company name. - # Some are omitted here because they have special meanings below. 
- tahoe | i860 | m32r | m68k | m68000 | m88k | ns32k | arc | arm \ - | arme[lb] | pyramid | mn10200 | mn10300 | tron | a29k \ - | 580 | i960 | h8300 | hppa | hppa1.0 | hppa1.1 | hppa2.0 \ - | alpha | alphaev5 | alphaev56 | we32k | ns16k | clipper \ - | i370 | sh | powerpc | powerpcle | 1750a | dsp16xx | pdp11 \ - | mips64 | mipsel | mips64el | mips64orion | mips64orionel \ - | mipstx39 | mipstx39el \ - | sparc | sparclet | sparclite | sparc64 | v850) - basic_machine=$basic_machine-unknown - ;; - # We use `pc' rather than `unknown' - # because (1) that's what they normally are, and - # (2) the word "unknown" tends to confuse beginning users. - i[34567]86) - basic_machine=$basic_machine-pc - ;; - # Object if more than one company name word. - *-*-*) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; - # Recognize the basic CPU types with company name. - vax-* | tahoe-* | i[34567]86-* | i860-* | m32r-* | m68k-* | m68000-* \ - | m88k-* | sparc-* | ns32k-* | fx80-* | arc-* | arm-* | c[123]* \ - | mips-* | pyramid-* | tron-* | a29k-* | romp-* | rs6000-* \ - | power-* | none-* | 580-* | cray2-* | h8300-* | i960-* \ - | xmp-* | ymp-* | hppa-* | hppa1.0-* | hppa1.1-* | hppa2.0-* \ - | alpha-* | alphaev5-* | alphaev56-* | we32k-* | cydra-* \ - | ns16k-* | pn-* | np1-* | xps100-* | clipper-* | orion-* \ - | sparclite-* | pdp11-* | sh-* | powerpc-* | powerpcle-* \ - | sparc64-* | mips64-* | mipsel-* \ - | mips64el-* | mips64orion-* | mips64orionel-* \ - | mipstx39-* | mipstx39el-* \ - | f301-*) - ;; - # Recognize the various machine names and aliases which stand - # for a CPU type and a company and sometimes even an OS. 
- 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - basic_machine=m68000-att - ;; - 3b*) - basic_machine=we32k-att - ;; - alliant | fx80) - basic_machine=fx80-alliant - ;; - altos | altos3068) - basic_machine=m68k-altos - ;; - am29k) - basic_machine=a29k-none - os=-bsd - ;; - amdahl) - basic_machine=580-amdahl - os=-sysv - ;; - amiga | amiga-*) - basic_machine=m68k-cbm - ;; - amigaos | amigados) - basic_machine=m68k-cbm - os=-amigaos - ;; - amigaunix | amix) - basic_machine=m68k-cbm - os=-sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - os=-sysv - ;; - aux) - basic_machine=m68k-apple - os=-aux - ;; - balance) - basic_machine=ns32k-sequent - os=-dynix - ;; - convex-c1) - basic_machine=c1-convex - os=-bsd - ;; - convex-c2) - basic_machine=c2-convex - os=-bsd - ;; - convex-c32) - basic_machine=c32-convex - os=-bsd - ;; - convex-c34) - basic_machine=c34-convex - os=-bsd - ;; - convex-c38) - basic_machine=c38-convex - os=-bsd - ;; - cray | ymp) - basic_machine=ymp-cray - os=-unicos - ;; - cray2) - basic_machine=cray2-cray - os=-unicos - ;; - [ctj]90-cray) - basic_machine=c90-cray - os=-unicos - ;; - crds | unos) - basic_machine=m68k-crds - ;; - da30 | da30-*) - basic_machine=m68k-da30 - ;; - decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) - basic_machine=mips-dec - ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) - basic_machine=m68k-motorola - ;; - delta88) - basic_machine=m88k-motorola - os=-sysv3 - ;; - dpx20 | dpx20-*) - basic_machine=rs6000-bull - os=-bosx - ;; - dpx2* | dpx2*-bull) - basic_machine=m68k-bull - os=-sysv3 - ;; - ebmon29k) - basic_machine=a29k-amd - os=-ebmon - ;; - elxsi) - basic_machine=elxsi-elxsi - os=-bsd - ;; - encore | umax | mmax) - basic_machine=ns32k-encore - ;; - fx2800) - basic_machine=i860-alliant - ;; - genix) - basic_machine=ns32k-ns - ;; - gmicro) - basic_machine=tron-gmicro - os=-sysv - ;; - h3050r* | hiux*) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 
- ;; - h8300hms) - basic_machine=h8300-hitachi - os=-hms - ;; - harris) - basic_machine=m88k-harris - os=-sysv3 - ;; - hp300-*) - basic_machine=m68k-hp - ;; - hp300bsd) - basic_machine=m68k-hp - os=-bsd - ;; - hp300hpux) - basic_machine=m68k-hp - os=-hpux - ;; - hp9k2[0-9][0-9] | hp9k31[0-9]) - basic_machine=m68000-hp - ;; - hp9k3[2-9][0-9]) - basic_machine=m68k-hp - ;; - hp9k7[0-9][0-9] | hp7[0-9][0-9] | hp9k8[0-9]7 | hp8[0-9]7) - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][0-9] | hp8[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hppa-next) - os=-nextstep3 - ;; - i370-ibm* | ibm*) - basic_machine=i370-ibm - os=-mvs - ;; -# I'm not sure what "Sysv32" means. Should this be sysv3.2? - i[34567]86v32) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv32 - ;; - i[34567]86v4*) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv4 - ;; - i[34567]86v) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv - ;; - i[34567]86sol2) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-solaris2 - ;; - iris | iris4d) - basic_machine=mips-sgi - case $os in - -irix*) - ;; - *) - os=-irix4 - ;; - esac - ;; - isi68 | isi) - basic_machine=m68k-isi - os=-sysv - ;; - m88k-omron*) - basic_machine=m88k-omron - ;; - magnum | m3230) - basic_machine=mips-mips - os=-sysv - ;; - merlin) - basic_machine=ns32k-utek - os=-sysv - ;; - miniframe) - basic_machine=m68000-convergent - ;; - mipsel*-linux*) - basic_machine=mipsel-unknown - os=-linux-gnu - ;; - mips*-linux*) - basic_machine=mips-unknown - os=-linux-gnu - ;; - mips3*-*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` - ;; - mips3*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown - ;; - ncr3000) - basic_machine=i486-ncr - os=-sysv4 - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - os=-newsos - ;; - news1000) - basic_machine=m68030-sony - os=-newsos - ;; - news-3600 | risc-news) - basic_machine=mips-sony - os=-newsos - ;; - next | m*-next ) - 
basic_machine=m68k-next - case $os in - -nextstep* ) - ;; - -ns2*) - os=-nextstep2 - ;; - *) - os=-nextstep3 - ;; - esac - ;; - nh3000) - basic_machine=m68k-harris - os=-cxux - ;; - nh[45]000) - basic_machine=m88k-harris - os=-cxux - ;; - nindy960) - basic_machine=i960-intel - os=-nindy - ;; - np1) - basic_machine=np1-gould - ;; - pa-hitachi) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - paragon) - basic_machine=i860-intel - os=-osf - ;; - pbd) - basic_machine=sparc-tti - ;; - pbb) - basic_machine=m68k-tti - ;; - pc532 | pc532-*) - basic_machine=ns32k-pc532 - ;; - pentium | p5 | k5 | nexen) - basic_machine=i586-pc - ;; - pentiumpro | p6 | k6 | 6x86) - basic_machine=i686-pc - ;; - pentiumii | pentium2) - basic_machine=i786-pc - ;; - pentium-* | p5-* | k5-* | nexen-*) - basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumpro-* | p6-* | k6-* | 6x86-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumii-* | pentium2-*) - basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pn) - basic_machine=pn-gould - ;; - power) basic_machine=rs6000-ibm - ;; - ppc) basic_machine=powerpc-unknown - ;; - ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppcle | powerpclittle | ppc-le | powerpc-little) - basic_machine=powerpcle-unknown - ;; - ppcle-* | powerpclittle-*) - basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ps2) - basic_machine=i386-ibm - ;; - rm[46]00) - basic_machine=mips-siemens - ;; - rtpc | rtpc-*) - basic_machine=romp-ibm - ;; - sequent) - basic_machine=i386-sequent - ;; - sh) - basic_machine=sh-hitachi - os=-hms - ;; - sps7) - basic_machine=m68k-bull - os=-sysv2 - ;; - spur) - basic_machine=spur-unknown - ;; - sun2) - basic_machine=m68000-sun - ;; - sun2os3) - basic_machine=m68000-sun - os=-sunos3 - ;; - sun2os4) - basic_machine=m68000-sun - os=-sunos4 - ;; - sun3os3) - basic_machine=m68k-sun - os=-sunos3 - ;; - sun3os4) - basic_machine=m68k-sun 
- os=-sunos4 - ;; - sun4os3) - basic_machine=sparc-sun - os=-sunos3 - ;; - sun4os4) - basic_machine=sparc-sun - os=-sunos4 - ;; - sun4sol2) - basic_machine=sparc-sun - os=-solaris2 - ;; - sun3 | sun3-*) - basic_machine=m68k-sun - ;; - sun4) - basic_machine=sparc-sun - ;; - sun386 | sun386i | roadrunner) - basic_machine=i386-sun - ;; - symmetry) - basic_machine=i386-sequent - os=-dynix - ;; - tx39) - basic_machine=mipstx39-unknown - ;; - tx39el) - basic_machine=mipstx39el-unknown - ;; - tower | tower-32) - basic_machine=m68k-ncr - ;; - udi29k) - basic_machine=a29k-amd - os=-udi - ;; - ultra3) - basic_machine=a29k-nyu - os=-sym1 - ;; - vaxv) - basic_machine=vax-dec - os=-sysv - ;; - vms) - basic_machine=vax-dec - os=-vms - ;; - vpp*|vx|vx-*) - basic_machine=f301-fujitsu - ;; - vxworks960) - basic_machine=i960-wrs - os=-vxworks - ;; - vxworks68) - basic_machine=m68k-wrs - os=-vxworks - ;; - vxworks29k) - basic_machine=a29k-wrs - os=-vxworks - ;; - xmp) - basic_machine=xmp-cray - os=-unicos - ;; - xps | xps100) - basic_machine=xps100-honeywell - ;; - none) - basic_machine=none-none - os=-none - ;; - -# Here we handle the default manufacturer of certain CPU types. It is in -# some cases the only manufacturer, in others, it is the most popular. - mips) - if [ x$os = x-linux-gnu ]; then - basic_machine=mips-unknown - else - basic_machine=mips-mips - fi - ;; - romp) - basic_machine=romp-ibm - ;; - rs6000) - basic_machine=rs6000-ibm - ;; - vax) - basic_machine=vax-dec - ;; - pdp11) - basic_machine=pdp11-dec - ;; - we32k) - basic_machine=we32k-att - ;; - sparc) - basic_machine=sparc-sun - ;; - cydra) - basic_machine=cydra-cydrome - ;; - orion) - basic_machine=orion-highlevel - ;; - orion105) - basic_machine=clipper-highlevel - ;; - *) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; -esac - -# Here we canonicalize certain aliases for manufacturers. 
-case $basic_machine in - *-digital*) - basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` - ;; - *-commodore*) - basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` - ;; - *) - ;; -esac - -# Decode manufacturer-specific aliases for certain operating systems. - -if [ x"$os" != x"" ] -then -case $os in - # First match some system type aliases - # that might get confused with valid system types. - # -solaris* is a basic system type, with this one exception. - -solaris1 | -solaris1.*) - os=`echo $os | sed -e 's|solaris1|sunos4|'` - ;; - -solaris) - os=-solaris2 - ;; - -svr4*) - os=-sysv4 - ;; - -unixware*) - os=-sysv4.2uw - ;; - -gnu/linux*) - os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` - ;; - # First accept the basic system types. - # The portable systems comes first. - # Each alternative MUST END IN A *, to match a version number. - # -sysv* is not here because it comes later, after sysvr4. - -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ - | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ - | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ - | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* \ - | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ - | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \ - | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* \ - | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ - | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -uxpv* | -beos*) - # Remember, each alternative MUST END IN *, to match a version number. 
- ;; - -linux*) - os=`echo $os | sed -e 's|linux|linux-gnu|'` - ;; - -sunos5*) - os=`echo $os | sed -e 's|sunos5|solaris2|'` - ;; - -sunos6*) - os=`echo $os | sed -e 's|sunos6|solaris3|'` - ;; - -osfrose*) - os=-osfrose - ;; - -osf*) - os=-osf - ;; - -utek*) - os=-bsd - ;; - -dynix*) - os=-bsd - ;; - -acis*) - os=-aos - ;; - -ctix* | -uts*) - os=-sysv - ;; - -ns2 ) - os=-nextstep2 - ;; - # Preserve the version number of sinix5. - -sinix5.*) - os=`echo $os | sed -e 's|sinix|sysv|'` - ;; - -sinix*) - os=-sysv4 - ;; - -triton*) - os=-sysv3 - ;; - -oss*) - os=-sysv3 - ;; - -svr4) - os=-sysv4 - ;; - -svr3) - os=-sysv3 - ;; - -sysvr4) - os=-sysv4 - ;; - # This must come after -sysvr4. - -sysv*) - ;; - -xenix) - os=-xenix - ;; - -none) - ;; - *) - # Get rid of the `-' at the beginning of $os. - os=`echo $os | sed 's/[^-]*-//'` - echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 - exit 1 - ;; -esac -else - -# Here we handle the default operating systems that come with various machines. -# The value should be what the vendor currently ships out the door with their -# machine or put another way, the most popular os provided with the machine. - -# Note that if you're going to try to match "-MANUFACTURER" here (say, -# "-sun"), then you have to tell the case statement up towards the top -# that MANUFACTURER isn't an operating system. Otherwise, code above -# will signal an error saying that MANUFACTURER isn't an operating -# system, and we'll never get to this point. - -case $basic_machine in - *-acorn) - os=-riscix1.2 - ;; - arm*-semi) - os=-aout - ;; - pdp11-*) - os=-none - ;; - *-dec | vax-*) - os=-ultrix4.2 - ;; - m68*-apollo) - os=-domain - ;; - i386-sun) - os=-sunos4.0.2 - ;; - m68000-sun) - os=-sunos3 - # This also exists in the configure program, but was not the - # default. - # os=-sunos4 - ;; - *-tti) # must be before sparc entry or we get the wrong os. 
- os=-sysv3 - ;; - sparc-* | *-sun) - os=-sunos4.1.1 - ;; - *-be) - os=-beos - ;; - *-ibm) - os=-aix - ;; - *-hp) - os=-hpux - ;; - *-hitachi) - os=-hiux - ;; - i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) - os=-sysv - ;; - *-cbm) - os=-amigaos - ;; - *-dg) - os=-dgux - ;; - *-dolphin) - os=-sysv3 - ;; - m68k-ccur) - os=-rtu - ;; - m88k-omron*) - os=-luna - ;; - *-next ) - os=-nextstep - ;; - *-sequent) - os=-ptx - ;; - *-crds) - os=-unos - ;; - *-ns) - os=-genix - ;; - i370-*) - os=-mvs - ;; - *-next) - os=-nextstep3 - ;; - *-gould) - os=-sysv - ;; - *-highlevel) - os=-bsd - ;; - *-encore) - os=-bsd - ;; - *-sgi) - os=-irix - ;; - *-siemens) - os=-sysv4 - ;; - *-masscomp) - os=-rtu - ;; - f301-fujitsu) - os=-uxpv - ;; - *) - os=-none - ;; -esac -fi - -# Here we handle the case where we know the os, and the CPU type, but not the -# manufacturer. We pick the logical manufacturer. -vendor=unknown -case $basic_machine in - *-unknown) - case $os in - -riscix*) - vendor=acorn - ;; - -sunos*) - vendor=sun - ;; - -aix*) - vendor=ibm - ;; - -hpux*) - vendor=hp - ;; - -hiux*) - vendor=hitachi - ;; - -unos*) - vendor=crds - ;; - -dgux*) - vendor=dg - ;; - -luna*) - vendor=omron - ;; - -genix*) - vendor=ns - ;; - -mvs*) - vendor=ibm - ;; - -ptx*) - vendor=sequent - ;; - -vxsim* | -vxworks*) - vendor=wrs - ;; - -aux*) - vendor=apple - ;; - esac - basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` - ;; -esac - -echo $basic_machine$os diff --git a/forester/archive/RIO/others/hmmer/configure b/forester/archive/RIO/others/hmmer/configure deleted file mode 100755 index e5b79e4..0000000 --- a/forester/archive/RIO/others/hmmer/configure +++ /dev/null @@ -1,2509 +0,0 @@ -#! /bin/sh - -# Guess values for system-dependent variables and create Makefiles. -# Generated automatically using autoconf version 2.13 -# Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. 
-# -# This configure script is free software; the Free Software Foundation -# gives unlimited permission to copy, distribute and modify it. - -# Defaults: -ac_help= -ac_default_prefix=/usr/local -# Any additions from configure.in: -ac_help="$ac_help - --with-pvm enable PVM, Parallel Virtual Machine" -ac_help="$ac_help - --disable-threads disable POSIX threads support" - -# Initialize some variables set by options. -# The variables have the same names as the options, with -# dashes changed to underlines. -build=NONE -cache_file=./config.cache -exec_prefix=NONE -host=NONE -no_create= -nonopt=NONE -no_recursion= -prefix=NONE -program_prefix=NONE -program_suffix=NONE -program_transform_name=s,x,x, -silent= -site= -srcdir= -target=NONE -verbose= -x_includes=NONE -x_libraries=NONE -bindir='${exec_prefix}/bin' -sbindir='${exec_prefix}/sbin' -libexecdir='${exec_prefix}/libexec' -datadir='${prefix}/share' -sysconfdir='${prefix}/etc' -sharedstatedir='${prefix}/com' -localstatedir='${prefix}/var' -libdir='${exec_prefix}/lib' -includedir='${prefix}/include' -oldincludedir='/usr/include' -infodir='${prefix}/info' -mandir='${prefix}/man' - -# Initialize some other variables. -subdirs= -MFLAGS= MAKEFLAGS= -SHELL=${CONFIG_SHELL-/bin/sh} -# Maximum number of lines to put in a shell here document. -ac_max_here_lines=12 - -ac_prev= -for ac_option -do - - # If the previous option needs an argument, assign it. - if test -n "$ac_prev"; then - eval "$ac_prev=\$ac_option" - ac_prev= - continue - fi - - case "$ac_option" in - -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) ac_optarg= ;; - esac - - # Accept the important Cygnus configure options, so we can diagnose typos. 
- - case "$ac_option" in - - -bindir | --bindir | --bindi | --bind | --bin | --bi) - ac_prev=bindir ;; - -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) - bindir="$ac_optarg" ;; - - -build | --build | --buil | --bui | --bu) - ac_prev=build ;; - -build=* | --build=* | --buil=* | --bui=* | --bu=*) - build="$ac_optarg" ;; - - -cache-file | --cache-file | --cache-fil | --cache-fi \ - | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) - ac_prev=cache_file ;; - -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ - | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) - cache_file="$ac_optarg" ;; - - -datadir | --datadir | --datadi | --datad | --data | --dat | --da) - ac_prev=datadir ;; - -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ - | --da=*) - datadir="$ac_optarg" ;; - - -disable-* | --disable-*) - ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - eval "enable_${ac_feature}=no" ;; - - -enable-* | --enable-*) - ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. 
- if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "enable_${ac_feature}='$ac_optarg'" ;; - - -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ - | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ - | --exec | --exe | --ex) - ac_prev=exec_prefix ;; - -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ - | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ - | --exec=* | --exe=* | --ex=*) - exec_prefix="$ac_optarg" ;; - - -gas | --gas | --ga | --g) - # Obsolete; use --with-gas. - with_gas=yes ;; - - -help | --help | --hel | --he) - # Omit some internal or obsolete options to make the list less imposing. - # This message is too long to be a string in the A/UX 3.1 sh. - cat << EOF -Usage: configure [options] [host] -Options: [defaults in brackets after descriptions] -Configuration: - --cache-file=FILE cache test results in FILE - --help print this message - --no-create do not create output files - --quiet, --silent do not print \`checking...' 
messages - --version print the version of autoconf that created configure -Directory and file names: - --prefix=PREFIX install architecture-independent files in PREFIX - [$ac_default_prefix] - --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX - [same as prefix] - --bindir=DIR user executables in DIR [EPREFIX/bin] - --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] - --libexecdir=DIR program executables in DIR [EPREFIX/libexec] - --datadir=DIR read-only architecture-independent data in DIR - [PREFIX/share] - --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] - --sharedstatedir=DIR modifiable architecture-independent data in DIR - [PREFIX/com] - --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] - --libdir=DIR object code libraries in DIR [EPREFIX/lib] - --includedir=DIR C header files in DIR [PREFIX/include] - --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] - --infodir=DIR info documentation in DIR [PREFIX/info] - --mandir=DIR man documentation in DIR [PREFIX/man] - --srcdir=DIR find the sources in DIR [configure dir or ..] 
- --program-prefix=PREFIX prepend PREFIX to installed program names - --program-suffix=SUFFIX append SUFFIX to installed program names - --program-transform-name=PROGRAM - run sed PROGRAM on installed program names -EOF - cat << EOF -Host type: - --build=BUILD configure for building on BUILD [BUILD=HOST] - --host=HOST configure for HOST [guessed] - --target=TARGET configure for TARGET [TARGET=HOST] -Features and packages: - --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) - --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] - --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) - --x-includes=DIR X include files are in DIR - --x-libraries=DIR X library files are in DIR -EOF - if test -n "$ac_help"; then - echo "--enable and --with options recognized:$ac_help" - fi - exit 0 ;; - - -host | --host | --hos | --ho) - ac_prev=host ;; - -host=* | --host=* | --hos=* | --ho=*) - host="$ac_optarg" ;; - - -includedir | --includedir | --includedi | --included | --include \ - | --includ | --inclu | --incl | --inc) - ac_prev=includedir ;; - -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ - | --includ=* | --inclu=* | --incl=* | --inc=*) - includedir="$ac_optarg" ;; - - -infodir | --infodir | --infodi | --infod | --info | --inf) - ac_prev=infodir ;; - -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) - infodir="$ac_optarg" ;; - - -libdir | --libdir | --libdi | --libd) - ac_prev=libdir ;; - -libdir=* | --libdir=* | --libdi=* | --libd=*) - libdir="$ac_optarg" ;; - - -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ - | --libexe | --libex | --libe) - ac_prev=libexecdir ;; - -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ - | --libexe=* | --libex=* | --libe=*) - libexecdir="$ac_optarg" ;; - - -localstatedir | --localstatedir | --localstatedi | --localstated \ - | --localstate | --localstat | --localsta | 
--localst \ - | --locals | --local | --loca | --loc | --lo) - ac_prev=localstatedir ;; - -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ - | --localstate=* | --localstat=* | --localsta=* | --localst=* \ - | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) - localstatedir="$ac_optarg" ;; - - -mandir | --mandir | --mandi | --mand | --man | --ma | --m) - ac_prev=mandir ;; - -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) - mandir="$ac_optarg" ;; - - -nfp | --nfp | --nf) - # Obsolete; use --without-fp. - with_fp=no ;; - - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) - no_create=yes ;; - - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) - no_recursion=yes ;; - - -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ - | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ - | --oldin | --oldi | --old | --ol | --o) - ac_prev=oldincludedir ;; - -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ - | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ - | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) - oldincludedir="$ac_optarg" ;; - - -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) - ac_prev=prefix ;; - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) - prefix="$ac_optarg" ;; - - -program-prefix | --program-prefix | --program-prefi | --program-pref \ - | --program-pre | --program-pr | --program-p) - ac_prev=program_prefix ;; - -program-prefix=* | --program-prefix=* | --program-prefi=* \ - | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) - program_prefix="$ac_optarg" ;; - - -program-suffix | --program-suffix | --program-suffi | --program-suff \ - | --program-suf | --program-su | --program-s) - ac_prev=program_suffix ;; - -program-suffix=* | 
--program-suffix=* | --program-suffi=* \ - | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) - program_suffix="$ac_optarg" ;; - - -program-transform-name | --program-transform-name \ - | --program-transform-nam | --program-transform-na \ - | --program-transform-n | --program-transform- \ - | --program-transform | --program-transfor \ - | --program-transfo | --program-transf \ - | --program-trans | --program-tran \ - | --progr-tra | --program-tr | --program-t) - ac_prev=program_transform_name ;; - -program-transform-name=* | --program-transform-name=* \ - | --program-transform-nam=* | --program-transform-na=* \ - | --program-transform-n=* | --program-transform-=* \ - | --program-transform=* | --program-transfor=* \ - | --program-transfo=* | --program-transf=* \ - | --program-trans=* | --program-tran=* \ - | --progr-tra=* | --program-tr=* | --program-t=*) - program_transform_name="$ac_optarg" ;; - - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - silent=yes ;; - - -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) - ac_prev=sbindir ;; - -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ - | --sbi=* | --sb=*) - sbindir="$ac_optarg" ;; - - -sharedstatedir | --sharedstatedir | --sharedstatedi \ - | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ - | --sharedst | --shareds | --shared | --share | --shar \ - | --sha | --sh) - ac_prev=sharedstatedir ;; - -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ - | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ - | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ - | --sha=* | --sh=*) - sharedstatedir="$ac_optarg" ;; - - -site | --site | --sit) - ac_prev=site ;; - -site=* | --site=* | --sit=*) - site="$ac_optarg" ;; - - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) - ac_prev=srcdir ;; - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) - 
srcdir="$ac_optarg" ;; - - -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ - | --syscon | --sysco | --sysc | --sys | --sy) - ac_prev=sysconfdir ;; - -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ - | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) - sysconfdir="$ac_optarg" ;; - - -target | --target | --targe | --targ | --tar | --ta | --t) - ac_prev=target ;; - -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) - target="$ac_optarg" ;; - - -v | -verbose | --verbose | --verbos | --verbo | --verb) - verbose=yes ;; - - -version | --version | --versio | --versi | --vers) - echo "configure generated by autoconf version 2.13" - exit 0 ;; - - -with-* | --with-*) - ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "with_${ac_package}='$ac_optarg'" ;; - - -without-* | --without-*) - ac_package=`echo $ac_option|sed -e 's/-*without-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - eval "with_${ac_package}=no" ;; - - --x) - # Obsolete; use --with-x. 
- with_x=yes ;; - - -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ - | --x-incl | --x-inc | --x-in | --x-i) - ac_prev=x_includes ;; - -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ - | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) - x_includes="$ac_optarg" ;; - - -x-libraries | --x-libraries | --x-librarie | --x-librari \ - | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) - ac_prev=x_libraries ;; - -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ - | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) - x_libraries="$ac_optarg" ;; - - -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } - ;; - - *) - if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then - echo "configure: warning: $ac_option: invalid host type" 1>&2 - fi - if test "x$nonopt" != xNONE; then - { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } - fi - nonopt="$ac_option" - ;; - - esac -done - -if test -n "$ac_prev"; then - { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } -fi - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -# File descriptor usage: -# 0 standard input -# 1 file creation -# 2 errors and warnings -# 3 some systems may open it to /dev/tty -# 4 used on the Kubota Titan -# 6 checking for... messages and results -# 5 compiler messages saved in config.log -if test "$silent" = yes; then - exec 6>/dev/null -else - exec 6>&1 -fi -exec 5>./config.log - -echo "\ -This file contains any messages produced by compilers while -running configure, to aid debugging if configure makes a mistake. -" 1>&5 - -# Strip out --no-create and --no-recursion so they do not pile up. -# Also quote any args containing shell metacharacters. 
-ac_configure_args= -for ac_arg -do - case "$ac_arg" in - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) ;; - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; - *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) - ac_configure_args="$ac_configure_args '$ac_arg'" ;; - *) ac_configure_args="$ac_configure_args $ac_arg" ;; - esac -done - -# NLS nuisances. -# Only set these to C if already set. These must not be set unconditionally -# because not all systems understand e.g. LANG=C (notably SCO). -# Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! -# Non-C LC_CTYPE values break the ctype check. -if test "${LANG+set}" = set; then LANG=C; export LANG; fi -if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi -if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi -if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi - -# confdefs.h avoids OS command line length limits that DEFS can exceed. -rm -rf conftest* confdefs.h -# AIX cpp loses on an empty file, so make sure it contains at least a newline. -echo > confdefs.h - -# A filename unique to this package, relative to the directory that -# configure is in, which we can look for to find out if srcdir is correct. -ac_unique_file=src/hmmpfam.c - -# Find the source files, if location was not specified. -if test -z "$srcdir"; then - ac_srcdir_defaulted=yes - # Try the directory containing this script, then its parent. - ac_prog=$0 - ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` - test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. - srcdir=$ac_confdir - if test ! -r $srcdir/$ac_unique_file; then - srcdir=.. - fi -else - ac_srcdir_defaulted=no -fi -if test ! -r $srcdir/$ac_unique_file; then - if test "$ac_srcdir_defaulted" = yes; then - { echo "configure: error: can not find sources in $ac_confdir or .." 
1>&2; exit 1; } - else - { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } - fi -fi -srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` - -# Prefer explicitly selected file to automatically selected ones. -if test -z "$CONFIG_SITE"; then - if test "x$prefix" != xNONE; then - CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" - else - CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" - fi -fi -for ac_site_file in $CONFIG_SITE; do - if test -r "$ac_site_file"; then - echo "loading site script $ac_site_file" - . "$ac_site_file" - fi -done - -if test -r "$cache_file"; then - echo "loading cache $cache_file" - . $cache_file -else - echo "creating cache $cache_file" - > $cache_file -fi - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -ac_exeext= -ac_objext=o -if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then - # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. - if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then - ac_n= ac_c=' -' ac_t=' ' - else - ac_n=-n ac_c= ac_t= - fi -else - ac_n= ac_c='\c' ac_t= -fi - - - -echo " Welcome to HMMER... configuring for your system." - - - - - - - - -# Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:542: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. 
-else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="gcc" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:572: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_prog_rejected=no - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - break - fi - done - IFS="$ac_save_ifs" -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# -gt 0; then - # We chose a different compiler from the bogus one. - # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. - shift - set dummy "$ac_dir/$ac_word" "$@" - shift - ac_cv_prog_CC="$@" - fi -fi -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - if test -z "$CC"; then - case "`uname -s`" in - *win32* | *WIN32*) - # Extract the first word of "cl", so it can be a program name with args. -set dummy cl; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:623: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="cl" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - ;; - esac - fi - test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; } -fi - -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 -echo "configure:655: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -cat > conftest.$ac_ext << EOF - -#line 666 "configure" -#include "confdefs.h" - -main(){return(0);} -EOF -if { (eval echo configure:671: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - ac_cv_prog_cc_works=yes - # If we can't run a trivial program, we are probably using a cross compiler. - if (./conftest; exit) 2>/dev/null; then - ac_cv_prog_cc_cross=no - else - ac_cv_prog_cc_cross=yes - fi -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - ac_cv_prog_cc_works=no -fi -rm -fr conftest* -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. 
-ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -echo "$ac_t""$ac_cv_prog_cc_works" 1>&6 -if test $ac_cv_prog_cc_works = no; then - { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } -fi -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 -echo "configure:697: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 -echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 -cross_compiling=$ac_cv_prog_cc_cross - -echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 -echo "configure:702: checking whether we are using GNU C" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then - ac_cv_prog_gcc=yes -else - ac_cv_prog_gcc=no -fi -fi - -echo "$ac_t""$ac_cv_prog_gcc" 1>&6 - -if test $ac_cv_prog_gcc = yes; then - GCC=yes -else - GCC= -fi - -ac_test_CFLAGS="${CFLAGS+set}" -ac_save_CFLAGS="$CFLAGS" -CFLAGS= -echo $ac_n "checking whether ${CC-cc} accepts -g""... 
$ac_c" 1>&6 -echo "configure:730: checking whether ${CC-cc} accepts -g" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - echo 'void f(){}' > conftest.c -if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then - ac_cv_prog_cc_g=yes -else - ac_cv_prog_cc_g=no -fi -rm -f conftest* - -fi - -echo "$ac_t""$ac_cv_prog_cc_g" 1>&6 -if test "$ac_test_CFLAGS" = set; then - CFLAGS="$ac_save_CFLAGS" -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi - -echo $ac_n "checking whether ln -s works""... $ac_c" 1>&6 -echo "configure:762: checking whether ln -s works" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_LN_S'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - rm -f conftestdata -if ln -s X conftestdata 2>/dev/null -then - rm -f conftestdata - ac_cv_prog_LN_S="ln -s" -else - ac_cv_prog_LN_S=ln -fi -fi -LN_S="$ac_cv_prog_LN_S" -if test "$ac_cv_prog_LN_S" = "ln -s"; then - echo "$ac_t""yes" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -# Extract the first word of "ranlib", so it can be a program name with args. -set dummy ranlib; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:785: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$RANLIB"; then - ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. 
- if test -f $ac_dir/$ac_word; then - ac_cv_prog_RANLIB="ranlib" - break - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":" -fi -fi -RANLIB="$ac_cv_prog_RANLIB" -if test -n "$RANLIB"; then - echo "$ac_t""$RANLIB" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - - - - echo $ac_n "checking whether your make is GNU make""... $ac_c" 1>&6 -echo "configure:816: checking whether your make is GNU make" >&5 - foundGNUmake='nope, assuming sysv make.' ; - EXEC_DEPENDENCY=\$\$\@.o ; - if ( make --version nothing 2> /dev/null | grep GNU > /dev/null ) ; then - foundGNUmake='yes, it is.' ; - EXEC_DEPENDENCY='%: %.o' ; - fi - echo "$ac_t""$foundGNUmake" 1>&6 - - - -# Check whether --with-pvm or --without-pvm was given. -if test "${with_pvm+set}" = set; then - withval="$with_pvm" - case $with_pvm in - yes) echo 'Configuring for PVM' - PVMLIBDIR="-L${PVM_ROOT}/lib/${PVM_ARCH}" - PVMINCDIR="-I${PVM_ROOT}/include" - PVMFLAG="-DHMMER_PVM" - PVMPROGS="hmmcalibrate-pvm hmmpfam-pvm hmmsearch-pvm" - PVMLIBS="-lpvm3" - ;; - no) ;; - *) echo "Ignoring unknown argument to --with-pvm: $with_pvm" - ;; -esac -fi - - -ac_aux_dir= -for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do - if test -f $ac_dir/install-sh; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install-sh -c" - break - elif test -f $ac_dir/install.sh; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install.sh -c" - break - fi -done -if test -z "$ac_aux_dir"; then - { echo "configure: error: can not find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." 1>&2; exit 1; } -fi -ac_config_guess=$ac_aux_dir/config.guess -ac_config_sub=$ac_aux_dir/config.sub -ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. - - -# Make sure we can run config.sub. -if ${CONFIG_SHELL-/bin/sh} $ac_config_sub sun4 >/dev/null 2>&1; then : -else { echo "configure: error: can not run $ac_config_sub" 1>&2; exit 1; } -fi - -echo $ac_n "checking host system type""... 
$ac_c" 1>&6 -echo "configure:871: checking host system type" >&5 - -host_alias=$host -case "$host_alias" in -NONE) - case $nonopt in - NONE) - if host_alias=`${CONFIG_SHELL-/bin/sh} $ac_config_guess`; then : - else { echo "configure: error: can not guess host type; you must specify one" 1>&2; exit 1; } - fi ;; - *) host_alias=$nonopt ;; - esac ;; -esac - -host=`${CONFIG_SHELL-/bin/sh} $ac_config_sub $host_alias` -host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` -host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` -host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` -echo "$ac_t""$host" 1>&6 - -echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 -echo "configure:892: checking how to run the C preprocessor" >&5 -# On Suns, sometimes $CPP names a directory. -if test -n "$CPP" && test -d "$CPP"; then - CPP= -fi -if test -z "$CPP"; then -if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - # This must be in double quotes, not single quotes, because CPP may get - # substituted into the Makefile and "${CC-cc}" will confuse make. - CPP="${CC-cc} -E" - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. 
- cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:913: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -E -traditional-cpp" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:930: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -nologo -E" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:947: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP=/lib/cpp -fi -rm -f conftest* -fi -rm -f conftest* -fi -rm -f conftest* - ac_cv_prog_CPP="$CPP" -fi - CPP="$ac_cv_prog_CPP" -else - ac_cv_prog_CPP="$CPP" -fi -echo "$ac_t""$CPP" 1>&6 - -# Check whether --enable-threads or --disable-threads was given. -if test "${enable_threads+set}" = set; then - enableval="$enable_threads" - case $enable_threads in - yes) echo "Enabling POSIX threads support" - - -acx_pthread_ok=no - -# First, check if the POSIX threads header, pthread.h, is available. -# If it isn't, don't bother looking for the threads libraries. -ac_safe=`echo "pthread.h" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for pthread.h""... 
$ac_c" 1>&6 -echo "configure:984: checking for pthread.h" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:994: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - : -else - echo "$ac_t""no" 1>&6 -acx_pthread_ok=noheader -fi - - -# We must check for the threads library under a number of different -# names; the ordering is very important because some systems -# (e.g. DEC) have both -lpthread and -lpthreads, where one of the -# libraries is broken (non-POSIX). - -# First of all, check if the user has set any of the PTHREAD_LIBS, -# etcetera environment variables, and if threads linking works using -# them: -if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - echo $ac_n "checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS""... 
$ac_c" 1>&6 -echo "configure:1031: checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS" >&5 - cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - acx_pthread_ok=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* - echo "$ac_t""$acx_pthread_ok" 1>&6 - if test x"$acx_pthread_ok" = xno; then - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" - fi - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" -fi - -# Create a list of thread flags to try. Items starting with a "-" are -# C compiler flags, and other items are library names, except for "none" -# which indicates that we try without any flags at all. - -acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt" - -# The ordering *is* (sometimes) important. Some notes on the -# individual items follow: - -# pthreads: AIX (must check this before -lpthread) -# none: in case threads are in libc; should be tried before -Kthread and -# other compiler flags to prevent continual compiler warnings -# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) -# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) -# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) -# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) -# -pthreads: Solaris/gcc -# -mthreads: Mingw32/gcc, Lynx/gcc -# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it -# doesn't hurt to check since this sometimes defines pthreads too; -# also defines -D_REENTRANT) -# pthread: Linux, etcetera -# --thread-safe: KAI C++ - -case "${host_cpu}-${host_os}" in - *solaris*) - - # On Solaris (at least, for some versions), libc contains stubbed - # (non-functional) versions of the pthreads routines, so link-based - # tests will erroneously succeed. (We need to link with -pthread or - # -lpthread.) 
(The stubs are missing pthread_cleanup_push, or rather - # a function called by this macro, so we could check for that, but - # who knows whether they'll stub that too in a future libc.) So, - # we'll just look for -pthreads and -lpthread first: - - acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags" - ;; -esac - -if test x"$acx_pthread_ok" = xno; then -for flag in $acx_pthread_flags; do - - case $flag in - none) - echo $ac_n "checking whether pthreads work without any flags""... $ac_c" 1>&6 -echo "configure:1106: checking whether pthreads work without any flags" >&5 - ;; - - -*) - echo $ac_n "checking whether pthreads work with $flag""... $ac_c" 1>&6 -echo "configure:1111: checking whether pthreads work with $flag" >&5 - PTHREAD_CFLAGS="$flag" - ;; - - *) - echo $ac_n "checking for the pthreads library -l$flag""... $ac_c" 1>&6 -echo "configure:1117: checking for the pthreads library -l$flag" >&5 - PTHREAD_LIBS="-l$flag" - ;; - esac - - save_LIBS="$LIBS" - save_CFLAGS="$CFLAGS" - LIBS="$PTHREAD_LIBS $LIBS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Check for various functions. We must include pthread.h, - # since some functions may be macros. (On the Sequent, we - # need a special flag -Kthread to make this header compile.) - # We check for pthread_join because it is in -lpthread on IRIX - # while pthread_create is in libc. We check for pthread_attr_init - # due to DEC craziness with -lpthreads. We check for - # pthread_cleanup_push because it is one of the few pthread - # functions on Solaris that doesn't have a non-functional libc stub. - # We try pthread_create on general principles. 
- cat > conftest.$ac_ext < -int main() { -pthread_t th; pthread_join(th, 0); - pthread_attr_init(0); pthread_cleanup_push(0, 0); - pthread_create(0,0,0,0); pthread_cleanup_pop(0); -; return 0; } -EOF -if { (eval echo configure:1146: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - acx_pthread_ok=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - - echo "$ac_t""$acx_pthread_ok" 1>&6 - if test "x$acx_pthread_ok" = xyes; then - break; - fi - - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" -done -fi - -# Various other checks: -if test "x$acx_pthread_ok" = xyes; then - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Detect AIX lossage: threads are created detached by default - # and the JOINABLE attribute has a nonstandard name (UNDETACHED). - echo $ac_n "checking for joinable pthread attribute""... 
$ac_c" 1>&6 -echo "configure:1178: checking for joinable pthread attribute" >&5 - cat > conftest.$ac_ext < -int main() { -int attr=PTHREAD_CREATE_JOINABLE; -; return 0; } -EOF -if { (eval echo configure:1187: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - ok=PTHREAD_CREATE_JOINABLE -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ok=unknown -fi -rm -f conftest* - if test x"$ok" = xunknown; then - cat > conftest.$ac_ext < -int main() { -int attr=PTHREAD_CREATE_UNDETACHED; -; return 0; } -EOF -if { (eval echo configure:1206: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - ok=PTHREAD_CREATE_UNDETACHED -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ok=unknown -fi -rm -f conftest* - fi - if test x"$ok" != xPTHREAD_CREATE_JOINABLE; then - cat >> confdefs.h <<\EOF -#define PTHREAD_CREATE_JOINABLE $ok -EOF - - fi - echo "$ac_t""${ok}" 1>&6 - if test x"$ok" = xunknown; then - echo "configure: warning: we do not know how to create joinable pthreads" 1>&2 - fi - - echo $ac_n "checking if more special flags are required for pthreads""... $ac_c" 1>&6 -echo "configure:1229: checking if more special flags are required for pthreads" >&5 - flag=no - case "${host_cpu}-${host_os}" in - *-aix* | *-freebsd*) flag="-D_THREAD_SAFE";; - *solaris* | alpha*-osf*) flag="-D_REENTRANT";; - esac - echo "$ac_t""${flag}" 1>&6 - if test "x$flag" != xno; then - PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" - fi - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - - # More AIX lossage: must compile with cc_r - # Extract the first word of "cc_r", so it can be a program name with args. -set dummy cc_r; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:1247: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_PTHREAD_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$PTHREAD_CC"; then - ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_PTHREAD_CC="cc_r" - break - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_prog_PTHREAD_CC" && ac_cv_prog_PTHREAD_CC="${CC}" -fi -fi -PTHREAD_CC="$ac_cv_prog_PTHREAD_CC" -if test -n "$PTHREAD_CC"; then - echo "$ac_t""$PTHREAD_CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -else - PTHREAD_CC="$CC" -fi - - - - - -# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: -if test x"$acx_pthread_ok" = xyes; then - MDEFS="${MDEFS} -DHMMER_THREADS" - : -else - acx_pthread_ok=no - -fi - - - ;; - no) echo "POSIX threads support disabled" - ;; - *) echo "Ignoring unknown argument to --disable-threads: $enable_threads" - ;; -esac -else - - echo " Trying to enable default POSIX threads support" - - -acx_pthread_ok=no - -# First, check if the POSIX threads header, pthread.h, is available. -# If it isn't, don't bother looking for the threads libraries. -ac_safe=`echo "pthread.h" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for pthread.h""... 
$ac_c" 1>&6 -echo "configure:1309: checking for pthread.h" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1319: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - : -else - echo "$ac_t""no" 1>&6 -acx_pthread_ok=noheader -fi - - -# We must check for the threads library under a number of different -# names; the ordering is very important because some systems -# (e.g. DEC) have both -lpthread and -lpthreads, where one of the -# libraries is broken (non-POSIX). - -# First of all, check if the user has set any of the PTHREAD_LIBS, -# etcetera environment variables, and if threads linking works using -# them: -if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - echo $ac_n "checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS""... 
$ac_c" 1>&6 -echo "configure:1356: checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS" >&5 - cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - acx_pthread_ok=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* - echo "$ac_t""$acx_pthread_ok" 1>&6 - if test x"$acx_pthread_ok" = xno; then - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" - fi - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" -fi - -# Create a list of thread flags to try. Items starting with a "-" are -# C compiler flags, and other items are library names, except for "none" -# which indicates that we try without any flags at all. - -acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt" - -# The ordering *is* (sometimes) important. Some notes on the -# individual items follow: - -# pthreads: AIX (must check this before -lpthread) -# none: in case threads are in libc; should be tried before -Kthread and -# other compiler flags to prevent continual compiler warnings -# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) -# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) -# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) -# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) -# -pthreads: Solaris/gcc -# -mthreads: Mingw32/gcc, Lynx/gcc -# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it -# doesn't hurt to check since this sometimes defines pthreads too; -# also defines -D_REENTRANT) -# pthread: Linux, etcetera -# --thread-safe: KAI C++ - -case "${host_cpu}-${host_os}" in - *solaris*) - - # On Solaris (at least, for some versions), libc contains stubbed - # (non-functional) versions of the pthreads routines, so link-based - # tests will erroneously succeed. (We need to link with -pthread or - # -lpthread.) 
(The stubs are missing pthread_cleanup_push, or rather - # a function called by this macro, so we could check for that, but - # who knows whether they'll stub that too in a future libc.) So, - # we'll just look for -pthreads and -lpthread first: - - acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags" - ;; -esac - -if test x"$acx_pthread_ok" = xno; then -for flag in $acx_pthread_flags; do - - case $flag in - none) - echo $ac_n "checking whether pthreads work without any flags""... $ac_c" 1>&6 -echo "configure:1431: checking whether pthreads work without any flags" >&5 - ;; - - -*) - echo $ac_n "checking whether pthreads work with $flag""... $ac_c" 1>&6 -echo "configure:1436: checking whether pthreads work with $flag" >&5 - PTHREAD_CFLAGS="$flag" - ;; - - *) - echo $ac_n "checking for the pthreads library -l$flag""... $ac_c" 1>&6 -echo "configure:1442: checking for the pthreads library -l$flag" >&5 - PTHREAD_LIBS="-l$flag" - ;; - esac - - save_LIBS="$LIBS" - save_CFLAGS="$CFLAGS" - LIBS="$PTHREAD_LIBS $LIBS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Check for various functions. We must include pthread.h, - # since some functions may be macros. (On the Sequent, we - # need a special flag -Kthread to make this header compile.) - # We check for pthread_join because it is in -lpthread on IRIX - # while pthread_create is in libc. We check for pthread_attr_init - # due to DEC craziness with -lpthreads. We check for - # pthread_cleanup_push because it is one of the few pthread - # functions on Solaris that doesn't have a non-functional libc stub. - # We try pthread_create on general principles. 
- cat > conftest.$ac_ext < -int main() { -pthread_t th; pthread_join(th, 0); - pthread_attr_init(0); pthread_cleanup_push(0, 0); - pthread_create(0,0,0,0); pthread_cleanup_pop(0); -; return 0; } -EOF -if { (eval echo configure:1471: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - acx_pthread_ok=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - - echo "$ac_t""$acx_pthread_ok" 1>&6 - if test "x$acx_pthread_ok" = xyes; then - break; - fi - - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" -done -fi - -# Various other checks: -if test "x$acx_pthread_ok" = xyes; then - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Detect AIX lossage: threads are created detached by default - # and the JOINABLE attribute has a nonstandard name (UNDETACHED). - echo $ac_n "checking for joinable pthread attribute""... 
$ac_c" 1>&6 -echo "configure:1503: checking for joinable pthread attribute" >&5 - cat > conftest.$ac_ext < -int main() { -int attr=PTHREAD_CREATE_JOINABLE; -; return 0; } -EOF -if { (eval echo configure:1512: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - ok=PTHREAD_CREATE_JOINABLE -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ok=unknown -fi -rm -f conftest* - if test x"$ok" = xunknown; then - cat > conftest.$ac_ext < -int main() { -int attr=PTHREAD_CREATE_UNDETACHED; -; return 0; } -EOF -if { (eval echo configure:1531: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - ok=PTHREAD_CREATE_UNDETACHED -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ok=unknown -fi -rm -f conftest* - fi - if test x"$ok" != xPTHREAD_CREATE_JOINABLE; then - cat >> confdefs.h <<\EOF -#define PTHREAD_CREATE_JOINABLE $ok -EOF - - fi - echo "$ac_t""${ok}" 1>&6 - if test x"$ok" = xunknown; then - echo "configure: warning: we do not know how to create joinable pthreads" 1>&2 - fi - - echo $ac_n "checking if more special flags are required for pthreads""... $ac_c" 1>&6 -echo "configure:1554: checking if more special flags are required for pthreads" >&5 - flag=no - case "${host_cpu}-${host_os}" in - *-aix* | *-freebsd*) flag="-D_THREAD_SAFE";; - *solaris* | alpha*-osf*) flag="-D_REENTRANT";; - esac - echo "$ac_t""${flag}" 1>&6 - if test "x$flag" != xno; then - PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" - fi - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - - # More AIX lossage: must compile with cc_r - # Extract the first word of "cc_r", so it can be a program name with args. -set dummy cc_r; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:1572: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_PTHREAD_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$PTHREAD_CC"; then - ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_PTHREAD_CC="cc_r" - break - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_prog_PTHREAD_CC" && ac_cv_prog_PTHREAD_CC="${CC}" -fi -fi -PTHREAD_CC="$ac_cv_prog_PTHREAD_CC" -if test -n "$PTHREAD_CC"; then - echo "$ac_t""$PTHREAD_CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -else - PTHREAD_CC="$CC" -fi - - - - - -# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: -if test x"$acx_pthread_ok" = xyes; then - MDEFS="${MDEFS} -DHMMER_THREADS" - : -else - acx_pthread_ok=no - -fi - - - -fi - - - -for ac_func in pthread_setconcurrency -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1625: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1653: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - -for ac_func in pthread_attr_setscope -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1680: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1708: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - - -for ac_func in ntohs -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1736: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1764: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -echo $ac_n "checking for ntohs in -lsocket""... $ac_c" 1>&6 -echo "configure:1786: checking for ntohs in -lsocket" >&5 -ac_lib_var=`echo socket'_'ntohs | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lsocket $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo socket | sed -e 's/^a-zA-Z0-9_/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - -fi -done - -for ac_func in ntohl -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1838: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. 
*/ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1866: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -echo $ac_n "checking for ntohl in -lsocket""... 
$ac_c" 1>&6 -echo "configure:1888: checking for ntohl in -lsocket" >&5 -ac_lib_var=`echo socket'_'ntohl | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lsocket $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo socket | sed -e 's/^a-zA-Z0-9_/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - -fi -done - -for ac_func in htons -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1940: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1968: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -echo $ac_n "checking for htons in -lsocket""... $ac_c" 1>&6 -echo "configure:1990: checking for htons in -lsocket" >&5 -ac_lib_var=`echo socket'_'htons | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lsocket $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo socket | sed -e 's/^a-zA-Z0-9_/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - -fi -done - -for ac_func in htonl -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:2042: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. 
*/ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:2070: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -echo $ac_n "checking for htonl in -lsocket""... $ac_c" 1>&6 -echo "configure:2092: checking for htonl in -lsocket" >&5 -ac_lib_var=`echo socket'_'htonl | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lsocket $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo socket | sed -e 's/^a-zA-Z0-9_/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - -fi -done - - -subdirs="squid" - - -echo " Configuration complete. 
Writing Makefiles and such..." -trap '' 1 2 15 -cat > confcache <<\EOF -# This file is a shell script that caches the results of configure -# tests run on this system so they can be shared between configure -# scripts and configure runs. It is not useful on other systems. -# If it contains results you don't want to keep, you may remove or edit it. -# -# By default, configure uses ./config.cache as the cache file, -# creating it if it does not exist already. You can give configure -# the --cache-file=FILE option to use a different cache file; that is -# what configure does when it calls configure scripts in -# subdirectories, so they share the cache. -# Giving --cache-file=/dev/null disables caching, for debugging configure. -# config.status only pays attention to the cache file if you give it the -# --recheck option to rerun configure. -# -EOF -# The following way of writing the cache mishandles newlines in values, -# but we know of no workaround that is simple, portable, and efficient. -# So, don't put newlines in cache variables' values. -# Ultrix sh set writes to stderr and can't be redirected directly, -# and sets the high bit in the cache file unless we assign to the vars. -(set) 2>&1 | - case `(ac_space=' '; set | grep ac_space) 2>&1` in - *ac_space=\ *) - # `set' does not quote correctly, so add quotes (double-quote substitution - # turns \\\\ into \\, and sed turns \\ into \). - sed -n \ - -e "s/'/'\\\\''/g" \ - -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" - ;; - *) - # `set' quotes correctly as required by POSIX, so do not add quotes. 
- sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' - ;; - esac >> confcache -if cmp -s $cache_file confcache; then - : -else - if test -w $cache_file; then - echo "updating cache $cache_file" - cat confcache > $cache_file - else - echo "not updating unwritable cache $cache_file" - fi -fi -rm -f confcache - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -test "x$prefix" = xNONE && prefix=$ac_default_prefix -# Let make expand exec_prefix. -test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' - -# Any assignment to VPATH causes Sun make to only execute -# the first set of double-colon rules, so remove it if not needed. -# If there is a colon in the path, we need to keep it. -if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' -fi - -trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 - -# Transform confdefs.h into DEFS. -# Protect against shell expansion while executing Makefile rules. -# Protect against Makefile macro expansion. -cat > conftest.defs <<\EOF -s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%-D\1=\2%g -s%[ `~#$^&*(){}\\|;'"<>?]%\\&%g -s%\[%\\&%g -s%\]%\\&%g -s%\$%$$%g -EOF -DEFS=`sed -f conftest.defs confdefs.h | tr '\012' ' '` -rm -f conftest.defs - - -# Without the "./", some shells look in PATH for config.status. -: ${CONFIG_STATUS=./config.status} - -echo creating $CONFIG_STATUS -rm -f $CONFIG_STATUS -cat > $CONFIG_STATUS </dev/null | sed 1q`: -# -# $0 $ac_configure_args -# -# Compiler output produced by configure, useful for debugging -# configure, is in ./config.log if it exists. 
- -ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" -for ac_option -do - case "\$ac_option" in - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" - exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; - -version | --version | --versio | --versi | --vers | --ver | --ve | --v) - echo "$CONFIG_STATUS generated by autoconf version 2.13" - exit 0 ;; - -help | --help | --hel | --he | --h) - echo "\$ac_cs_usage"; exit 0 ;; - *) echo "\$ac_cs_usage"; exit 1 ;; - esac -done - -ac_given_srcdir=$srcdir - -trap 'rm -fr `echo "Makefile src/Makefile testsuite/Makefile" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 -EOF -cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF -$ac_vpsub -$extrasub -s%@SHELL@%$SHELL%g -s%@CFLAGS@%$CFLAGS%g -s%@CPPFLAGS@%$CPPFLAGS%g -s%@CXXFLAGS@%$CXXFLAGS%g -s%@FFLAGS@%$FFLAGS%g -s%@DEFS@%$DEFS%g -s%@LDFLAGS@%$LDFLAGS%g -s%@LIBS@%$LIBS%g -s%@exec_prefix@%$exec_prefix%g -s%@prefix@%$prefix%g -s%@program_transform_name@%$program_transform_name%g -s%@bindir@%$bindir%g -s%@sbindir@%$sbindir%g -s%@libexecdir@%$libexecdir%g -s%@datadir@%$datadir%g -s%@sysconfdir@%$sysconfdir%g -s%@sharedstatedir@%$sharedstatedir%g -s%@localstatedir@%$localstatedir%g -s%@libdir@%$libdir%g -s%@includedir@%$includedir%g -s%@oldincludedir@%$oldincludedir%g -s%@infodir@%$infodir%g -s%@mandir@%$mandir%g -s%@MDEFS@%$MDEFS%g -s%@PVMLIBDIR@%$PVMLIBDIR%g -s%@PVMINCDIR@%$PVMINCDIR%g -s%@PVMFLAG@%$PVMFLAG%g -s%@PVMPROGS@%$PVMPROGS%g -s%@PVMLIBS@%$PVMLIBS%g -s%@CC@%$CC%g -s%@LN_S@%$LN_S%g -s%@RANLIB@%$RANLIB%g -s%@EXEC_DEPENDENCY@%$EXEC_DEPENDENCY%g -s%@host@%$host%g -s%@host_alias@%$host_alias%g -s%@host_cpu@%$host_cpu%g -s%@host_vendor@%$host_vendor%g -s%@host_os@%$host_os%g -s%@CPP@%$CPP%g -s%@PTHREAD_CC@%$PTHREAD_CC%g -s%@PTHREAD_LIBS@%$PTHREAD_LIBS%g -s%@PTHREAD_CFLAGS@%$PTHREAD_CFLAGS%g -s%@subdirs@%$subdirs%g - -CEOF -EOF 
- -cat >> $CONFIG_STATUS <<\EOF - -# Split the substitutions into bite-sized pieces for seds with -# small command number limits, like on Digital OSF/1 and HP-UX. -ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. -ac_file=1 # Number of current file. -ac_beg=1 # First line for current file. -ac_end=$ac_max_sed_cmds # Line after last line for current file. -ac_more_lines=: -ac_sed_cmds="" -while $ac_more_lines; do - if test $ac_beg -gt 1; then - sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file - else - sed "${ac_end}q" conftest.subs > conftest.s$ac_file - fi - if test ! -s conftest.s$ac_file; then - ac_more_lines=false - rm -f conftest.s$ac_file - else - if test -z "$ac_sed_cmds"; then - ac_sed_cmds="sed -f conftest.s$ac_file" - else - ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" - fi - ac_file=`expr $ac_file + 1` - ac_beg=$ac_end - ac_end=`expr $ac_end + $ac_max_sed_cmds` - fi -done -if test -z "$ac_sed_cmds"; then - ac_sed_cmds=cat -fi -EOF - -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF -for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then - # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". - case "$ac_file" in - *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` - ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; - *) ac_file_in="${ac_file}.in" ;; - esac - - # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. - - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` - if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then - # The file is in a subdirectory. - test ! -d "$ac_dir" && mkdir "$ac_dir" - ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" - # A "../" for each directory in $ac_dir_suffix. - ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` - else - ac_dir_suffix= ac_dots= - fi - - case "$ac_given_srcdir" in - .) srcdir=. - if test -z "$ac_dots"; then top_srcdir=. 
- else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; - /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; - *) # Relative path. - srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" - top_srcdir="$ac_dots$ac_given_srcdir" ;; - esac - - - echo creating "$ac_file" - rm -f "$ac_file" - configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." - case "$ac_file" in - *Makefile*) ac_comsub="1i\\ -# $configure_input" ;; - *) ac_comsub= ;; - esac - - ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` - sed -e "$ac_comsub -s%@configure_input@%$configure_input%g -s%@srcdir@%$srcdir%g -s%@top_srcdir@%$top_srcdir%g -" $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file -fi; done -rm -f conftest.s* - -EOF -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF - -exit 0 -EOF -chmod +x $CONFIG_STATUS -rm -fr confdefs* $ac_clean_files -test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 - -if test "$no_recursion" != yes; then - - # Remove --cache-file and --srcdir arguments so they do not pile up. - ac_sub_configure_args= - ac_prev= - for ac_arg in $ac_configure_args; do - if test -n "$ac_prev"; then - ac_prev= - continue - fi - case "$ac_arg" in - -cache-file | --cache-file | --cache-fil | --cache-fi \ - | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) - ac_prev=cache_file ;; - -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ - | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) - ;; - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) - ac_prev=srcdir ;; - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) - ;; - *) ac_sub_configure_args="$ac_sub_configure_args $ac_arg" ;; - esac - done - - for ac_config_dir in squid; do - - # Do not complain, so a configure script can configure whichever - # parts of a large source tree are present. - if test ! 
-d $srcdir/$ac_config_dir; then - continue - fi - - echo configuring in $ac_config_dir - - case "$srcdir" in - .) ;; - *) - if test -d ./$ac_config_dir || mkdir ./$ac_config_dir; then :; - else - { echo "configure: error: can not create `pwd`/$ac_config_dir" 1>&2; exit 1; } - fi - ;; - esac - - ac_popdir=`pwd` - cd $ac_config_dir - - # A "../" for each directory in /$ac_config_dir. - ac_dots=`echo $ac_config_dir|sed -e 's%^\./%%' -e 's%[^/]$%&/%' -e 's%[^/]*/%../%g'` - - case "$srcdir" in - .) # No --srcdir option. We are building in place. - ac_sub_srcdir=$srcdir ;; - /*) # Absolute path. - ac_sub_srcdir=$srcdir/$ac_config_dir ;; - *) # Relative path. - ac_sub_srcdir=$ac_dots$srcdir/$ac_config_dir ;; - esac - - # Check for guested configure; otherwise get Cygnus style configure. - if test -f $ac_sub_srcdir/configure; then - ac_sub_configure=$ac_sub_srcdir/configure - elif test -f $ac_sub_srcdir/configure.in; then - ac_sub_configure=$ac_configure - else - echo "configure: warning: no configuration information is in $ac_config_dir" 1>&2 - ac_sub_configure= - fi - - # The recursion is here. - if test -n "$ac_sub_configure"; then - - # Make the cache file name correct relative to the subdirectory. - case "$cache_file" in - /*) ac_sub_cache_file=$cache_file ;; - *) # Relative path. - ac_sub_cache_file="$ac_dots$cache_file" ;; - esac - - echo "running ${CONFIG_SHELL-/bin/sh} $ac_sub_configure $ac_sub_configure_args --cache-file=$ac_sub_cache_file --srcdir=$ac_sub_srcdir" - # The eval makes quoting arguments work. 
- if eval ${CONFIG_SHELL-/bin/sh} $ac_sub_configure $ac_sub_configure_args --cache-file=$ac_sub_cache_file --srcdir=$ac_sub_srcdir - then : - else - { echo "configure: error: $ac_sub_configure failed for $ac_config_dir" 1>&2; exit 1; } - fi - fi - - cd $ac_popdir - done -fi - - diff --git a/forester/archive/RIO/others/hmmer/documentation/man/hmmalign.man b/forester/archive/RIO/others/hmmer/documentation/man/hmmalign.man deleted file mode 100644 index dc08445..0000000 --- a/forester/archive/RIO/others/hmmer/documentation/man/hmmalign.man +++ /dev/null @@ -1,154 +0,0 @@ -.TH "hmmalign" 1 @RELEASEDATE@ "HMMER @RELEASE@" "HMMER Manual" - -.SH NAME -.TP -hmmalign - align sequences to an HMM profile - -.SH SYNOPSIS -.B hmmalign -.I [options] -.I hmmfile -.I seqfile - -.SH DESCRIPTION - -.B hmmalign -reads an HMM file from -.I hmmfile -and a set of sequences from -.I seqfile, -aligns the sequences to the profile HMM, -and outputs a multiple sequence alignment. - -.PP -.I seqfile -may be in any unaligned or aligned file format -accepted by HMMER. If it is in a multiple alignment format -(e.g. Stockholm, MSF, SELEX, ClustalW), the existing alignment -is ignored (i.e., the sequences are read as if they were -unaligned - hmmalign will align them the way it wants). - -.SH OPTIONS - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.TP -.B -m -Include in the alignment only those symbols aligned to match states. -Do not show symbols assigned to insert states. - -.TP -.BI -o " " -Save alignment to file -.I -instead of to standard output. - -.TP -.B -q -quiet; suppress all output except the alignment itself. -Useful for piping or redirecting the output. - -.SH EXPERT OPTIONS - -.TP -.BI --informat " " -Assert that the input -.I seqfile -is in format -.I ; -do not run Babelfish format autodection. 
This increases -the reliability of the program somewhat, because -the Babelfish can make mistakes; particularly -recommended for unattended, high-throughput runs -of HMMER. Valid format strings include FASTA, -GENBANK, EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF, -CLUSTAL, and PHYLIP. See the User's Guide for a complete -list. - -.TP -.BI --mapali " " -Reads an alignment from file -.I -and aligns it as a single object to the HMM; e.g. the alignment in -.I -is held fixed. -This allows you to align sequences to a model with -.B hmmalign -and view them in the context of an existing trusted -multiple alignment. -The alignment to the alignment is defined by a "map" kept -in the HMM, and so is fast and guaranteed to be consistent -with the way the HMM was constructed from the alignment. -The alignment in the file -.I -must be exactly the alignment that the HMM was built from. -Compare the -.B --withali -option. - -.TP -.BI --withali " " -Reads an alignment from file -.I -and aligns it as a single object to the HMM; e.g. the alignment in -.I -is held fixed. -This allows you to align sequences to a model with -.B hmmalign -and view them in the context of an existing trusted -multiple alignment. The alignment to the alignment is -done with a heuristic (nonoptimal) dynamic programming procedure, -which may be somewhat slow and is not guaranteed to -be completely consistent with the way the HMM was -constructed (though it should be quite close). -However, any alignment can be used, not just the alignment that -the HMM was built from. Compare the -.B --mapali -option. - -.SH SEE ALSO - -.PP -Master man page, with full list of and guide to the individual man -pages: see -.B hmmer(1). -.PP -A User guide and tutorial came with the distribution: -.B Userguide.ps -[Postscript] and/or -.B Userguide.pdf -[PDF]. 
-.PP -Finally, all documentation is also available online via WWW: -.B http://hmmer.wustl.edu/ - -.SH AUTHOR - -This software and documentation is: -.nf -@COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -.fi -See the file COPYING in your distribution for complete details. - -.nf -Sean Eddy -HHMI/Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/documentation/man/hmmbuild.man b/forester/archive/RIO/others/hmmer/documentation/man/hmmbuild.man deleted file mode 100644 index f4856b8..0000000 --- a/forester/archive/RIO/others/hmmer/documentation/man/hmmbuild.man +++ /dev/null @@ -1,476 +0,0 @@ -.TH "hmmbuild" 1 @RELEASEDATE@ "HMMER @RELEASE@" "HMMER Manual" - -.SH NAME -.TP -hmmbuild - build a profile HMM from an alignment - -.SH SYNOPSIS -.B hmmbuild -.I [options] -.I hmmfile -.I alignfile - -.SH DESCRIPTION - -.B hmmbuild -reads a multiple sequence alignment file -.I alignfile -, builds a new profile HMM, and saves the HMM in -.I hmmfile. - -.PP -.I alignfile -may be in ClustalW, GCG MSF, SELEX, Stockholm, or aligned FASTA -alignment format. The format is automatically detected. - -.PP -By default, the model is configured to find one or more -nonoverlapping alignments to the complete model: multiple -global alignments with respect to the model, and local with -respect to the sequence. -This -is analogous to the behavior of the -.B hmmls -program of HMMER 1. -To configure the model for multiple -.I local -alignments -with respect to the model and local with respect to -the sequence, -a la the old program -.B hmmfs, -use the -.B -f -(fragment) option. 
More rarely, you may want to -configure the model for a single -global alignment (global with respect to both -model and sequence), using the -.B -g -option; -or to configure the model for a single local/local alignment -(a la standard Smith/Waterman, or the old -.B hmmsw -program), use the -.B -s -option. - -.SH OPTIONS - -.TP -.B -f -Configure the model for finding multiple domains per sequence, -where each domain can be a local (fragmentary) alignment. This -is analogous to the old -.B hmmfs -program of HMMER 1. - -.TP -.B -g -Configure the model for finding a single global alignment to -a target sequence, analogous to -the old -.B hmms -program of HMMER 1. - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.TP -.BI -n " " -Name this HMM -.I . -.I -can be any string of non-whitespace characters (e.g. one "word"). -There is no length limit (at least not one imposed by HMMER; -your shell will complain about command line lengths first). - -.TP -.BI -o " " -Re-save the starting alignment to -.I , -in Stockholm format. -The columns which were assigned to match states will be -marked with x's in an #=RF annotation line. -If either the -.B --hand -or -.B --fast -construction options were chosen, the alignment may have -been slightly altered to be compatible with Plan 7 transitions, -so saving the final alignment and comparing to the -starting alignment can let you view these alterations. -See the User's Guide for more information on this arcane -side effect. - -.TP -.B -s -Configure the model for finding a single local alignment per -target sequence. This is analogous to the standard Smith/Waterman -algorithm or the -.B hmmsw -program of HMMER 1. - -.TP -.B -A -Append this model to an existing -.I hmmfile -rather than creating -.I hmmfile. -Useful for building HMM libraries (like Pfam). - -.TP -.B -F -Force overwriting of an existing -.I hmmfile. 
-Otherwise HMMER will refuse to clobber your existing HMM files, -for safety's sake. - -.SH EXPERT OPTIONS - -.TP -.B --amino -Force the sequence alignment to be interpreted as amino acid -sequences. Normally HMMER autodetects whether the alignment is -protein or DNA, but sometimes alignments are so small that -autodetection is ambiguous. See -.B --nucleic. - -.TP -.BI --archpri " " -Set the "architecture prior" used by MAP architecture construction to -.I , -where -.I -is a probability between 0 and 1. This parameter governs a geometric -prior distribution over model lengths. As -.I -increases, longer models are favored a priori. -As -.I -decreases, it takes more residue conservation in a column to -make a column a "consensus" match column in the model architecture. -The 0.85 default has been chosen empirically as a reasonable setting. - -.TP -.B --binary -Write the HMM to -.I hmmfile -in HMMER binary format instead of readable ASCII text. - -.TP -.BI --cfile " " -Save the observed emission and transition counts to -.I -after the architecture has been determined (e.g. after residues/gaps -have been assigned to match, delete, and insert states). -This option is used in HMMER development for generating data files -useful for training new Dirichlet priors. The format of -count files is documented in the User's Guide. - -.TP -.B --fast -Quickly and heuristically determine the architecture of the model by -assigning all columns with more than a certain fraction of gap -characters to insert states. By default this fraction is 0.5, and it -can be changed using the -.B --gapmax -option. -The default construction algorithm is a maximum a posteriori (MAP) -algorithm, which is slower. - -.TP -.BI --gapmax " " -Controls the -.I --fast -model construction algorithm, but if -.I --fast -is not being used, has no effect. -If a column has more than a fraction -.I -of gap symbols in it, it gets assigned to an insert column. 
-.I -is a frequency from 0 to 1, and by default is set -to 0.5. Higher values of -.I -mean more columns get assigned to consensus, and models get -longer; smaller values of -.I -mean fewer columns get assigned to consensus, and models get -smaller. -.I - -.TP -.B --hand -Specify the architecture of the model by hand: the alignment file must -be in SELEX or Stockholm format, and the reference annotation -line (#=RF in SELEX, #=GC RF in Stockholm) is used to specify -the architecture. Any column marked with a non-gap symbol (such -as an 'x', for instance) is assigned as a consensus (match) column in -the model. - -.TP -.BI --idlevel " " -Controls both the determination of effective sequence number and -the behavior of the -.I --wblosum -weighting option. The sequence alignment is clustered by percent -identity, and the number of clusters at a cutoff threshold of -.I -is used to determine the effective sequence number. -Higher values of -.I -give more clusters and higher effective sequence -numbers; lower values of -.I -give fewer clusters and lower effective sequence numbers. -.I -is a fraction from 0 to 1, and -by default is set to 0.62 (corresponding to the clustering level used -in constructing the BLOSUM62 substitution matrix). - -.TP -.BI --informat " " -Assert that the input -.I seqfile -is in format -.I ; -do not run Babelfish format autodetection. This increases -the reliability of the program somewhat, because -the Babelfish can make mistakes; particularly -recommended for unattended, high-throughput runs -of HMMER. Valid format strings include FASTA, -GENBANK, EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF, -CLUSTAL, and PHYLIP. See the User's Guide for a complete -list. - -.TP -.B --noeff -Turn off the effective sequence number calculation, and use the -true number of sequences instead. This will usually reduce the -sensitivity of the final model (so don't do it without good reason!) 
- -.TP -.B --nucleic -Force the alignment to be interpreted as nucleic acid sequence, -either RNA or DNA. Normally HMMER autodetects whether the alignment is -protein or DNA, but sometimes alignments are so small that -autodetection is ambiguous. See -.B --amino. - -.TP -.BI --null " " -Read a null model from -.I . -The default for protein is to use average amino acid frequencies from -Swissprot 34 and p1 = 350/351; for nucleic acid, the default is -to use 0.25 for each base and p1 = 1000/1001. For documentation -of the format of the null model file and further explanation -of how the null model is used, see the User's Guide. - -.TP -.BI --pam " " -Apply a heuristic PAM- (substitution matrix-) based prior on match -emission probabilities instead of -the default mixture Dirichlet. The substitution matrix is read -from -.I . -See -.B --pamwgt. - -The default Dirichlet state transition prior and insert emission prior -are unaffected. Therefore in principle you could combine -.B --prior -with -.B --pam -but this isn't recommended, as it hasn't been tested. ( -.B --pam -itself hasn't been tested much!) - -.TP -.BI --pamwgt " " -Controls the weight on a PAM-based prior. Only has effect if -.B --pam -option is also in use. -.I -is a positive real number, 20.0 by default. -.I -is the number of "pseudocounts" contributed by the heuristic -prior. Very high values of -.I -can force a scoring system that is entirely driven by the -substitution matrix, making -HMMER somewhat approximate Gribskov profiles. - -.TP -.BI --pbswitch " " -For alignments with a very large number of sequences, -the GSC, BLOSUM, and Voronoi weighting schemes are slow; -they're O(N^2) for N sequences. Henikoff position-based -weights (PB weights) are more efficient. At or above a certain -threshold sequence number -.I -.B hmmbuild -will switch from GSC, BLOSUM, or Voronoi weights to -PB weights. 
To disable this switching behavior (at the cost -of compute time), set -.I -to be something larger than the number of sequences in -your alignment. -.I -is a positive integer; the default is 1000. - -.TP -.BI --prior " " -Read a Dirichlet prior from -.I , -replacing the default mixture Dirichlet. -The format of prior files is documented in the User's Guide, -and an example is given in the Demos directory of the HMMER -distribution. - -.TP -.BI --swentry " " -Controls the total probability that is distributed to local entries -into the model, versus starting at the beginning of the model -as in a global alignment. -.I -is a probability from 0 to 1, and by default is set to 0.5. -Higher values of -.I -mean that hits that are fragments on their left (N or 5'-terminal) side will be -penalized less, but complete global alignments will be penalized more. -Lower values of -.I -mean that fragments on the left will be penalized more, and -global alignments on this side will be favored. -This option only affects the configurations that allow local -alignments, -e.g. -.B -s -and -.B -f; -unless one of these options is also activated, this option has no effect. -You have independent control over local/global alignment behavior for -the N/C (5'/3') termini of your target sequences using -.B --swentry -and -.B --swexit. - -.TP -.BI --swexit " " -Controls the total probability that is distributed to local exits -from the model, versus ending an alignment at the end of the model -as in a global alignment. -.I -is a probability from 0 to 1, and by default is set to 0.5. -Higher values of -.I -mean that hits that are fragments on their right (C or 3'-terminal) side will be -penalized less, but complete global alignments will be penalized more. -Lower values of -.I -mean that fragments on the right will be penalized more, and -global alignments on this side will be favored. -This option only affects the configurations that allow local -alignments, -e.g. 
-.B -s -and -.B -f; -unless one of these options is also activated, this option has no effect. -You have independent control over local/global alignment behavior for -the N/C (5'/3') termini of your target sequences using -.B --swentry -and -.B --swexit. - -.TP -.B --verbose -Print more possibly useful stuff, such as the individual scores for -each sequence in the alignment. - -.TP -.B --wblosum -Use the BLOSUM filtering algorithm to weight the sequences, -instead of the default. -Cluster the sequences at a given percentage identity -(see -.B --idlevel); -assign each cluster a total weight of 1.0, distributed equally -amongst the members of that cluster. - - -.TP -.B --wgsc -Use the Gerstein/Sonnhammer/Chothia ad hoc sequence weighting -algorithm. This is already the default, so this option has no effect -(unless it follows another option in the --w family, in which case it -overrides it). - -.TP -.B --wme -Use the Krogh/Mitchison maximum entropy algorithm to "weight" -the sequences. This supersedes the Eddy/Mitchison/Durbin -maximum discrimination algorithm, which gives almost -identical weights but is less robust. ME weighting seems -to give a marginal increase in sensitivity -over the default GSC weights, but takes a fair amount of time. - -.TP -.B --wnone -Turn off all sequence weighting. - -.TP -.B --wpb -Use the Henikoff position-based weighting scheme. - -.TP -.B --wvoronoi -Use the Sibbald/Argos Voronoi sequence weighting algorithm -in place of the default GSC weighting. - -.SH SEE ALSO - -.PP -Master man page, with full list of and guide to the individual man -pages: see -.B hmmer(1). -.PP -A User guide and tutorial came with the distribution: -.B Userguide.ps -[Postscript] and/or -.B Userguide.pdf -[PDF]. 
-.PP -Finally, all documentation is also available online via WWW: -.B http://hmmer.wustl.edu/ - -.SH AUTHOR - -This software and documentation is: -.nf -@COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -.fi -See the file COPYING in your distribution for complete details. - -.nf -Sean Eddy -HHMI/Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/documentation/man/hmmcalibrate.man b/forester/archive/RIO/others/hmmer/documentation/man/hmmcalibrate.man deleted file mode 100644 index e472b92..0000000 --- a/forester/archive/RIO/others/hmmer/documentation/man/hmmcalibrate.man +++ /dev/null @@ -1,172 +0,0 @@ -.TH "hmmcalibrate" 1 @RELEASEDATE@ "HMMER @RELEASE@" "HMMER Manual" - -.SH NAME -.TP -hmmcalibrate - calibrate HMM search statistics - -.SH SYNOPSIS -.B hmmcalibrate -.I [options] -.I hmmfile - -.SH DESCRIPTION - -.B hmmcalibrate -reads an HMM file from -.I hmmfile, -scores a large number of synthesized random sequences with it, fits an -extreme value distribution (EVD) to the histogram of those scores, and -re-saves -.I hmmfile -now including the EVD parameters. - -.PP -.B hmmcalibrate -may take several minutes (or longer) to run. -While it is running, a temporary file called -.I hmmfile.xxx -is generated in your working directory. -If you abort -.B hmmcalibrate -prematurely (ctrl-C, for instance), your original -.I hmmfile -will be untouched, and you should delete the -.I hmmfile.xxx -temporary file. - -.SH OPTIONS - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. 
- -.SH EXPERT OPTIONS - -.TP -.BI --cpu " " -Sets the maximum number of CPUs that the program -will run on. The default is to use all CPUs -in the machine. Overrides the HMMER_NCPU -environment variable. Only affects threaded -versions of HMMER (the default on most systems). - -.TP -.BI --fixed " " -Fix the length of the random sequences to -.I , -where -.I -is a positive (and reasonably sized) integer. -The default is instead to generate sequences with -a variety of different lengths, controlled by a Gaussian -(normal) distribution. - -.TP -.BI --histfile " " -Save a histogram of the scores and the fitted theoretical curve -to file -.I . - -.TP -.BI --mean " " -Set the mean length of the synthetic sequences to -.I , -where -.I -is a positive real number. The default is 350. - -.TP -.BI --num " " -Set the number of synthetic sequences to -.I , -where -.I -is a positive integer. If -.I is less than about 1000, the fit to the EVD may fail. -Higher numbers of -.I -will give better determined EVD parameters. The default -is 5000; it was empirically chosen as -a tradeoff between accuracy and computation time. - -.TP -.B --pvm -Run on a Parallel Virtual Machine (PVM). The PVM must -already be running. The client program -.B hmmcalibrate-pvm -must be installed on all the PVM nodes. -Optional PVM support must have been compiled into -HMMER. - -.TP -.BI --sd " " -Set the standard deviation of the synthetic sequence -length distribution to -.I , -where -.I -is a positive real number. The default is 350. Note that the -Gaussian is left-truncated so that no sequences have lengths -<= 0. - -.TP -.BI --seed " " -Set the random seed to -.I , -where -.I -is a positive integer. The default is to use -.B time() -to generate a different seed for each run, which -means that two different runs of -.B hmmcalibrate -on the same HMM will give slightly different -results. You can use -this option to generate reproducible results for -different -.B hmmcalibrate -runs on the same HMM. 
- -.SH SEE ALSO - -.PP -Master man page, with full list of and guide to the individual man -pages: see -.B hmmer(1). -.PP -A User guide and tutorial came with the distribution: -.B Userguide.ps -[Postscript] and/or -.B Userguide.pdf -[PDF]. -.PP -Finally, all documentation is also available online via WWW: -.B http://hmmer.wustl.edu/ - -.SH AUTHOR - -This software and documentation is: -.nf -@COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -.fi -See the file COPYING in your distribution for complete details. - -.nf -Sean Eddy -HHMI/Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - diff --git a/forester/archive/RIO/others/hmmer/documentation/man/hmmconvert.man b/forester/archive/RIO/others/hmmer/documentation/man/hmmconvert.man deleted file mode 100644 index 58cb3a6..0000000 --- a/forester/archive/RIO/others/hmmer/documentation/man/hmmconvert.man +++ /dev/null @@ -1,124 +0,0 @@ -.TH "hmmconvert" 1 "@RELEASEDATE@" "HMMER @RELEASE@" "HMMER Manual" - -.SH NAME -.TP -hmmconvert - convert between profile HMM file formats - -.SH SYNOPSIS -.B hmmconvert -.I [options] -.I oldhmmfile -.I newhmmfile - -.SH DESCRIPTION - -.B hmmconvert -reads an HMM file from -.I oldhmmfile -in any HMMER format, and writes it to a new file -.I newhmmfile -in a new format. -.I oldhmmfile -and -.I newhmmfile -must be different files; you can't reliably overwrite -the old file. -By default, the new HMM file is written in HMMER 2 -ASCII format. - -Available formats are HMMER 2 ASCII (default), HMMER 2 binary -.I (-b) -GCG profile -.I (-p) -, and Compugen XSW extended profile -.I (-P). 
- -.SH OPTIONS - -.TP -.B -a -Convert to HMMER 2 ASCII file. This is the default, so this option -is unnecessary. - -.TP -.B -b -Convert to HMMER 2 binary file. - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.TP -.B -p -Convert to GCG profile .prf format. - -.TP -.B -A -Append mode; append to -.I newhmmfile -rather than creating a new file. - -.TP -.B -F -Force; if -.I newhmmfile -already exists, and -.I -A -is not being used to append to the file, -hmmconvert will refuse to clobber the existing -file unless -.I -F -is used. - -.TP -.B -P -Convert the HMM to Compugen XSW extended profile format, -which is similar to GCG profile format but has two -extra columns for delete-open and delete-extend costs. -(I do not believe that Compugen publicly supports this -format; it may be undocumented.) - -.SH SEE ALSO - -.PP -Master man page, with full list of and guide to the individual man -pages: see -.B hmmer(1). -.PP -A User guide and tutorial came with the distribution: -.B Userguide.ps -[Postscript] and/or -.B Userguide.pdf -[PDF]. -.PP -Finally, all documentation is also available online via WWW: -.B http://hmmer.wustl.edu/ - -.SH AUTHOR - -This software and documentation is: -.nf -@COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -.fi -See the file COPYING in your distribution for complete details. - -.nf -Sean Eddy -HHMI/Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. 
-St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/documentation/man/hmmemit.man b/forester/archive/RIO/others/hmmer/documentation/man/hmmemit.man deleted file mode 100644 index bfc61c9..0000000 --- a/forester/archive/RIO/others/hmmer/documentation/man/hmmemit.man +++ /dev/null @@ -1,130 +0,0 @@ -.TH "hmmemit" 1 @RELEASEDATE@ "HMMER @RELEASE@" "HMMER Manual" - -.SH NAME -.TP -hmmemit - generate sequences from a profile HMM - -.SH SYNOPSIS -.B hmmemit -.I [options] -.I hmmfile - -.SH DESCRIPTION - -.B hmmemit -reads an HMM file from -.I hmmfile -containing one or more HMMs, -and generates a number of sequences from each HMM; -or, if the -.B -c -option is selected, generate a single majority-rule consensus. -This can be useful for various applications in which one needs a simulation -of sequences consistent with a sequence family consensus. - -.PP -By default, -.B hmmemit -generates 10 sequences and outputs them in FASTA (unaligned) format. - -.SH OPTIONS - -.TP -.B -a -Write the generated sequences in an aligned format (SELEX) rather than -FASTA. - -.TP -.B -c -Predict a single majority-rule consensus sequence instead of sampling -sequences from the HMM's probability distribution. Highly conserved -residues (p >= 0.9 for DNA, p >= 0.5 for protein) are shown in upper -case; others are shown in lower case. Some insert states may become -part of the majority rule consensus, because they are used in >= 50% -of generated sequences; when this happens, insert-generated residues -are simply shown as "x". - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.TP -.BI -n " " -Generate -.I -sequences. Default is 10. - -.TP -.BI -o " " -Save the synthetic sequences to file -.I -rather than writing them to stdout. - -.TP -.B -q -Quiet; suppress all output except for the sequences themselves. 
-Useful for piping or directing the output. - -.SH EXPERT OPTIONS - -.TP -.BI --seed " " -Set the random seed to -.I , -where -.I -is a positive integer. The default is to use -.B time() -to generate a different seed for each run, which -means that two different runs of -.B hmmemit -on the same HMM will give slightly different -results. You can use -this option to generate reproducible results. - - - -.SH SEE ALSO - -.PP -Master man page, with full list of and guide to the individual man -pages: see -.B hmmer(1). -.PP -A User guide and tutorial came with the distribution: -.B Userguide.ps -[Postscript] and/or -.B Userguide.pdf -[PDF]. -.PP -Finally, all documentation is also available online via WWW: -.B http://hmmer.wustl.edu/ - -.SH AUTHOR - -This software and documentation is: -.nf -@COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -.fi -See the file COPYING in your distribution for complete details. - -.nf -Sean Eddy -HHMI/Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/documentation/man/hmmer.man b/forester/archive/RIO/others/hmmer/documentation/man/hmmer.man deleted file mode 100644 index 70e3ce9..0000000 --- a/forester/archive/RIO/others/hmmer/documentation/man/hmmer.man +++ /dev/null @@ -1,168 +0,0 @@ -.TH "hmmer" 1 @RELEASEDATE@ "HMMER @RELEASE@" "HMMER Manual" - -.SH NAME -.TP -HMMER - profile hidden Markov model software - -.SH SYNOPSIS -.TP -.B hmmalign -Align multiple sequences to a profile HMM. - -.TP -.B hmmbuild -Build a profile HMM from a given multiple sequence alignment. 
- -.TP -.B hmmcalibrate -Determine appropriate statistical significance parameters -for a profile HMM prior to doing database searches. - -.TP -.B hmmconvert -Convert HMMER profile HMMs to other formats, such as GCG profiles. - -.TP -.B hmmemit -Generate sequences probabilistically from a profile HMM. - -.TP -.B hmmfetch -Retrieve an HMM from an HMM database - -.TP -.B hmmindex -Create a binary SSI index for an HMM database - -.TP -.B hmmpfam -Search a profile HMM database with a sequence (i.e., annotate various -kinds of domains in the query sequence). - -.TP -.B hmmsearch -Search a sequence database with a profile HMM (i.e., find additional -homologues of a modeled family). - -.SH DESCRIPTION - -These programs use profile hidden Markov models (profile HMMs) to -model the primary structure consensus of a family of protein or -nucleic acid sequences. - -.SH OPTIONS - -.PP -All -.B HMMER -programs give a brief summary of their command-line syntax and options -if invoked without any arguments. -When invoked with the single argument, -.B -h -(i.e., help), a program will report more verbose command-line usage -information, including rarely used, experimental, and expert options. -.B -h -will report version numbers which are useful if -you need to report a bug or problem to me. - -.PP -Each -.B HMMER -program has its own man page briefly summarizing command line usage. -There is also a user's guide that came -with the software distribution, which includes a tutorial introduction -and more detailed descriptions of the programs. - -See http://hmmer.wustl.edu/ for on-line documentation and -the current HMMER release. - -.PP -In general, no command line options should be needed by beginning users. -The defaults are set up for optimum performance in most situations. -Options that are single lowercase letters (e.g. -.B -a -) are "common" options that are expected to be frequently used -and will be important in many applications. 
-Options that are single uppercase letters (e.g. -.B -B -) are usually less common options, but also may be important -in some applications. -Options that are full words (e.g. -.B --verbose -) are either rarely used, experimental, or expert options. -Some experimental options are only there for my own ongoing experiments -with HMMER, and may not be supported or documented adequately. - - -.SH SEQUENCE FILE FORMATS - -In general, -.B HMMER -attempts to read most common biological sequence file formats. -It autodetects the format of the file. It also autodetects -whether the sequences are protein or nucleic acid. -Standard IUPAC degeneracy codes are allowed in addition -to the usual 4-letter or 20-letter codes. - -.TP -.B Unaligned sequences -Unaligned sequence files may be in FASTA, Swissprot, EMBL, GenBank, -PIR, Intelligenetics, Strider, or GCG format. -These formats -are documented in the User's Guide. - -.TP -.B Sequence alignments -Multiple sequence alignments may be in CLUSTALW, SELEX, or GCG MSF -format. These formats -are documented in the User's Guide. - -.SH ENVIRONMENT VARIABLES - -For ease of using large stable sequence and HMM databases, -.B HMMER -looks for sequence files and HMM files in the current -working directory as well as in system directories specified -by environment variables. - -.TP -.B BLASTDB -Specifies the directory location of sequence databases. Example: -.B /seqlibs/blast-db/. -In installations that use BLAST software, this environment variable -is likely to already be set. - -.TP -.B HMMERDB -Specifies the directory location of HMM databases. Example: -.B /seqlibs/pfam/. - -.SH SEE ALSO - -.PP -@SEEALSO@ -.PP -User guide and tutorial: Userguide.ps -.PP -WWW: -.B http://hmmer.wustl.edu/ - -.SH AUTHOR - -This software and documentation is Copyright (C) 1992-1998 Washington -University School of Medicine. It is freely distributable under terms -of the GNU General Public License. 
See COPYING in the source code -distribution for more details, or contact me. - -.nf -Sean Eddy -Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/documentation/man/hmmfetch.man b/forester/archive/RIO/others/hmmer/documentation/man/hmmfetch.man deleted file mode 100644 index c8e8051..0000000 --- a/forester/archive/RIO/others/hmmer/documentation/man/hmmfetch.man +++ /dev/null @@ -1,83 +0,0 @@ -.TH "hmmfetch" 1 @RELEASEDATE@ "HMMER @RELEASE@" "HMMER Manual" - -.SH NAME -.TP -hmmfetch - retrieve an HMM from an HMM database - -.SH SYNOPSIS -.B hmmfetch -.I [options] -.I database -.I name - -.SH DESCRIPTION - -.B hmmfetch -is a small utility that retrieves an HMM called -.I name -from a HMMER model database called -.I database. -in a new format, -and prints that model to standard output. -For example, -.I hmmfetch Pfam rrm -retrieves the RRM (RNA recognition motif) model from -Pfam, if the environment variable HMMERDB is -set to the location of the Pfam database. -The retrieved HMM file is written in HMMER 2 ASCII format. - -.PP -The database must have an associated GSI index file. -To index an HMM database, use the program -.B hmmindex. - -.SH OPTIONS - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - - - -.SH SEE ALSO - -.PP -Master man page, with full list of and guide to the individual man -pages: see -.B hmmer(1). -.PP -A User guide and tutorial came with the distribution: -.B Userguide.ps -[Postscript] and/or -.B Userguide.pdf -[PDF]. 
-.PP -Finally, all documentation is also available online via WWW: -.B http://hmmer.wustl.edu/ - -.SH AUTHOR - -This software and documentation is: -.nf -@COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -.fi -See the file COPYING in your distribution for complete details. - -.nf -Sean Eddy -HHMI/Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi diff --git a/forester/archive/RIO/others/hmmer/documentation/man/hmmindex.man b/forester/archive/RIO/others/hmmer/documentation/man/hmmindex.man deleted file mode 100644 index 3bf171b..0000000 --- a/forester/archive/RIO/others/hmmer/documentation/man/hmmindex.man +++ /dev/null @@ -1,73 +0,0 @@ -.TH "hmmindex" 1 @RELEASEDATE@ "HMMER @RELEASE@" "HMMER Manual" - -.SH NAME -.TP -hmmindex - create a binary SSI index for an HMM database - -.SH SYNOPSIS -.B hmmindex -.I [options] -.I database - -.SH DESCRIPTION - -.B hmmindex -is a utility that creates a binary SSI ("squid sequence index" -format) index for an HMM database file called -.I database. -The new index file is named -.IR database.ssi. -An SSI index file is required for -.B hmmfetch -to work, and also for the PVM implementation of -.B hmmpfam. - -.SH OPTIONS - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - - -.SH SEE ALSO - -.PP -Master man page, with full list of and guide to the individual man -pages: see -.B hmmer(1). -.PP -A User guide and tutorial came with the distribution: -.B Userguide.ps -[Postscript] and/or -.B Userguide.pdf -[PDF]. 
-.PP -Finally, all documentation is also available online via WWW: -.B http://hmmer.wustl.edu/ - -.SH AUTHOR - -This software and documentation is: -.nf -@COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -.fi -See the file COPYING in your distribution for complete details. - -.nf -Sean Eddy -HHMI/Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi diff --git a/forester/archive/RIO/others/hmmer/documentation/man/hmmpfam.man b/forester/archive/RIO/others/hmmer/documentation/man/hmmpfam.man deleted file mode 100644 index a3e8c8a..0000000 --- a/forester/archive/RIO/others/hmmer/documentation/man/hmmpfam.man +++ /dev/null @@ -1,320 +0,0 @@ -.TH "hmmpfam" 1 @RELEASEDATE@ "HMMER @RELEASE@" "HMMER Manual" - -.SH NAME -.TP -hmmpfam - search one or more sequences against an HMM database - -.SH SYNOPSIS -.B hmmpfam -.I [options] -.I hmmfile -.I seqfile - -.SH DESCRIPTION - -.B hmmpfam -reads a sequence file -.I seqfile -and compares each sequence in it, one at a time, against all the HMMs in -.I hmmfile -looking for significantly similar sequence matches. - -.PP -.I hmmfile -will be looked for first in the current working directory, -then in a directory named by the environment variable -.I HMMERDB. -This lets administrators install HMM library(s) such as -Pfam in a common location. - -.PP -There is a separate output report for each sequence in -.I seqfile. -This report consists of three sections: a ranked list -of the best scoring HMMs, a list of the -best scoring domains in order of their occurrence -in the sequence, and alignments for all the best scoring -domains. 
-A sequence score may be higher than a domain score for -the same sequence if there is more than one domain in the sequence; -the sequence score takes into account all the domains. -All sequences scoring above the -.I -E -and -.I -T -cutoffs are shown in the first list, then -.I every -domain found in this list is -shown in the second list of domain hits. -If desired, E-value and bit score thresholds may also be applied -to the domain list using the -.I --domE -and -.I --domT -options. - -.SH OPTIONS - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.TP -.B -n -Specify that models and sequence are nucleic acid, not protein. -Other HMMER programs autodetect this; but because of the order in -which -.B hmmpfam -accesses data, it can't reliably determine the correct "alphabet" -by itself. - -.TP -.BI -A " " -Limits the alignment output to the -.I -best scoring domains. -.B -A0 -shuts off the alignment output and can be used to reduce -the size of output files. - -.TP -.BI -E " " -Set the E-value cutoff for the per-sequence ranked hit list to -.I , -where -.I -is a positive real number. The default is 10.0. Hits with E-values -better than (less than) this threshold will be shown. - -.TP -.BI -T " " -Set the bit score cutoff for the per-sequence ranked hit list to -.I , -where -.I -is a real number. -The default is negative infinity; by default, the threshold -is controlled by E-value and not by bit score. -Hits with bit scores better than (greater than) this threshold -will be shown. - -.TP -.BI -Z " " -Calculate the E-value scores as if we had seen a sequence database of -.I -sequences. The default is arbitrarily set to 59021, the size of -Swissprot 34. - -.SH EXPERT OPTIONS - -.TP -.B --acc -Report HMM accessions instead of names in the output reports. -Useful for high-throughput annotation, where the data are being -parsed for storage in a relational database. 
- -.TP -.B --compat -Use the output format of HMMER 2.1.1, the 1998-2001 public -release; provided so 2.1.1 parsers don't have to be rewritten. - -.TP -.BI --cpu " " -Sets the maximum number of CPUs that the program -will run on. The default is to use all CPUs -in the machine. Overrides the HMMER_NCPU -environment variable. Only affects threaded -versions of HMMER (the default on most systems). - -.TP -.B --cut_ga -Use Pfam GA (gathering threshold) score cutoffs. -Equivalent -to --globT --domT , but the GA1 and GA2 cutoffs -are read from each HMM in -.I hmmfile -individually. hmmbuild puts these cutoffs there -if the alignment file was annotated in a Pfam-friendly -alignment format (extended SELEX or Stockholm format) and -the optional GA annotation line was present. If these -cutoffs are not set in the HMM file, -.B --cut_ga -doesn't work. - -.TP -.B --cut_tc -Use Pfam TC (trusted cutoff) score cutoffs. Equivalent -to --globT --domT , but the TC1 and TC2 cutoffs -are read from each HMM in -.I hmmfile -individually. hmmbuild puts these cutoffs there -if the alignment file was annotated in a Pfam-friendly -alignment format (extended SELEX or Stockholm format) and -the optional TC annotation line was present. If these -cutoffs are not set in the HMM file, -.B --cut_tc -doesn't work. - -.TP -.B --cut_nc -Use Pfam NC (noise cutoff) score cutoffs. Equivalent -to --globT --domT , but the NC1 and NC2 cutoffs -are read from each HMM in -.I hmmfile -individually. hmmbuild puts these cutoffs there -if the alignment file was annotated in a Pfam-friendly -alignment format (extended SELEX or Stockholm format) and -the optional NC annotation line was present. If these -cutoffs are not set in the HMM file, -.B --cut_nc -doesn't work. - -.TP -.BI --domE " " -Set the E-value cutoff for the per-domain ranked hit list to -.I , -where -.I -is a positive real number. 
-The default is infinity; by default, all domains in the sequences -that passed the first threshold will be reported in the second list, -so that the number of domains reported in the per-sequence list is -consistent with the number that appear in the per-domain list. - -.TP -.BI --domT " <x>" -Set the bit score cutoff for the per-domain ranked hit list to -.I <x>, -where -.I <x> -is a real number. The default is negative infinity; -by default, all domains in the sequences -that passed the first threshold will be reported in the second list, -so that the number of domains reported in the per-sequence list is -consistent with the number that appear in the per-domain list. -.I Important note: -only one domain in a sequence is absolutely controlled by this -parameter, or by -.B --domE. -The second and subsequent domains in a sequence have a de facto -bit score threshold of 0 because of the details of how HMMER -works. HMMER requires at least one pass through the main model -per sequence; to do more than one pass (more than one domain) -the multidomain alignment must have a better score than the -single domain alignment, and hence the extra domains must contribute -positive score. See the Users' Guide for more detail. - -.TP -.BI --forward -Use the Forward algorithm instead of the Viterbi algorithm -to determine the per-sequence scores. Per-domain scores are -still determined by the Viterbi algorithm. Some have argued that -Forward is a more sensitive algorithm for detecting remote -sequence homologues; my experiments with HMMER have not -confirmed this, however. - -.TP -.BI --informat " <s>" -Assert that the input -.I seqfile -is in format -.I <s>; -do not run Babelfish format autodetection. This increases -the reliability of the program somewhat, because -the Babelfish can make mistakes; particularly -recommended for unattended, high-throughput runs -of HMMER. Valid format strings include FASTA, -GENBANK, EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF, -CLUSTAL, and PHYLIP.
See the User's Guide for a complete -list. - -.TP -.B --null2 -Turn off the post hoc second null model. By default, each alignment -is rescored by a postprocessing step that takes into account possible -biased composition in either the HMM or the target sequence. -This is almost essential in database searches, especially with -local alignment models. There is a very small chance that this -postprocessing might remove real matches, and -in these cases -.B --null2 -may improve sensitivity at the expense of reducing -specificity by letting biased composition hits through. - -.TP -.B --pvm -Run on a Parallel Virtual Machine (PVM). The PVM must -already be running. The client program -.B hmmpfam-pvm -must be installed on all the PVM nodes. -The HMM database -.I hmmfile -and an associated GSI index file -.IR hmmfile. gsi -must also be installed on all the PVM nodes. -(The GSI index is produced by the program -.BR hmmindex .) -Because the PVM implementation is I/O bound, -it is highly recommended that each node have a -local copy of -.I hmmfile -rather than NFS mounting a shared copy. -Optional PVM support must have been compiled into -HMMER for -.B --pvm -to function. - -.TP -.B --xnu -Turn on XNU filtering of target protein sequences. Has no effect -on nucleic acid sequences. In trial experiments, -.B --xnu -appears to perform less well than the default -post hoc null2 model. - - - -.SH SEE ALSO - -.PP -Master man page, with full list of and guide to the individual man -pages: see -.B hmmer(1). -.PP -A User guide and tutorial came with the distribution: -.B Userguide.ps -[Postscript] and/or -.B Userguide.pdf -[PDF]. 
-.PP -Finally, all documentation is also available online via WWW: -.B http://hmmer.wustl.edu/ - -.SH AUTHOR - -This software and documentation is: -.nf -@COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -.fi -See the file COPYING in your distribution for complete details. - -.nf -Sean Eddy -HHMI/Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/documentation/man/hmmsearch.man b/forester/archive/RIO/others/hmmer/documentation/man/hmmsearch.man deleted file mode 100644 index 0073a07..0000000 --- a/forester/archive/RIO/others/hmmer/documentation/man/hmmsearch.man +++ /dev/null @@ -1,289 +0,0 @@ -.TH "hmmsearch" 1 @RELEASEDATE@ "HMMER @RELEASE@" "HMMER Manual" - -.SH NAME -.TP -hmmsearch - search a sequence database with a profile HMM - -.SH SYNOPSIS -.B hmmsearch -.I [options] -.I hmmfile -.I seqfile - -.SH DESCRIPTION - -.B hmmsearch -reads an HMM from -.I hmmfile -and searches -.I seqfile -for significantly similar sequence matches. - -.PP -.I seqfile -will be looked for first in the current working directory, -then in a directory named by the environment variable -.I BLASTDB. -This lets users use existing BLAST databases, if BLAST -has been configured for the site. - -.PP -.B hmmsearch -may take minutes or even hours to run, depending -on the size of the sequence database. It is a good -idea to redirect the output to a file. - -.PP -The output consists of four sections: a ranked list -of the best scoring sequences, a ranked list of the -best scoring domains, alignments for all the best scoring -domains, and a histogram of the scores. 
-A sequence score may be higher than a domain score for -the same sequence if there is more than one domain in the sequence; -the sequence score takes into account all the domains. -All sequences scoring above the -.I -E -and -.I -T -cutoffs are shown in the first list, then -.I every -domain found in this list is -shown in the second list of domain hits. -If desired, E-value and bit score thresholds may also be applied -to the domain list using the -.I --domE -and -.I --domT -options. - -.SH OPTIONS - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.TP -.BI -A " " -Limits the alignment output to the -.I -best scoring domains. -.B -A0 -shuts off the alignment output and can be used to reduce -the size of output files. - -.TP -.BI -E " " -Set the E-value cutoff for the per-sequence ranked hit list to -.I , -where -.I -is a positive real number. The default is 10.0. Hits with E-values -better than (less than) this threshold will be shown. - -.TP -.BI -T " " -Set the bit score cutoff for the per-sequence ranked hit list to -.I , -where -.I -is a real number. -The default is negative infinity; by default, the threshold -is controlled by E-value and not by bit score. -Hits with bit scores better than (greater than) this threshold -will be shown. - -.TP -.BI -Z " " -Calculate the E-value scores as if we had seen a sequence database of -.I -sequences. The default is the number of sequences seen in your -database file -.I . - -.SH EXPERT OPTIONS - -.TP -.B --compat -Use the output format of HMMER 2.1.1, the 1998-2001 public -release; provided so 2.1.1 parsers don't have to be rewritten. - -.TP -.BI --cpu " " -Sets the maximum number of CPUs that the program -will run on. The default is to use all CPUs -in the machine. Overrides the HMMER_NCPU -environment variable. Only affects threaded -versions of HMMER (the default on most systems). - -.TP -.B --cut_ga -Use Pfam GA (gathering threshold) score cutoffs. 
-Equivalent -to --globT --domT , but the GA1 and GA2 cutoffs -are read from the HMM file. hmmbuild puts these cutoffs there -if the alignment file was annotated in a Pfam-friendly -alignment format (extended SELEX or Stockholm format) and -the optional GA annotation line was present. If these -cutoffs are not set in the HMM file, -.B --cut_ga -doesn't work. - -.TP -.B --cut_tc -Use Pfam TC (trusted cutoff) score cutoffs. Equivalent -to --globT --domT , but the TC1 and TC2 cutoffs -are read from the HMM file. hmmbuild puts these cutoffs there -if the alignment file was annotated in a Pfam-friendly -alignment format (extended SELEX or Stockholm format) and -the optional TC annotation line was present. If these -cutoffs are not set in the HMM file, -.B --cut_tc -doesn't work. - -.TP -.B --cut_nc -Use Pfam NC (noise cutoff) score cutoffs. Equivalent -to --globT --domT , but the NC1 and NC2 cutoffs -are read from the HMM file. hmmbuild puts these cutoffs there -if the alignment file was annotated in a Pfam-friendly -alignment format (extended SELEX or Stockholm format) and -the optional NC annotation line was present. If these -cutoffs are not set in the HMM file, -.B --cut_nc -doesn't work. - -.TP -.BI --domE " " -Set the E-value cutoff for the per-domain ranked hit list to -.I , -where -.I -is a positive real number. -The default is infinity; by default, all domains in the sequences -that passed the first threshold will be reported in the second list, -so that the number of domains reported in the per-sequence list is -consistent with the number that appear in the per-domain list. - -.TP -.BI --domT " " -Set the bit score cutoff for the per-domain ranked hit list to -.I , -where -.I -is a real number. 
The default is negative infinity; -by default, all domains in the sequences -that passed the first threshold will be reported in the second list, -so that the number of domains reported in the per-sequence list is -consistent with the number that appear in the per-domain list. -.I Important note: -only one domain in a sequence is absolutely controlled by this -parameter, or by -.B --domE. -The second and subsequent domains in a sequence have a de facto -bit score threshold of 0 because of the details of how HMMER -works. HMMER requires at least one pass through the main model -per sequence; to do more than one pass (more than one domain) -the multidomain alignment must have a better score than the -single domain alignment, and hence the extra domains must contribute -positive score. See the Users' Guide for more detail. - -.TP -.BI --forward -Use the Forward algorithm instead of the Viterbi algorithm -to determine the per-sequence scores. Per-domain scores are -still determined by the Viterbi algorithm. Some have argued that -Forward is a more sensitive algorithm for detecting remote -sequence homologues; my experiments with HMMER have not -confirmed this, however. - -.TP -.BI --informat " <s>" -Assert that the input -.I seqfile -is in format -.I <s>; -do not run Babelfish format autodetection. This increases -the reliability of the program somewhat, because -the Babelfish can make mistakes; particularly -recommended for unattended, high-throughput runs -of HMMER. Valid format strings include FASTA, -GENBANK, EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF, -CLUSTAL, and PHYLIP. See the User's Guide for a complete -list. - -.TP -.B --null2 -Turn off the post hoc second null model. By default, each alignment -is rescored by a postprocessing step that takes into account possible -biased composition in either the HMM or the target sequence. -This is almost essential in database searches, especially with -local alignment models.
There is a very small chance that this -postprocessing might remove real matches, and -in these cases -.B --null2 -may improve sensitivity at the expense of reducing -specificity by letting biased composition hits through. - -.TP -.B --pvm -Run on a Parallel Virtual Machine (PVM). The PVM must -already be running. The client program -.B hmmsearch-pvm -must be installed on all the PVM nodes. -Optional PVM support must have been compiled into -HMMER. - -.TP -.B --xnu -Turn on XNU filtering of target protein sequences. Has no effect -on nucleic acid sequences. In trial experiments, -.B --xnu -appears to perform less well than the default -post hoc null2 model. - - - -.SH SEE ALSO - -.PP -Master man page, with full list of and guide to the individual man -pages: see -.B hmmer(1). -.PP -A User guide and tutorial came with the distribution: -.B Userguide.ps -[Postscript] and/or -.B Userguide.pdf -[PDF]. -.PP -Finally, all documentation is also available online via WWW: -.B http://hmmer.wustl.edu/ - -.SH AUTHOR - -This software and documentation is: -.nf -@COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -.fi -See the file COPYING in your distribution for complete details. - -.nf -Sean Eddy -HHMI/Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/install-sh b/forester/archive/RIO/others/hmmer/install-sh deleted file mode 100755 index e9de238..0000000 --- a/forester/archive/RIO/others/hmmer/install-sh +++ /dev/null @@ -1,251 +0,0 @@ -#!/bin/sh -# -# install - install a program, script, or datafile -# This comes from X11R5 (mit/util/scripts/install.sh). 
-# -# Copyright 1991 by the Massachusetts Institute of Technology -# -# Permission to use, copy, modify, distribute, and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in advertising or -# publicity pertaining to distribution of the software without specific, -# written prior permission. M.I.T. makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. It can only install one file at a time, a restriction -# shared with many OS's install programs. - - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" - - -# put in absolute paths if you don't have them in your path; or use env. vars. 
- -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -transformbasename="" -transform_arg="" -instcmd="$mvprog" -chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd="$cpprog" - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd="$stripprog" - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "install: no input file specified" - exit 1 -else - true -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d $dst ]; then - instcmd=: - chmodcmd="" - else - instcmd=mkdir - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. 
- - if [ -f $src -o -d $src ] - then - true - else - echo "install: $src does not exist" - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "install: no destination specified" - exit 1 - else - true - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d $dst ] - then - dst="$dst"/`basename $src` - else - true - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' -' -IFS="${IFS-${defaultIFS}}" - -oIFS="${IFS}" -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS="${oIFS}" - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp="${pathcomp}${1}" - shift - - if [ ! -d "${pathcomp}" ] ; - then - $mkdirprog "${pathcomp}" - else - true - fi - - pathcomp="${pathcomp}/" -done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd $dst && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi -else - -# If we're going to rename the final executable, determine the name now. - - if [ x"$transformarg" = x ] - then - dstfile=`basename $dst` - else - dstfile=`basename $dst $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename $dst` - else - true - fi - -# Make a temp file name in the proper directory. 
- - dsttmp=$dstdir/#inst.$$# - -# Move or copy the file name to the temp name - - $doit $instcmd $src $dsttmp && - - trap "rm -f ${dsttmp}" 0 && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && - -# Now rename the file to the real destination. - - $doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile - -fi && - - -exit 0 diff --git a/forester/archive/RIO/others/hmmer/squid/00README b/forester/archive/RIO/others/hmmer/squid/00README deleted file mode 100644 index fa03d67..0000000 --- a/forester/archive/RIO/others/hmmer/squid/00README +++ /dev/null @@ -1,39 +0,0 @@ -SQUID - library of functions for biological sequence analysis -Copyright (C) 1992-2001 Washington University School of Medicine - -SQUID is a freely redistributable library of C code functions for -sequence analysis. SQUID also includes a number of small utility -programs. 
- -To install squid, see the file: - INSTALL -- instructions for installing the programs - -If you have any questions about redistributing squid or using -squid code in your own work, see the files: - COPYRIGHT -- copyright notice, and information on my distribution policy - LICENSE -- version 2 of the GNU Public License (see COPYRIGHT) - -For a web page with more information on squid, see: - http://www.genetics.wustl.edu/eddy/software/#squid - -You can always download the latest stable release of squid from: - ftp://ftp.genetics.wustl.edu/pub/eddy/software/squid.tar.gz - -The development codebase is available by anonymous CVS: - cvs -d :pserver:anonymous@skynet.wustl.edu:/repository/sre login - (password "anonymous") - cvs -d :pserver:anonymous@skynet.wustl.edu:/repository/sre checkout squid - -If you encounter any bugs in this library, or you have any questions -or comments, please e-mail me at the address below. Due to limited -personal time, I may not respond, but I do read all my mail. - - Sean Eddy - eddy@genetics.wustl.edu - - HHMI/Dept. of Genetics - Washington University School of Medicine - 660 South Euclid Box 8232 - Saint Louis Missouri 63110 - USA - diff --git a/forester/archive/RIO/others/hmmer/squid/Docs/abstract.tex b/forester/archive/RIO/others/hmmer/squid/Docs/abstract.tex deleted file mode 100644 index d27df57..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Docs/abstract.tex +++ /dev/null @@ -1,7 +0,0 @@ -\begin{abstract} - -The {\tt squid} library is an evolving collection of C functions for -nucleic acid and protein sequence analysis. 
- -\end {abstract} - diff --git a/forester/archive/RIO/others/hmmer/squid/Docs/formats.tex b/forester/archive/RIO/others/hmmer/squid/Docs/formats.tex deleted file mode 100644 index f775cc9..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Docs/formats.tex +++ /dev/null @@ -1,517 +0,0 @@ -% -------------------------------------------------------------- -% squid:formats.tex -% SRE, Wed Jul 14 17:54:59 1999 -% $CVS Id$ -% -------------------------------------------------------------- - -\chapter {Sequence file formats} - -\section{Summary} - -The software can handle a number of different file formats. By -default, it autodetects the file format, so you don't have to worry -about converting formats. Most common file formats are recognized, -including FASTA, Genbank, EMBL, Swissprot, PIR, and FASTA for -unaligned sequences, and GCG MSF, Clustal, Phylip, and Stockholm -format for multiple sequence alignments. Some parts of the source code -call the autodetector the ``Babelfish''. - -The Babelfish has three drawbacks. First, it takes a small amount of -time to do the autodetection. Second, the Babelfish is aggressive, and -it makes mistakes when a file isn't one of the known formats -- in -particular, it can recognize plain text files as SELEX alignments, -because the SELEX format is so free-form. Third, because the Babelfish -works by reading the first part of the file then rewinding it before -starting to process it, you can't use the Babelfish on a nonrewindable -stream: e.g. when you're taking sequence input from a UNIX pipe -instead of a file, or when the file is gzipped and has to be -decompressed before processing. In normal use, when you're using the -software interactively from the command line on sequence files that -you're familiar with, the Babelfish is very convenient and -(relatively) safe. 
- -However, you'll find that there are times when you want to override -the Babelfish -- particularly in high-throughput analysis, when you -know the format your files are supposed to be in, and you'd rather -increase robustness and sacrifice interactive flexibility. All the -programs have an \verb+--informat + option that lets you -specify the format and shut off the Babelfish. You \emph{must} use -\verb+--informat+ to use compressed files, or to read sequence from a -UNIX pipe... see below for more details on these tricks. - -\section{Formats recognized by the Babelfish} - -Recognized unaligned sequence file formats: - -\begin{tabular}{ll}\hline -Format name & Note \\ \hline -fasta & BLAST flatfile databases, etc.\\ -genbank & NCBI Genbank flat file format.\\ -embl & Includes both EMBL (DNA) and SWISSPROT (protein) databases.\\ -pir & Protein Information Resource database (NBRF/Georgetown)\\ -gcg & Wisconsin Genetics Computer Group; only allows one sequence per file.\\ -gcgdata & I think this GCG database format is obsolete now.\\ \hline -\end{tabular} - -Recognized multiple sequence alignment file formats: - -\begin{tabular}{ll}\hline -Format name & Note \\ \hline -stockholm & Pfam format. Allows databases of more than one alignment per file\\ -selex & Old NeXagen RNA alignment format, adopted by early HMMER releases.\\ -msf & GCG's alignment format.\\ -clustal & ClustalV, ClustalW, and friends.\\ -a2m & Aligned FASTA format; see comment below.\\ -phylip & Format used by Felsenstein's PHYLIP phylogenetic inference software\\\hline -\end{tabular} - -Aligned FASTA format (here called ``A2M'', though I believe that what -Haussler's group at UCSC started calling A2M is yet another variant of -aligned FASTA that's incompatible with this A2M) is only autodetected -when an alignment file is expected. Otherwise an A2M file will be -recognized as unaligned FASTA, and its gap characters (if any) will be -parsed as sequence characters -- often not what you want. 
- -Alignment files may be used when unaligned files are expected -- the -sequences will silently be de-aligned and read sequentially. The -converse is not true; you can't give an unaligned sequence format when -an alignment is expected (makes sense, right?). - -There is no provision for enforcing that single unaligned sequence -formats really do contain just a single sequence. An attempt to -convert a multisequence file to GCG format will silently ``succeed'', -and the file may look ok to your eye, but that multisequence ``GCG'' -file is illegal. The data will be corrupted if you try to read that -file back in, possibly without generating any error messages. - -It turns out that other formats work too, but they're undocumented, -not subjected to any quality control testing at software release time, -and prone to change without notice at my slightest whim. (In other -words, even less supported than the software already is.) The brave, -curious, or desperate are invited to peruse -\prog{seqio.c} and \prog{squid.h}. - -\section{Special tricks} - -\subsection{Reading from standard input (probably UNIX-only)} - -If you give ``-'' as a sequence filename, the software will read the -sequences from standard input rather than from a file. You will need -to specify the format of the incoming data using the -\verb+--informat+ option. -Any format except SELEX can be read from standard input. This lets you -use any program downstream in a standard UNIX pipe. - -There is one limitation: you can't use ``-'' more than once on a -command line, for obvious reasons. (How is it supposed to read more -than one file from one standard input stream?) If you do, behavior of -the software is undefined -- in other words, the software don't check -for whether you're making this mistake, so God help you if you do. - -\subsection{Reading from gzip'ed files (probably UNIX-only)} - -A sequence file in any format except SELEX can be compressed by gzip, -and read in its compressed form. 
The software looks for the suffix -\prog{.gz} to detect gzip'ed files. This allows you to save disk space -by keeping sequence files gzip'ed, if you like. gzip is not built in; -the software needs to find a gzip executable in your current PATH. - -If for some reason you name a file with a \prog{.gz} suffix and it's -\emph{not} a gzip-compressed file, the software will still try to -decompress it, and peculiar things may happen. - -\section{FASTA format, the recommended unaligned format} - -FASTA is probably the simplest of formats for unaligned sequences. -FASTA files are easily created in a text editor. Each sequence is -preceded by a line starting with \verb+>+. The first word on this line -is the name of the sequence. The rest of the line is a description of -the sequence (free format). The remaining lines contain the sequence -itself. You can put as many letters on a sequence line as you want. - -\textbf{Example of a simple FASTA file:} -\begin{verbatim} ->seq1 This is the description of my first sequence. -AGTACGTAGTAGCTGCTGCTACGTGCGCTAGCTAGTACGTCA CGACGTAGATGCTAGCTGACTCGATGC ->seq2 This is a description of my second sequence. -CGATCGATCGTACGTCGACTGATCGTAGCTACGTCGTACGTAG CATCGTCAGTTACTGCATGCTCG -\end{verbatim} - -For better or worse, FASTA is not a documented standard. Minor (and -major) variants are in widespread use in the bioinformatics community, -all of which are called ``FASTA format''. My software attempts to -cater to all of them, and is tolerant of common deviations in FASTA -format. Certainly anything that is accepted by the database formatting -programs in NCBI BLAST or WU-BLAST (e.g. setdb, pressdb, xdformat) -will also be accepted by my software. Blank lines in a FASTA file are -ignored, and so are spaces or other gap symbols (dashes, underscores, -periods) in a sequence. 
Other non-amino or non-nucleic acid symbols in -the sequence are also silently ignored, mostly because some people -seem to think that ``*'' or ``.'' should be added to protein sequences -to (redundantly) indicate the end of the sequence. The parser will -also accept unlimited line lengths, which allows it to accommodate the -enormous description lines in the NCBI NR databases. - -On the other hand, any FASTA files \emph{generated} by my software -adhere closely to community standards, and should be usable by other -software packages (BLAST, FASTA, etc.) that are more picky about -parsing their input files. That means you can run a sloppy FASTA file -thru \prog{sreformat} to clean it up. - -Partly because of this tolerance, the software may have a difficult -time dealing with files that are \textit{not} in FASTA format, -especially if you're relying on the Babelfish to do format -autodetection. Some (now mercifully uncommon) file formats are so -similar to FASTA format that they may be erroneously called FASTA by the -Babelfish and then quietly but lethally misparsed. An example is the -old NBRF file format. If you're using \verb+--informat+, things will -be more robust, and the software should simply refuse to accept a -non-FASTA file -- but you shouldn't count on this, because files -perversely similar to FASTA will still confuse the parser. (The gist -of these caveats applies to all formats, not just FASTA.) - -\section{SELEX, the quick and dirty alignment format} - -An example of a simple SELEX alignment file: - -\begin{verbatim} -# Example selex file - -seq1 ACGACGACGACG. -seq2 ..GGGAAAGG.GA -seq3 UUU..AAAUUU.A - -seq1 ..ACG -seq2 AAGGG -seq3 AA...UUU -\end{verbatim} - -SELEX is an interleaved multiple alignment format that arose as a -simple, intuitive format that was easy to write and manipulate -manually in a text editor.
It is usually easy to convert other -alignment formats into SELEX format, even with a couple of lines of -Perl, but it can be harder to go the other way, since SELEX is more -free-format than other alignment formats. For instance, GCG's MSF -format and the output of the CLUSTALV multiple alignment program are -similar interleaved formats that can be converted to SELEX just by -stripping a small number of non-sequence lines out. Because SELEX -evolved to accommodate different user input styles, it is very tolerant -of various inconsistencies such as different gap symbols, varying line -lengths, etc. - -Each line contains a name, followed by the aligned sequence. A space, -dash, underscore, or period denotes a gap. If the alignment is too -long to fit on one line, the alignment is split into multiple blocks, -separated by blank lines. The number of sequences, their order, and -their names must be the same in every block (even if a sequence has no -residues in a given block!) Other blank lines are ignored. You can add -comments to the file on lines starting with a \verb+#+. - -SELEX stands for ``Systematic Evolution of Ligands by Exponential -Enrichment'' -- it refers to the Tuerk and Gold technology for -evolving families of small RNAs for particular functions -\cite{Tuerk90b}. SELEX files were what we used to keep track of -alignments of these small RNA families, at a company then called -NeXagen, in Boulder. It's an interesting piece of historical baggage. -With the development of HMMER and more need for annotated alignments -in Pfam, SELEX format later evolved into ``extended SELEX'', with a -reserved comment style that allowed structural markup and other -annotations, but that became unwieldy. We now use Stockholm format -(see below) for highly annotated alignments. (Extended SELEX is -deprecated and undocumented.) Still, the basic SELEX format remains a -useful ``lowest common denominator'' alignment format, and has been -retained.
- -\subsubsection {Detailed specification of a SELEX file} - -\begin{enumerate} -\item -Any line beginning with a \verb+#=+ as the first two characters is a -parsed machine comment in extended SELEX, and is now deprecated. - -\item -All other lines beginning with a \verb+%+ or \verb+#+ as the first -character are user comments. User comments are ignored by all -software. Anything may appear on these lines. Any number of comments -may be included in a SELEX file, and at any point. - -\item -Lines of data consist of a name followed by a sequence. The total -length of the line must be smaller than 4096 characters. - -\item -Names must be a single word. Any non-whitespace characters are -accepted. No spaces are tolerated in names: names MUST be a -single word. Names must be less than 32 characters long. - -\item In the sequence, any of the characters \verb+-_.+ or a space are -recognized as gaps. Any other characters are interpreted as sequence. -Sequence is case-sensitive. There is a common assumption by my -software that upper-case symbols are used for consensus (match) -positions and lower-case symbols are used for inserts. This language -of ``match'' versus ``insert'' comes from the hidden Markov model -formalism \cite{Krogh94}. To almost all of my software, this isn't -important, and it immediately converts the sequence to all upper-case -after it's read. - -\item -Multiple different sequences are grouped in a block of data lines. -Blocks are separated by blank lines. No blank lines are tolerated -between the sequence lines in a block. Each block in a multi-block -file of a long alignment must have its sequences in the same order in -each block. The names are checked to verify that this is the case; if -not, only a warning is generated. (In manually constructed files, some -users may wish to use shorthand names in subsequent blocks after an -initial block with full names -- but this isn't recommended.) 
-\end{enumerate} - -\section{Stockholm, the recommended multiple sequence alignment format} - -While we recommend a community standard format (FASTA) for unaligned -sequence files, the recommended multiple alignment file format is not -a community standard. The Pfam Consortium developed a format (based -on extended SELEX) called ``Stockholm format''. The reasons for this -are two-fold. First, there really is no standard accepted format for -multiple sequence alignment files, so we don't feel guilty about -inventing a new one. Second, the formats of popular multiple alignment -software (e.g. CLUSTAL, GCG MSF, PHYLIP) do not support rich -documentation and markup of the alignment. Stockholm format was -developed to support extensible markup of multiple sequence -alignments, and we use this capability extensively in both RNA work -(with structural markup) and the Pfam database (with extensive use of -both annotation and markup). - -\subsection{A minimal Stockholm file} -\begin{verbatim} -# STOCKHOLM 1.0 - -seq1 ACDEF...GHIKL -seq2 ACDEF...GHIKL -seq3 ...EFMNRGHIKL - -seq1 MNPQTVWY -seq2 MNPQTVWY -seq3 MNPQT... - -\end{verbatim} - -The simplest Stockholm file is pretty intuitive, easily generated in a -text editor. It is usually easy to convert alignment formats into a -``least common denominator'' Stockholm format. For instance, SELEX, -GCG's MSF format, and the output of the CLUSTALV multiple alignment -program are all similar interleaved formats. - -The first line in the file must be \verb+# STOCKHOLM 1.x+, where -\verb+x+ is a minor version number for the format specification -(and which currently has no effect on my parsers). This line allows a -parser to instantly identify the file format. - -In the alignment, each line contains a name, followed by the aligned -sequence. A dash or period denotes a gap. If the alignment is too long -to fit on one line, the alignment may be split into multiple blocks, -with blocks separated by blank lines. 
The number of sequences, their -order, and their names must be the same in every block. Within a given -block, each (sub)sequence (and any associated \verb+#=GR+ and -\verb+#=GC+ markup, see below) is of equal length, called the -\textit{block length}. Block lengths may differ from block to block; -the block length must be at least one residue, and there is no -maximum. - -Other blank lines are ignored. You can add comments to the file on -lines starting with a \verb+#+. - -All other annotation is added using a tag/value comment style. The -tag/value format is inherently extensible, and readily made -backwards-compatible; unrecognized tags will simply be ignored. Extra -annotation includes consensus and individual RNA or protein secondary -structure, sequence weights, a reference coordinate system for the -columns, and database source information including name, accession -number, and coordinates (for subsequences extracted from a longer -source sequence). See below for details. - -\subsection{Syntax of Stockholm markup} - -There are four types of Stockholm markup annotation, for per-file, -per-sequence, per-column, and per-residue annotation: - -\begin{wideitem} -\item {\emprog{#=GF <tag> <s>}} - Per-file annotation. \prog{<s>} is a free format text line - of annotation type \prog{<tag>}. For example, \prog{#=GF DATE - April 1, 2000}. Can occur anywhere in the file, but usually - all the \prog{#=GF} markups occur in a header. - -\item {\emprog{#=GS <seqname> <tag> <s>}} - Per-sequence annotation. \prog{<s>} is a free format text line - of annotation type \prog{<tag>} associated with the sequence - named \prog{<seqname>}. For example, \prog{#=GS seq1 - SPECIES_SOURCE Caenorhabditis elegans}. Can occur anywhere - in the file, but in single-block formats (e.g. the Pfam - distribution) will typically follow on the line after the - sequence itself, and in multi-block formats (e.g. HMMER - output), will typically occur in the header preceding the - alignment but following the \prog{#=GF} annotation.
- -\item {\emprog{#=GC <tag> <...s...>}} - Per-column annotation. \prog{<...s...>} is an aligned text line - of annotation type \prog{<tag>}. - \verb+#=GC+ lines are - associated with a sequence alignment block; \prog{<...s...>} - is aligned to the residues in the alignment block, and has - the same length as the rest of the block. - Typically \verb+#=GC+ lines are placed at the end of each block. - -\item {\emprog{#=GR <seqname> <tag> <.....s.....>}} - Per-residue annotation. \prog{<...s...>} is an aligned text line - of annotation type \prog{<tag>}, associated with the sequence - named \prog{<seqname>}. - \verb+#=GR+ lines are - associated with one sequence in a sequence alignment block; - \prog{<...s...>} - is aligned to the residues in that sequence, and has - the same length as the rest of the block. - Typically - \verb+#=GR+ lines are placed immediately following the - aligned sequence they annotate. -\end{wideitem} - -\subsection{Semantics of Stockholm markup} - -Any Stockholm parser will accept syntactically correct files, but is -not obligated to do anything with the markup lines. It is up to the -application whether it will attempt to interpret the meaning (the -semantics) of the markup in a useful way. At the two extremes are the -Belvu alignment viewer and the HMMER profile hidden Markov model -software package. - -Belvu simply reads Stockholm markup and displays it, without trying to -interpret it at all. The tag types (\prog{#=GF}, etc.) are sufficient -to tell Belvu how to display the markup: whether it is attached to the -whole file, sequences, columns, or residues. - -HMMER uses Stockholm markup to pick up a variety of information from -the Pfam multiple alignment database. The Pfam consortium therefore -agrees on additional syntax for certain tag types, so HMMER can parse -some markups for useful information. This additional syntax is imposed -by Pfam, HMMER, and other software of mine, not by Stockholm format -per se.
You can think of Stockholm as akin to XML, and what my -software reads as akin to an XML DTD, if you're into that sort of -structured data format lingo. - -The Stockholm markup tags that are parsed semantically by my software -are as follows: - -\subsubsection{Recognized #=GF annotations} -\begin{wideitem} -\item [\emprog{ID <s>}] - Identifier. \emprog{<s>} is a name for the alignment; - e.g. ``rrm''. One word. Unique in file. - -\item [\emprog{AC <s>}] - Accession. \emprog{<s>} is a unique accession number for the - alignment; e.g. - ``PF00001''. Used by the Pfam database, for instance. - Often an alphabetical prefix indicating the database - (e.g. ``PF'') followed by a unique numerical accession. - One word. Unique in file. - -\item [\emprog{DE <s>}] - Description. \emprog{<s>} is a free format line giving - a description of the alignment; e.g. - ``RNA recognition motif proteins''. One line. Unique in file. - -\item [\emprog{AU <s>}] - Author. \emprog{<s>} is a free format line listing the - authors responsible for an alignment; e.g. - ``Bateman A''. One line. Unique in file. - -\item [\emprog{GA <f> <f>}] - Gathering thresholds. Two real numbers giving HMMER bit score - per-sequence and per-domain cutoffs used in gathering the - members of Pfam full alignments. See Pfam and HMMER - documentation for more detail. - -\item [\emprog{NC <f> <f>}] - Noise cutoffs. Two real numbers giving HMMER bit score - per-sequence and per-domain cutoffs, set according to the - highest scores seen for unrelated sequences when gathering - members of Pfam full alignments. See Pfam and HMMER - documentation for more detail. - -\item [\emprog{TC <f> <f>}] - Trusted cutoffs. Two real numbers giving HMMER bit score - per-sequence and per-domain cutoffs, set according to the - lowest scores seen for true homologous sequences that - were above the GA gathering thresholds, when gathering - members of Pfam full alignments. See Pfam and HMMER - documentation for more detail.
-\end{wideitem} - -\subsection{Recognized #=GS annotations} - -\begin{wideitem} -\item [\emprog{WT <f>}] - Weight. \emprog{<f>} is a positive real number giving the - relative weight for a sequence, usually used to compensate - for biased representation by downweighting similar sequences. - Usually the weights average 1.0 (i.e. the weights sum to - the number of sequences in the alignment) but this is not - required. Either every sequence must have a weight annotated, - or none of them can. - -\item [\emprog{AC <s>}] - Accession. \emprog{<s>} is a database accession number for - this sequence. (Compare the \prog{#=GF AC} markup, which gives - an accession for the whole alignment.) One word. - -\item [\emprog{DE <s>}] - Description. \emprog{<s>} is one line giving a description for - this sequence. (Compare the \prog{#=GF DE} markup, which gives - a description for the whole alignment.) -\end{wideitem} - - -\subsection{Recognized #=GC annotations} - -\begin{wideitem} -\item [\emprog{RF}] - Reference line. Any character is accepted as a markup for a - column. The intent is to allow labeling the columns with some - sort of mark. - -\item [\emprog{SS_cons}] - Secondary structure consensus. For protein alignments, - DSSP codes or gaps are accepted as markup: [HGIEBTSCX.-_], where - H is alpha helix, G is 3/10-helix, I is $\pi$-helix, E is extended - strand, B is a residue in an isolated $\beta$-bridge, T is a turn, - S is a bend, C is a random coil or loop, and X is unknown - (for instance, a residue that was not resolved in a crystal - structure). For RNA alignments - the symbols \verb+>+ and \verb+<+ are - used for base pairs (pairs point at each other). \verb-+- indicates - definitely single-stranded positions, and any gap symbol indicates - unassigned bases or single-stranded positions. This description - roughly follows \cite{Konings89}.
- RNA pseudoknots are represented by alphabetic characters, with upper - case letters representing the 5' side of the helix and lower case - letters representing the 3' side. Note that this limits the - annotation to a maximum of 26 pseudoknots per sequence. - - -\item [\emprog{SA_cons}] - Surface accessibility consensus. 0-9, gap symbols, or X are - accepted as markup. 0 means <10\% accessible residue surface - area, 1 means <20\%, 9 means <100\%, etc. X means unknown - structure. -\end{wideitem} - -\subsection{Recognized #=GR annotations} - -\begin{wideitem} -\item [\emprog{SS}] - Secondary structure consensus. See \prog{#=GC SS_cons} above. -\item [\emprog{SA}] - Surface accessibility consensus. See \prog{#=GC SA_cons} above. -\end{wideitem} - - diff --git a/forester/archive/RIO/others/hmmer/squid/Docs/gsi-format.tex b/forester/archive/RIO/others/hmmer/squid/Docs/gsi-format.tex deleted file mode 100644 index 3170824..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Docs/gsi-format.tex +++ /dev/null @@ -1,87 +0,0 @@ -% Mon Dec 5 15:23:18 1994 - -\section{GSI format} - -{\tt GSI} (``generic sequence index'') is a format for indexing -sequence databases. Database retrieval programs such as {\tt sfetch} -can read GSI files when they are available to enable fast retrieval of -a sequence from large databases. - -GSI files are created from sequence databases by Perl scripts. -Scripts are currently provided for indexing GenBank, SwissProt, -GenPept, FASTA, and PIR formatted databases. - -\subsection {GSI programmatic details} - -A single GSI file indexes one or more files in a sequence database. -It is a binary file consisting of a number of fixed-length records. -There are three types of records: one header record, one file record -for every file in the database, and one keyword record for every -sequence retrieval key. (The retrieval key is usually the sequence -name, but may also be a database accession number.) 
- -Every GSI record is 38 bytes long and contains three fields: 32 bytes -of text (31 bytes plus a trailing NUL byte), a 2 byte network short, -and a 4 byte network long. (``Network short'' and ``network long'' -refer to portable integer variables of fixed size and byte order. See -Perl manuals for a few more details.) - -The first record is a header. It contains a short identifying text -string (``GSI''), then the number of files indexed ({\tt nfiles}), and -the number of keywords indexed ({\tt nkeys}). - -The next {\tt nfiles} records (records 1..{\tt nfiles}) map file -numbers onto file names. The three fields are \verb+ +. These records must be in numerical order -according to their file numbers. Because of the 31-character -restriction on filename lengths, the sequence files will generally -have to be in the same directory as the GSI index file. The file -format number is defined in {\tt squid.h}: - -\begin{tabular}{rl} -0 & Unknown \\ -1 & Intelligenetics\\ -2 & Genbank\\ -4 & EMBL\\ -5 & GCG single sequence\\ -6 & Strider \\ -7 & FASTA\\ -8 & Zuker\\ -9 & Idraw\\ -12 & PIR\\ -13 & Raw\\ -14 & SQUID\\ -16 & GCG data library \\ -101& Stockholm alignment\\ -102& SELEX alignment\\ -103& GCG MSF alignment\\ -104& Clustal alignment\\ -105& A2M (aligned FASTA) alignment\\ -106& Phylip\\ -\end{tabular} - -The remaining records ({\tt nfiles}+1..{\tt nfiles+nkeys}) are for -mapping keys onto files and disk offsets. The three fields are -\verb+ +. These records must be -sorted in alphabetic order by their retrieval keys, because the -function GSIGetOffset() locates a keyword in the index file by a -binary search. - -\subsection{Relevant functions} -\begin{description} -\item[GSIOpen()] - Opens a GSI index file. -\item[GSIGetRecord()] - Gets three fields from the current record. -\item[GSIGetOffset()] - Looks up a keyword in a GSI index and returns a filename, - file format, and disk offset in the file. 
-\item[SeqfilePosition()] - Repositions an open sequence file to a given disk offset. -\item[GSIClose()] - Closes an open GSI index file. -\end{description} - - - - diff --git a/forester/archive/RIO/others/hmmer/squid/Docs/intro.tex b/forester/archive/RIO/others/hmmer/squid/Docs/intro.tex deleted file mode 100644 index 166c382..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Docs/intro.tex +++ /dev/null @@ -1,2 +0,0 @@ -\section {Introduction} - diff --git a/forester/archive/RIO/others/hmmer/squid/Docs/main.tex b/forester/archive/RIO/others/hmmer/squid/Docs/main.tex deleted file mode 100644 index e469d1f..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Docs/main.tex +++ /dev/null @@ -1,35 +0,0 @@ - -\documentstyle[jmb]{article} -\setcounter{secnumdepth}{0} -\input{psfig} - -\addtolength{\oddsidemargin}{-.5in} -\addtolength{\textwidth}{1in} -\addtolength{\topmargin}{-.5in} -\addtolength{\textheight}{1in} -\renewcommand{\baselinestretch}{1.2} - -\title{The SQUID sequence function library} - -\author{Sean R. Eddy \\ -MRC Laboratory of Molecular Biology\\ -Hills Road\\ -Cambridge CB2 2QH\\ -England\\ -sre@mrc-lmb.cam.ac.uk} - -\begin{document} -\bibliographystyle{jmb} -\nocite{TitlesOn} - -\maketitle - -\input{abstract} -\input{formats} -\input{selex} -\input{squid-format} -\input{gsi-format} - -\bibliography{master} - -\end{document} diff --git a/forester/archive/RIO/others/hmmer/squid/Docs/selex.tex b/forester/archive/RIO/others/hmmer/squid/Docs/selex.tex deleted file mode 100644 index c802e16..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Docs/selex.tex +++ /dev/null @@ -1,153 +0,0 @@ -\section{ SELEX alignment file format } - -\subsection{ Example of a simple SELEX format file} - -\begin{verbatim} -# Example selex file - -seq1 ACGACGACGACG. -seq2 ..GGGAAAGG.GA -seq3 UUU..AAAUUU.A - -seq1 ..ACG -seq2 AAGGG -seq3 AA...UUU -\end{verbatim} - -SELEX is an interleaved multiple alignment format that evolved as an -intuitive format. 
SELEX files are easy to write and manipulate -manually with a text editor. It is usually easy to convert other -alignment formats into SELEX format; the output of the CLUSTALV -multiple alignment program and GCG's MSF format are similar -interleaved formats. Because it evolved to accommodate different user -input styles, it is very tolerant of various inconsistencies such as -different gap symbols, varying line lengths, etc. - -As the format evolved, more features have been added. To maintain -compatibility with past alignment files, the new features are added -using a reserved comment style. These extra features are usually -maintained by automated SELEX-generating software, such as the {\tt -koala} sequence alignment editor or my {\tt cove} and {\tt hmm} sequence -analysis packages. This extra information includes consensus and -individual RNA or protein secondary structure, per-sequence weights, a -reference coordinate system for the columns, and database source -information including name, accession number, and coordinates (for -subsequences extracted from a longer source sequence). - -\subsection {Specification of a SELEX file} - -\begin{enumerate} -\item -Any line beginning with a \verb+#=+ as the first two characters is a -machine ``comment''. \verb+#=+ comments are reserved for additional -data about the alignment. Usually these features are maintained by -software such as the {\tt koala} editor, not by hand. - -\item -All other lines beginning with a \verb+%+ or \verb+#+ as the first -character are user comments. User comments are ignored by all -software. Any number of comments may be included. - -\item -Lines of data consist of a name followed by a sequence. The total -length of the line must be smaller than 1024 characters. - -\item -Names must be a single word. Any non-whitespace characters are -accepted. No spaces are tolerated in names: names MUST be a -single word.
- -\item -In the sequence, any of the characters \verb+-_.+ or a space are -recognized as gaps. Gaps are converted to a '.'. Any other characters -are interpreted as sequence. Sequence is case-sensitive. There is a -common assumption by my software that upper-case symbols are used for -consensus (match) positions and lower-case symbols are used for -inserts. This language of ``match'' versus ``insert'' comes from the -hidden Markov model formalism \cite{Krogh94}. To almost all of my -software, this isn't important, and it immediately converts the -sequence to all upper-case after it's read. - -\item -Multiple different sequences are grouped in a block of data lines. -Blocks are separated by blank lines. No blank lines are tolerated -between the sequence lines in a block. Each block in a multi-block -file of a long alignment must have its sequences in the same order in -each block. The names are checked to verify that this is the case; if -not, only a warning is generated. (In manually constructed files, some -users may wish to use shorthand names after the first block with full -names, but this isn't recommended.) -\end{enumerate} - -\subsection {Special comments} - -\subsubsection {Secondary structure} - -I use one-letter codes to indicate secondary structures. Secondary -structure strings are aligned to sequence blocks just like additional -sequences. - -For RNA secondary structure, the symbols \verb+>+ and \verb+<+ are -used for base pairs (pairs point at each other). \verb-+- indicate -other single-stranded positions, {\tt .} indicates unassigned bases. -This description follows \cite{Konings89}. For protein secondary -structure, I use {\tt E} to indicate residues in $\beta$-sheet, {\tt -H} for those in $\alpha$-helix, {\tt L} for those in loops, and {\tt -.} for unassigned residues. - -RNA pseudoknots are represented by alphabetic characters, with upper -case letters representing the 5' side of the helix and lower case -letters representing the 3' side. 
Note that this restricts the -annotation to a maximum of 26 pseudoknots per sequence. - -Lines beginning with \verb+#=SS+ or \verb+#=CS+ are individual or -consensus secondary structure data, respectively. \verb+#=SS+ -individual secondary structure lines must immediately follow the -sequence they are associated with. There can only be one \verb+#=SS+ -per sequence. \verb+#=CS+ consensus secondary structure predictions -precede all the sequences in each block. There can only be one -\verb+#=CS+ per file. - -\subsubsection {Reference coordinate system} - -Alignments are usually numbered by some reference coordinate system, -often a canonical molecule. For instance, tRNA positions are numbered -by reference to the positions of yeast tRNA-Phe. - -A line beginning with \verb+#=RF+ preceding the sequences in a block -gives a reference coordinate system. Any non-gap symbol in the -\verb+#=RF+ line indicates that sequence positions in its columns are -numbered. For instance, the \verb+#=RF+ lines for a tRNA alignment -would have 76 non-gap symbols for the canonical numbered columns; they -might be the aligned tRNA-Phe sequence itself, or they might be just -X's. - -\subsubsection {Sequence header} - -Additional per-sequence information can be placed in a header before -any blocks appear. These lines, one per sequence and in exactly the -same order as the sequences appear in the alignment, are formatted -like \verb+#=SQ -+. - -This information includes a sequence weight (for compensating for -biased representation of subfamilies of sequences in the alignment); -source information, if the sequence came from a database, consisting -of identifier, accession number, and source coordinates; and a -description of the sequence. - -If a \verb+#=SQ+ line is present, all the fields must be present. If -no information is available for a field, use '-' for all the fields -except the source coordinates, which would be given as '0'. 
- -\subsubsection {Author} - -The first non-comment, non-blank line of the file may be a \verb+#=AU+ -``author'' line. There is a programmatic interface for -alignment-generating programs to record a short comment like \verb+11 -November 1993, by Feng-Doolittle v. 2.1.1+, and this comment will be -recorded on the \verb+#=AU+ line by \verb+WriteSELEX()+. - - - diff --git a/forester/archive/RIO/others/hmmer/squid/Docs/squid-format.tex b/forester/archive/RIO/others/hmmer/squid/Docs/squid-format.tex deleted file mode 100644 index e318534..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Docs/squid-format.tex +++ /dev/null @@ -1,80 +0,0 @@ -\newpage -\section {SQUID format} - -SQUID format is a sequence database format similar to the PIR, -GenBank, and EMBL formats. The primary difference is that SQUID format -may optionally contain secondary structure annotation information for -the sequence. No other sequence format allows secondary structure -annotation, which is why SQUID format became necessary. - -An example SQUID format file: - -\begin{verbatim} -NAM DY9990 -SRC HSTGYA M27547 76..169::196 -DES Human Tyr-tRNA gene, clone pM6. -SEQ +SS - 1 ccttcgatagctcagctggtagagcggaggactgtagactgcggaaacgt - >>>>>>>..>>>>........<<<<.>>>>>................... - 51 ttgtggacatccttaggtcgctggttcaattccggctcgaagga - .........<<<<<.....>>>>>.......<<<<<<<<<<<<. -++ -NAM DY9991 -SRC HSTRNAYE M55611 1..93::93 -DES Human Tyr-tRNA precursor. -SEQ +SS - 1 ccttcgatagctcagctggtagagcggaggactgtagcctgtagaaacat - >>>>>>>..>>>>........<<<<.>>>>>................... - 51 ttgtggacatccttaggtcgctggttcgattccggctcgaagg - .........<<<<<.....>>>>>.......<<<<<<<<<<<< -++ -NAM DA0260 -SEQ - 1 GGGCGAAUAGUGUCAGCGGGAGCACACCAGACUUGCAAUCUGGUAGGGAG - 51 GGUUCGAGUCCCUCUUUGUCCACCA -++ -\end{verbatim} - - -\subsection {Specification of a SQUID file} - -\begin{enumerate} -\item There must be a line of the form \verb+NAM +. 
- -\item There may be an optional line \verb+SRC <id> <accession> -<start>..<end>::<length>+, which specifies a database source for this -sequence, giving the database identifier (name), accession number, -start and end position in the database sequence, and the original -length of the database sequence, respectively. If a \verb+SRC+ line -is present, all of these values must be specified. If any values are -unknown, they may be set to \verb+-+ in the case of \verb+<id>+ and -\verb+<accession>+ and \verb+0+ in the case of \verb+<start>+, \verb+<end>+, -and \verb+<length>+, and in these cases the values will be ignored. - -\item There may be an optional line \verb+DES <description>+ giving -a one-line description of the sequence. - -\item There must be a line of the form \verb-SEQ +SS- or \verb-SEQ-. -If the line contains \verb-+SS-, it means that the record contains -secondary structure annotation interleaved with the sequence. - -\item The sequence (and optional structure) immediately follow. There may be -optional numbering either before or after the sequence. The number of -characters per line is unimportant. Spaces and tabs are ignored. -There must be no non-numeric non-space characters on any lines except -sequence or structure annotation characters. Structure annotation is -fairly free-form; any alphabetic character or character in the set -\verb/_.-*?<>{}[]()!@#$%^&=+;:'|`~"\/ is accepted. There must -be one such character for every sequence character (preferably aligned -to the sequence, but in fact this is not checked for). Note that -spaces in the secondary structure annotation are not permitted, -except where they are aligned to gaps in the sequence. - -\item Sequence records are separated by a line of the form \verb-++-.
-\end{enumerate} - - - - - - diff --git a/forester/archive/RIO/others/hmmer/squid/Docs/ssi-format.tex b/forester/archive/RIO/others/hmmer/squid/Docs/ssi-format.tex deleted file mode 100644 index b236d13..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Docs/ssi-format.tex +++ /dev/null @@ -1,641 +0,0 @@ -% SRE, Mon Dec 25 13:00:46 2000 - -\documentclass[12pt]{report} -\usepackage{fullpage} -\usepackage{times} -\usepackage{epsfig} -%\usepackage{html} % From the LaTeX2html translator -\usepackage{apalike} -\setcounter{secnumdepth}{2} - -\input{macros} - -\begin{document} -\bibliographystyle{apalike} - -\section{SSI format} - -SSI format (Sequence/Subsequence Index format) indexes flatfile -databases by names and/or accessions, enabling fast retrieval. - -An SSI index is a binary file that stores sequence names or accessions -as \emph{keys} that it can look up rapidly. It differentiates between -\emph{primary keys} and \emph{secondary keys}. There is one and only -one primary key per sequence. There can be more than one secondary key -per sequence. Both primary and secondary keys must be unique -identifiers (no two records have the same key). A program (like -HMMER's distributed PVM implementation) that needs to step through -each sequence one at a time can refer to the list of primary keys. A -program solely concerned with flexible sequence retrieval (such as -SQUID's \prog{sfetch}) might consult an SSI index with accessions as -primary keys, and names as secondary keys. - -A single SSI file can index multiple sequence data files. This allows -indexing multifile databases (e.g. Genbank flatfile distributions). - -The SSI format is relatively simple and may prove useful for other -indexing tasks besides sequence names. HMMER uses SSI format to index -HMM files. 
- -\subsection{Special features of SSI} - -SSI superseded 1994's GSI format after human genome sequence files -started exceeding 2 GB filesystem limitations, and after problems in -the HMMER PVM implementation had to be hacked around. SSI has the -following additional features compared to GSI. - -\begin{description} -\item[Separate primary key section] -Primary keys are set apart in a separate section, enabling programs to -step through a guaranteed one-to-one mapping of keys to sequences. A -secondary key section adds many-to-one mapping of keys to sequences. - -\item[Arbitrary filename and key lengths] -File name lengths and key name lengths are effectively unlimited. - -\item[64-bit indexing] -For sequence files exceeding 2GB, on architectures that support 64-bit -filesystems (such as IRIX, Solaris, Tru64 UNIX, FreeBSD...), SSI -supports 64-bit indexing; depending on the system, file sizes may -theoretically be allowed to range up to millions of terabytes. - -\item[Fast subsequence extraction] -SSI can be used to greatly accelerate \emph{subsequence} extraction -from very long sequences (example: human chromosome contigs). The -sequence file must meet certain formatting conditions for this to -work; see below for details. -\end{description} - -\subsection{SSI API in SQUID} - -\subsubsection{Functions for using a SSI index file:} - -\begin{sreapi} -\item[int SSIOpen(char *filename, SSIFILE **ret\_sfp)] - -Opens the SSI index file \prog{filename} and returns a \prog{SSIFILE -*} stream through \prog{ret\_sfp}. Returns 0 on success, nonzero on -failure. The caller must eventually close this stream using -\prog{SSIClose()}. More than one index can be open at once.
- -Error codes:\\ -\begin{tabular}{ll} -\prog{SSI\_ERR\_NOFILE} & failed to open file; doesn't exist or not readable\\ -\prog{SSI\_ERR\_BADMAGIC} & not a SSI file \\ -\prog{SSI\_ERR\_NO64BIT} & it has 64-bit offsets, and we can't support that\\ -\prog{SSI\_ERR\_FORMAT} & file appears to be corrupted\\ -\prog{SSI\_ERR\_MALLOC} & malloc failed \\ -\end{tabular} - -\item[int SSIGetOffsetByName(SSIFILE *sfp, char *key, int *ret\_fh, SSIOFFSET *ret\_offset)] - -Looks up the string \prog{key} in the open index \prog{sfp}. -\prog{key} can be either a primary or secondary key. If \prog{key} is -found, \prog{*ret\_fh} contains a unique handle on the file -that contains {key} (suitable for an \prog{SSIFileInfo()} call, or for -comparison to the handle of the last file that was opened for -retrieval), and \prog{offset} is filled in with the offset in that -file. Returns 0 on success, non-zero on error. - -Error codes:\\ -\begin{tabular}{ll} -\prog{SSI\_ERR\_NO\_SUCH\_KEY} & key not found \\ -\prog{SSI\_ERR\_NODATA} & fread() failed, file appears to be corrupted\\ -\end{tabular} - -\item[int SSIGetOffsetByNumber(SSIFILE *sfp, int nkey, int -*ret\_fh, SSIOFFSET *offset)] - -Retrieves information for primary key number \prog{nkey}. \prog{nkey} -ranges from 0..\prog{nprimary-1}. When the key is found, -\prog{*ret\_fh} contains a unique handle on the file that -contains {key} (suitable for an SSIFileInfo() call, or for comparison -to the handle of the last file that was opened for retrieval), and -\prog{offset} is filled in with the offset in that file. Returns 0 on -success, non-zero on error. 
- -Error codes:\\ -\begin{tabular}{ll} -\prog{SSI\_ERR\_SEEK\_FAILED} & failed to reposition in index file\\ -\prog{SSI\_ERR\_NO\_SUCH\_KEY} & key not found \\ -\prog{SSI\_ERR\_NODATA} & fread() failed, file appears to be corrupted\\ -\end{tabular} - -\item[int SSIGetSubseqOffset(SSIFILE *sfp, char *key, int -requested\_start, int *ret\_fh, -SSIOFFSET *record\_offset, SSIOFFSET *data\_offset, int *ret\_actual\_start)] - -Implements \prog{SSI\_FAST\_SUBSEQ}. - -Looks up the string \prog{key} in the open index \prog{sfp}, and -asks for the nearest offset to a subsequence starting at position -\prog{requested\_start} in the sequence (numbering the sequence 1..L). -\prog{key} can be either a primary or secondary key. If \prog{key} is -found, \prog{*ret\_fh} contains a unique handle on the file that -contains \prog{key} (suitable for an SSIFileInfo() call, or for comparison -to the handle of the last file that was opened for retrieval); -\prog{record\_offset} contains the disk offset to the start of the -record; \prog{data\_offset} contains the disk offset either exactly at -the requested residue, or at the start of the line containing the -requested residue; \prog{ret\_actual\_start} contains the coordinate -(1..L) of the first valid residue at or after -\prog{data\_offset}. \prog{ret\_actual\_start} is $\leq$ -\prog{requested\_start}. Returns 0 on success, non-zero on failure. - -Error codes:\\ -\begin{tabular}{ll} -\prog{SSI\_ERR\_NO\_SUBSEQS} & this file or key doesn't allow subseq lookup\\ -\prog{SSI\_ERR\_NO\_SUCH\_KEY} & key not found \\ -\prog{SSI\_ERR\_RANGE} & the requested\_start is out of bounds\\ -\prog{SSI\_ERR\_NODATA} & fread() failed, file appears to be corrupted\\ -\end{tabular} - -\item[int SSISetFilePosition(FILE *fp, SSIOFFSET *offset)] - -Uses \prog{offset} to set the file position for \prog{fp} (usually an -open sequence file) relative to the start of the file.
Hides the -details of system-dependent shenanigans necessary for file positioning -in large ($>2$ GB) files. Behaves just like \prog{fseek(fp, offset, -SEEK\_SET)} for 32 bit offsets and $<2$ GB files. Returns 0 on -success, nonzero on error. - -Error codes:\\ -\begin{tabular}{ll} -\prog{SSI\_ERR\_SEEK\_FAILED} & failed to reposition the file\\ -\end{tabular} - -\item[int SSIFileInfo(SSIFILE *sfp, int fh, char **ret\_filename, int *ret\_format)] - -Given a file handle \prog{fh} in an open index file \prog{sfp}, -retrieve file name \prog{ret\_filename} and the file format -\prog{ret\_format}. \prog{ret\_filename} is a pointer to a string -maintained internally by \prog{sfp}. It should not be free'd; -\prog{SSIClose(sfp)} will take care of it. - -Error codes:\\ -\begin{tabular}{ll} -\prog{SSI\_ERR\_BADARG} & no such file n\\ -\end{tabular} - -\item[void SSIClose(SSIFILE *sfp)] - -Close an open \prog{SSIFILE *}. -\end{sreapi} - -\subsubsection{Skeleton example code for using a SSI index file:} - -\small\begin{verbatim} - SSIFILE *sfp; - FILE *fp; - int fh; - char *seqfile; - int fmt; - SSIOFFSET offset; - - SSIOpen("foo.ssi", &sfp); - - /* Finding an entry by name - * (by number, with SSIGetOffsetByNumber(), is analogous) - */ - SSIGetOffsetByName(sfp, "important_key", &fh, &offset); - SSIFileInfo(sfp, fh, &seqfile, &fmt); - fp = fopen(seqfile, "r"); /* more usually SeqfileOpen(), using fmt */ - SSISetFilePosition(fp, &offset); - /* read the entry from there, do whatever... */ - /* seqfile is owned by sfp: do not free() it; SSIClose() releases it */ - fclose(fp); - - SSIClose(sfp); -\end{verbatim}\normalsize - -\subsubsection{Functions for creating a SSI index file:} - -\begin{sreapi} -\item[int SSIRecommendMode(char *file)] - -Examines the file and determines whether it should be indexed with -large file support or not; returns \prog{SSI\_OFFSET\_I32} for most -files, \prog{SSI\_OFFSET\_I64} for large files, or -1 on failure.
- -\item[SSIINDEX *SSICreateIndex(int mode)] - -Creates and initializes a SSI index structure. Sequence file offset -type to be used is specified by \prog{mode}, which may be either -\prog{SSI\_OFFSET\_I32} or \prog{SSI\_OFFSET\_I64}. Returns a -pointer to the new structure, or NULL on failure. The caller must free -this structure with \prog{SSIFreeIndex()} when done. - -\item[int SSIGetFilePosition(FILE *fp, int mode, SSIOFFSET *ret\_offset)] - -Fills \prog{ret\_offset} with the current disk offset of \prog{fp}, -relative to the start of the file. {mode} is the type of offset to -use; it must be either \prog{SSI\_OFFSET\_I32} or -\prog{SSI\_OFFSET\_I64}. Returns 0 on success, non-zero on error. - -Error codes:\\ -\begin{tabular}{ll} -\prog{SSI\_ERR\_NO64BIT} & 64-bit mode unsupported on this system\\ -\prog{SSI\_ERR\_TELL\_FAILED} & failed to determine position in file\\ -\end{tabular} - -\item[int SSIAddFileToIndex(SSIINDEX *g, char *filename, int fmt, -int *ret\_fh)] - -Adds the sequence file \prog{filename}, which is known to be in format -\prog{fmt}, to the index \prog{g}. Creates and returns a unique -filehandle \prog{ret\_fh} for associating primary keys with this file -using \prog{SSIAddPrimaryKeyToIndex()}. Returns 0 on success, non-zero -on failure. - -Error codes:\\ -\begin{tabular}{ll} -\prog{SSI\_ERR\_TOOMANY\_FILES} & exceeded file number limit\\ -\prog{SSI\_ERR\_MALLOC} & a malloc() failed\\ -\end{tabular} - -\item[int SSISetFileForSubseq(SSIINDEX *g, int fh, int bpl, int rpl)] - -Set \prog{SSI\_FAST\_SUBSEQ} for the file indicated by filehandle -\prog{fh} in the index \prog{g}, setting parameters \prog{bpl} and -\prog{rpl} to the values given. \prog{bpl} is the number of bytes per -sequence data line. \prog{rpl} is the number of residues per sequence -data line. Caller must be sure that \prog{bpl} and \prog{rpl} do not -change on any line of any sequence record in the file (except for the -last data line of each record). 
If this is not the case in this file, -\prog{SSI\_FAST\_SUBSEQ} will not work, and this routine should not be -called. Returns 0 on success, non-zero on failure. - -\item[int SSIAddPrimaryKeyToIndex(SSIINDEX *g, char *key, int -fh, SSIOFFSET *r\_off, SSIOFFSET *d\_off, int L)] - -Puts a primary key \prog{key} in the index \prog{g}, while telling the -index that this primary key is in the file associated with filehandle -\prog{fh} and its record starts at position \prog{r\_off} in that -file. - -\prog{d\_off} and \prog{L} are optional; they may be left unset by -passing NULL and 0, respectively. (If one is provided, both must be -provided.) If they are provided, \prog{d\_off} gives the position of -the first line of sequence data in the record, and \prog{L} gives -the length of the sequence in residues. They are used when -\prog{SSI\_FAST\_SUBSEQ} is set for the sequence file. If -\prog{SSI\_FAST\_SUBSEQ} is not set for the file, \prog{d\_off} and -\prog{L} will be ignored even if they are available, so it doesn't -hurt for the indexing program to provide them; typically it won't know -whether it's safe to set \prog{SSI\_FAST\_SUBSEQ} for the whole file -until the whole file has been read and every key has already been -added to the index. - -Through \prog{ret\_kh} it provides a ``handle'' - a unique -identifier for the primary key - that any subsequent calls to -\prog{SSIAddSecondaryKeyToIndex()} will use to associate one or more -secondary keys with this primary key. - -Returns 0 on success, non-zero on error. 
- -Error codes:\\ -\begin{tabular}{ll} -\prog{SSI\_ERR\_TOOMANY\_KEYS} & exceeded primary key limit\\ -\prog{SSI\_ERR\_TOOMANY\_FILES} & filenum exceeds file limit\\ -\prog{SSI\_ERR\_MALLOC} & a malloc() failed\\ -\end{tabular} - - -\item[int SSIAddSecondaryKeyToIndex(SSIINDEX *g, char *key, char *pkey)] - -Puts a secondary key \prog{key} in the index \prog{g}, associating it -with a primary key \prog{pkey} that has already been added to the index -by \prog{SSIAddPrimaryKeyToIndex()}. -Returns 0 on success, non-zero on error. - -Error codes:\\ -\begin{tabular}{ll} -\prog{SSI\_ERR\_TOOMANY\_KEYS} & exceeded secondary key limit\\ -\prog{SSI\_ERR\_MALLOC} & a malloc() failed\\ -\end{tabular} - - - -\item[int SSIWriteIndex(char *file, SSIINDEX *g)] - -Writes complete index \prog{g} in SSI format to a binary file -\prog{file}. Does all overhead of sorting the primary and secondary -keys, and maintaining the association of secondary keys with primary -keys during and after the sort. Returns 0 on success, non-zero on -error. - -Error codes:\\ -\begin{tabular}{ll} -\prog{SSI\_ERR\_NOFILE} & an fopen() failed\\ -\prog{SSI\_ERR\_FWRITE} & an fwrite() failed\\ -\prog{SSI\_ERR\_MALLOC} & a malloc() failed\\ -\end{tabular} - - -\item[void SSIFreeIndex(SSIINDEX *g)] - -Free an index structure. -\end{sreapi} - - -\subsubsection{Other SSI functions:} - -\begin{sreapi} -\item[char *SSIErrorString(int n)] - -Returns a pointer to an internal string corresponding to error -\prog{n}, a return code from any of the functions in the API that -return non-zero on error. -\end{sreapi} - -\subsection{Detailed specification of SSI binary format} - -There are four sections to the SSI file: -\begin{sreitems}{\textbf{Secondary keys}} -\item[\textbf{Header}] -Contains a magic number indicating the SSI version number, and -various information about the number and sizes of things in the index. - -\item[\textbf{Files}] -Contains one or more \emph{file records}, one per sequence file that's -indexed.
These contain information about the individual files. - -\item[\textbf{Primary keys}] -Contains one or more \emph{primary key records}, one per primary key. - -\item[\textbf{Secondary keys}] -Contains one or more \emph{secondary key records}, one per secondary key. -\end{sreitems} - -All numeric quantities are stored as unsigned integers of known size -in network (bigendian) order, for maximum crossplatform portability of -the index files. \prog{sqd\_uint16}, \prog{sqd\_uint32}, and -\prog{sqd\_uint64} are typically typedef'd as \prog{unsigned short}, -\prog{unsigned int}, and \prog{unsigned long long} or \prog{unsigned -long} at SQUID compile-time. Values may need to be cast to signed -quantities, so only half of their dynamic range is valid -(e.g. 0..32,767 for values of type \prog{sqd\_uint16}; -0..2,147,483,647 (2 billion) for \prog{sqd\_uint32}; and 0..9.22e18 (9 -million trillion) for \prog{sqd\_uint64}). These typedef's are -handled automatically by the \prog{./configure} script (see -\prog{squidconf.h.in} before configuration, \prog{squidconf.h} after -configuration). If necessary, \prog{./configure}'s guess can be -overridden in \prog{squidconf.h} after configuration. - -\subsubsection{Header section} - -The header section contains: - -\vspace{1em} -\begin{tabular}{llrr} -Variable & Description & Bytes & Type \\\hline -\prog{magic} & SSI version magic number. & 4 & \prog{sqd\_uint32}\\ -\prog{flags} & Optional behavior flags (see below) & 4 & \prog{sqd\_uint32}\\ -\prog{nfiles} & Number of files in file section. & 2 & \prog{sqd\_uint16}\\ -\prog{nprimary} & Number of primary keys. & 4 & \prog{sqd\_uint32}\\ -\prog{nsecondary} & Number of secondary keys. & 4 & \prog{sqd\_uint32}\\ -\prog{flen} & Length of filenames (incl. '\verb+\0+') & 4 & \prog{sqd\_uint32}\\ -\prog{plen} & Length of primary key names (incl. '\verb+\0+') & 4 & \prog{sqd\_uint32}\\ -\prog{slen} & Length of sec. key names (incl.
'\verb+\0+') & 4 & \prog{sqd\_uint32}\\ -\prog{frecsize} & \# of bytes in a file record & 4 & \prog{sqd\_uint32}\\ -\prog{precsize} & \# of bytes in a primary key record & 4 & \prog{sqd\_uint32}\\ -\prog{srecsize} & \# of bytes in a sec. key record & 4 & \prog{sqd\_uint32}\\ -\prog{foffset} & disk offset, start of file records & \dag & \dag\\ -\prog{poffset} & disk offset, start of primary key recs & \dag & \dag\\ -\prog{soffset} & disk offset, start of sec. key records & \dag & \dag\\ -\end{tabular} -\vspace{1em} - -The optional behavior flags are: - -\vspace{1em} -\begin{tabular}{lll} -Flag & Value& Note\\ \hline -\prog{SSI\_USE64} & $1 \ll 0$ & Large sequence files; all key offsets 64 bit.\\ -\prog{SSI\_USE64\_INDEX} & $1 \ll 1$ & Large index; SSI file itself uses 64-bit offsets.\\\hline -\end{tabular} -\vspace{1em} - -The optional behavior flags define whether the SSI file uses large -file (64-bit) offsets. This issue is discussed in greater detail -below (see ``Large sequence files and large indices''). Briefly: if -\prog{SSI\_USE64} is set, the sequence file is large, and all sequence -file offsets are 64-bit integers. If \prog{SSI\_USE64\_INDEX} is -set, the index file itself is large, and \prog{foffset}, -\prog{poffset}, and \prog{soffset} (that is, all offsets within the -index file itself, indicated as \dag\ in the above table) are 64-bit -integers. \footnote{In the current API it is not expected that -\prog{SSI\_USE64\_INDEX} would ever be set. The current index-writing -API keeps the entire index in RAM (it has to sort the keys), and would -presumably have to be modified or replaced to be able to generate very -large indices.} - -The reason to explicitly record various record sizes (\prog{frecsize}, -\prog{precsize}, \prog{srecsize}) and index file positions -(\prog{foffset}, \prog{poffset}, \prog{soffset}) is to allow future -extendibility. More fields might be added without breaking older SSI -parsers.
The format is meant to be both forwards- and -backwards-compatible. - -\subsubsection{File section} - -The file section consists of \prog{nfiles} file records. Each record -is \prog{frecsize} bytes long, and contains: - -\vspace{1em} -\begin{tabular}{llrr} -Variable & Description & Bytes & Type \\\hline -\prog{filename} & Name of file (possibly including full path) & \prog{flen} & char *\\ -\prog{format} & Format code for file; see squid.h for definitions & 4 & \prog{sqd\_uint32} \\ -\prog{flags} & Optional behavior flags & 4 & \prog{sqd\_uint32} \\ -\prog{bpl} & Bytes per sequence data line & 4 & \prog{sqd\_uint32} \\ -\prog{rpl} & Residues per sequence data line & 4 & \prog{sqd\_uint32} \\\hline -\end{tabular} -\vspace{1em} - -When a SSI file is written, \prog{frecsize} is equal to the sum of -the sizes above. When a SSI file is read by a parser, it is possible -that \prog{frecsize} is larger than the parser expects, if the parser -is expecting an older version of the SSI format: additional fields -may be present, which increases \prog{frecsize}. The parser will only -try to understand the data up to the \prog{frecsize} it expected to -see, but still knows the absolutely correct \prog{frecsize} for -purposes of skipping around in the index file. - -Normally the SSI index resides in the same directory as the sequence -data file(s), so \prog{filename} is relative to the location of the -SSI index. In the event this is not true, \prog{filename} can contain -a full path. - -\prog{format} is a SQUID sequence file format code; e.g. something like -\prog{SQFILE\_FASTA} or \prog{MSAFILE\_STOCKHOLM}. These constants are defined -in \prog{squid.h}. - -Only one possible optional behavior flag is defined: - -\vspace{1em} -\begin{tabular}{lll} -Flag & Value& Note\\ \hline -\prog{SSI\_FAST\_SUBSEQ} & $1 \ll 0$ & Fast subseq retrieval is possible for this file.\\\hline -\end{tabular} -\vspace{1em} - -When \prog{SSI\_FAST\_SUBSEQ} is set, \prog{bpl} and \prog{rpl} are -nonzero. 
They can be used to calculate the offset of subsequence -positions in the data file. This is described in the optional behavior -section below. - -\subsubsection{Primary key section} - -The primary key section consists of \prog{nprimary} records. Each -record is \prog{precsize} bytes long, and contains: - -\vspace{1em} -\begin{tabular}{llrr} -Variable & Description & Bytes & Type \\\hline -\prog{key} & Key name (seq name, identifier, accession) & \prog{plen}& char *\\ -\prog{fnum} & File number (0..nfiles-1) & 2 & \prog{sqd\_uint16}\\ -\prog{offset1} & Offset to start of record & \ddag & \ddag \\ -\prog{offset2} & Offset to start of sequence data & \ddag & \ddag \\ -\prog{len} & Length of data (e.g. seq length, residues) & 4 & \prog{sqd\_uint32} \\\hline -\end{tabular} -\vspace{1em} - -The offsets are sequence file offsets (indicated by \ddag). They are -4 bytes of type \prog{sqd\_uint32} normally, 8 bytes of type -\prog{sqd\_uint64} if \prog{SSI\_USE64} is set, and \prog{sizeof(fpos\_t)} -bytes of type \prog{fpos\_t} if \prog{SSI\_FPOS\_T} is set. - -\prog{offset2} and \prog{len} are only meaningful if \prog{SSI\_FAST\_SUBSEQ} -is set on this key's file. \prog{offset2} gives the absolute disk -position of line 0 in the sequence data. \prog{len} is necessary for -bounds checking in a subsequence retrieval, to be sure we don't try to -reposition the disk outside the valid data. - -\subsubsection{Secondary key section} - -The secondary key section consists of \prog{nsecondary} records. Each -record is \prog{srecsize} bytes long, and contains: - -\vspace{1em} -\begin{tabular}{llrr} -Variable & Description & Bytes & Type \\\hline -\prog{key} & Key name (seq name, identifier, accession) & \prog{slen}& char *\\ -\prog{pkey} & Primary key & -\prog{plen}& char *\\\hline -\end{tabular} -\vspace{1em} - -All data are kept with the primary key records. Secondary keys are -simply translated to primary keys, then the primary key has to be -looked up.
- -\subsection{Optional behaviors} - -\subsubsection{Large sequence files and large indices: 64-bit operation} - -Normally a SSI index file can be no larger than 2 GB, and can index -sequence files that are no larger than 2 GB each. This is due to -limitations in the ANSI C/POSIX standards, which were developed for -32-bit operating systems and filesystems. Most modern operating -systems allow larger 64-bit file sizes, but as far as I'm aware (Dec -2000), there are no standard interfaces yet for working with positions -(offsets) in large files. On many platforms, SSI can extend to full -64-bit capabilities, but on some platforms, it cannot. To understand -the limitations (of SSI, and possibly of my understanding) you need -to understand some details about what's happening behind the SSI API -and how I understand C API's to modern 64-bit OS's and hardware. - -First, some information on ANSI C APIs for file positioning. ANSI C -provides the portable functions \prog{fseek()} and \prog{ftell()} for -manipulating simple offsets in a file. They store the offset in a -\prog{long} (which ranges up to 2 GB). The Standard says we're allowed -to do arithmetic on this value if the file is binary. ANSI C also -provides \prog{fgetpos()} and \prog{fsetpos()} which store file -positions in an opaque data type called \prog{fpos\_t}. Modern -operating systems with large file support define \prog{fpos\_t} in a -way that permits files $>$2 GB. However, \prog{fpos\_t} is an opaque -type. It has two disadvantages compared to a simple arithmetic type -like \prog{long}: first, we're not allowed to do arithmetic on it, and -second, we can't store it in a binary file in an -architecture-independent manner. We need both features for SSI, -unfortunately. \footnote{Surely the professional C community has the -same problem; does \emph{everyone} hack around \prog{fpos\_t}?} - -Therefore we have to rely on system dependent features. 
Most operating -systems provide a non-compliant library call that returns an -arithmetic offset. Fully 64-bit systems typically give us a 64-bit -\prog{off\_t} and functions \prog{ftello()}/\prog{fseeko()} that work -with that offset. Many systems provide a ``transitional interface'' -where all normally named functions are 32-bits, but specially named -64-bit varieties are available: e.g. \prog{off\_t} is 32 bits, but -\prog{off64\_t} is 64 bits and we have functions \prog{ftello64()} and -\prog{fseeko64()}. Some systems provide a \prog{ftell64()} and -\prog{fseek64()} that work on offsets of type \prog{long long}. Many -popular systems may even provide more than one of these models, -depending on compiler flags. - -And, unfortunately, some systems provide none of these models (FreeBSD -for example). There, we will exploit the fact that most systems -(including FreeBSD) do in fact implement \prog{fpos\_t} as a simple -arithmetic type, such as an \prog{off\_t}, so we can misuse it. - -At compile time, SQUID's \prog{./configure} script tests for the -system's capabilities for 64-bit file offsets, and configures a -section in the \prog{squidconf.h} file. (The configuration includes a -custom autoconf macro, \prog{SQ\_ARITHMETIC\_FPOS\_T()}, to test -\prog{fpos\_t} and define \prog{ARITHMETIC\_FPOS\_T} if it is.) Four -possible 64-bit models are tested in the following order; if one of -them is possible, it will be used, and the constant -\prog{HAS\_64BIT\_FILE\_OFFSETS} is set. - -\begin{enumerate} -\item has \prog{ftello()}, \prog{fseeko()}; sizeof(\prog{off\_t}) $= 8$. -\item has \prog{ftello64()}, \prog{fseeko64()}; sizeof(\prog{off64\_t}) $= 8$. -\item has \prog{ftell64()}, \prog{fseek64()} -\item \prog{fpos\_t} is an arithmetic 64-bit type; (mis)use -\prog{fgetpos()}, \prog{fsetpos()}. -\end{enumerate} - - - -\subsubsection{Fast subsequence retrieval} - -In some files (notably vertebrate chromosome contigs) the size of each -sequence is large. 
It may be slow to extract a subsequence by first -reading the whole sequence into memory -- or even prohibitive, if the -sequence is so large that it can't be stored in memory. - -If the sequence data file is very consistently formatted so that each -line in each record (except the last one) is of the same length, in -both bytes and residues, we can determine a disk offset of the start -of any subsequence by direct calculation. -For example, a simple well-formatted FASTA -file with 50 residues per line would have 51 bytes per sequence line -(counting the '\verb+\0+') (\prog{bpl}=51, \prog{rpl}=50). Position $i$ in a sequence -$1..L$ will be on line $l = (i-1)/\mbox{\prog{rpl}}$, and line $l$ starts at -disk offset $l * \mbox{\prog{bpl}}$ relative to the start of the sequence -data. If there are no nonsequence characters in the data line except -the terminal '\verb+\0+' (which is true iff \prog{bpl} = \prog{rpl}+1 and 1 residue = 1 -byte), position $i$ can be precisely found: - -\[ -\mbox{relative offset of residue $i$} = -\left((i-1)/\mbox{\prog{rpl}}\right)*\mbox{\prog{bpl}} + (i-1) \% \mbox{ \prog{rpl}} -\] - -Even for sequence data lines with extra characters (e.g. spaces, -coordinates, whatever), fast subsequence retrieval is possible; a -parser can be positioned at the beginning of the appropriate line $l$, -which starts at residue $(l*\mbox{\prog{rpl}}) + 1$, and it can start reading -from there (e.g. the line that $i$ is on) rather than the beginning of -the whole sequence record. - -The program that creates the index is responsible for determining if -\prog{bpl} and \prog{rpl} are consistent throughout a file; if so, it -may set the \prog{SSI\_FAST\_SUBSEQ} flag for the file. Then any record -whose primary key carries the optional data offset (\prog(offset2)) -and sequence length data is available for subsequence position -calculations by \prog{SSIGetSubseqOffset()}. 
- -\end{document} \ No newline at end of file diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/a2m b/forester/archive/RIO/others/hmmer/squid/Formats/a2m deleted file mode 100644 index 5001742..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/a2m +++ /dev/null @@ -1,200 +0,0 @@ ->GLB2_MORMR -...PIVD..SGSVSPLSDAEKNKIRAAW.DIVYKNYEKNGVDILVKFFTGTPAAQAFFPK -FKGLTTADALKKSSDVRWHAERIINAVNDAVKSMDDTEKMSMKLQELSVKHAQSFYVDRQ -YFKVLAGII.........ADTTAPGDAGFEKLMSMICILLSSAY....... ->GLBZ_CHITH -MKFIILALCVAAASALSGDQIGLVQST.YGKVKG....DSVGILYAVFKADPTIQAAFPQ -FVGK.DLDAIKGGAEFSTHAGRIVGFLGGVIDDLP...NIGKHVDALVATH.KPRGVTHA -QFNNFRAAFIAYLKGHV..DYTAAVEAAWGATFDAFFGAVFAK.......M ->HBA2_BOSMU -...V...........LSAADKGNVKAAW.GKVGGHAAEYGAEALERMFLSFPTTKTYFPH -FD.LSH.....GSAQVKGHGAKVAAALTKAVGHLDD...LPGALSELSDLHAHKLRVDPV -NFKLLSHSLLVTLASHLPSDFTPAVHASLDKFLANVSTVLTSKYR...... ->HBA2_GALCR -...V...........LSPTDKSNVKAAW.EKVGAHAGDYGAEALERMFLSFPTTKTYFPH -FD.LSH.....GSTQVKGHGKKVADALTNAVLHVDD...MPSALSALSDLHAHKLRVDPV -NFKLLRHCLLVTLACHHPAEFTPAVHASLDKFMASVSTVLTSKYR...... ->HBA4_SALIR -...S...........LSAKDKANVKAIW.GKILPKSDEIGEQALSRMLVVYPQTKAYFSH -WASVAP.....GSAPVKKHGITIMNQIDDCVGHMDD...LFGFLTKLSELHATKLRVDPT -NFKILAHNLIVVIAAYFPAEFTPEIHLSVDKFLQQLALALAEKYR...... ->HBAD_CHLME -...M...........LTADDKKLLTQLW.EKVAGHQEEFGSEALQRMFLTYPQTKTYFPH -FD.LHP.....GSEQVRGHGKKVAAALGNAVKSLDN...LSQALSELSNLHAYNLRVDPA -NFKLLAQCFQVVLATHLGKDYSPEMHAAFDKFLSAVAAVLAEKYR...... ->HBAD_PASMO -...M...........LTAEDKKLIQQIW.GKLGGAEEEIGADALWRMFHSYPSTKTYFPH -FD.LSQ.....GSDQIRGHGKKVVAALSNAIKNLDN...LSQALSELSNLHAYNLRVDPV -NFKFLSQCLQVSLATRLGKEYSPEVHSAVDKFMSAVASVLAEKYR...... ->HBAZ_HORSE -...S...........LTKAERTMVVSIW.GKISMQADAVGTEALQRLFSSYPQTKTYFPH -FD.LHE.....GSPQLRAHGSKVAAAVGDAVKSIDN...VAGALAKLSELHAYILRVDPV -NFKFLSHCLLVTLASRLPADFTADAHAAWDKFLSIVSSVLTEKYR...... 
->HBA_AILME -...V...........LSPADKTNVKATW.DKIGGHAGEYGGEALERTFASFPTTKTYFPH -FD.LSP.....GSAQVKAHGKKVADALTTAVGHLDD...LPGALSALSDLHAHKLRVDPV -NFKLLSHCLLVTLASHHPAEFTPAVHASLDKFFSAVSTVLTSKYR...... ->HBA_ANSSE -...V...........LSAADKGNVKTVF.GKIGGHAEEYGAETLQRMFQTFPQTKTYFPH -FD.LQP.....GSAQIKAHGKKVAAALVEAANHIDD...IAGALSKLSDLHAQKLRVDPV -NFKFLGHCFLVVLAIHHPSLLTPEVHASMDKFLCAVATVLTAKYR...... ->HBA_COLLI -...V...........LSANDKSNVKAVF.AKIGGQAGDLGGEALERLFITYPQTKTYFPH -FD.LSH.....GSAQIKGHGKKVAEALVEAANHIDD...IAGALSKLSDLHAQKLRVDPV -NFKLLGHCFLVVVAVHFPSLLTPEVHASLDKFVLAVGTVLTAKYR...... ->HBA_ERIEU -...V...........LSATDKANVKTFW.GKLGGHGGEYGGEALDRMFQAHPTTKTYFPH -FD.LNP.....GSAQVKGHGKKVADALTTAVNNLDD...VPGALSALSDLHAHKLRVDPV -NFKLLSHCLLVTLALHHPADFTPAVHASLDKFLATVATVLTSKYR...... ->HBA_FRAPO -...V...........LSAADKNNVKGIF.GKISSHAEDYGAEALERMFITYPSTKTYFPH -FD.LSH.....GSAQVKGHGKKVVAALIEAANHIDD...IAGTLSKLSDLHAHKLRVDPV -NFKLLGQCFLVVVAIHHPSALTPEVHASLDKFLCAVGNVLTAKYR...... ->HBA_MACFA -...V...........LSPADKTNVKAAW.GKVGGHAGEYGAEALERMFLSFPTTKTYFPH -FD.LSH.....GSAQVKGHGKKVADALTLAVGHVDD...MPQALSALSDLHAHKLRVDPV -NFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR...... ->HBA_MACSI -...V...........LSPADKTNVKDAW.GKVGGHAGEYGAEALERMFLSFPTTKTYFPH -FD.LSH.....GSAQVKGHGKKVADALTLAVGHVDD...MPQALSALSDLHAHKLRVDPV -NFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR...... ->HBA_MESAU -...V...........LSAKDKTNISEAW.GKIGGHAGEYGAEALERMFFVYPTTKTYFPH -FD.VSH.....GSAQVKGHGKKVADALTNAVGHLDD...LPGALSALSDLHAHKLRVDPV -NFKLLSHCLLVTLANHHPADFTPAVHASLDKFFASVSTVLTSKYR...... ->HBA_PAGLA -...V...........LSSADKNNIKATW.DKIGSHAGEYGAEALERTFISFPTTKTYFPH -FD.LSH.....GSAQVKAHGKKVADALTLAVGHLED...LPNALSALSDLHAYKLRVDPV -NFKLLSHCLLVTLACHHPAEFTPAVHSALDKFFSAVSTVLTSKYR...... ->HBA_PHACO -...V...........LSAADKNNVKGIF.TKIAGHAEEYGAEALERMFITYPSTKTYFPH -FD.LSH.....GSAQIKGHGKKVVAALIEAVNHIDD...ITGTLSKLSDLHAHKLRVDPV -NFKLLGQCFLVVVAIHHPSALTPEVHASLDKFLCAVGTVLTAKYR...... 
->HBA_PONPY -...V...........LSPADKTNVKTAW.GKVGAHAGDYGAEALERMFLSFPTTKTYFPH -FD.LSH.....GSAQVKDHGKKVADALTNAVAHVDD...MPNALSALSDLHAHKLRVDPV -NFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR...... ->HBA_PROLO -...V...........LSPADKANIKATW.DKIGGHAGEYGGEALERTFASFPTTKTYFPH -FD.LSP.....GSAQVKAHGKKVADALTLAVGHLDD...LPGALSALSDLHAYKLRVDPV -NFKLLSHCLLVTLACHHPAEFTPAVHASLDKFFTSVSTVLTSKYR...... ->HBA_TRIOC -...V...........LSANDKTNVKTVF.TKITGHAEDYGAETLERMFITYPPTKTYFPH -FD.LHH.....GSAQIKAHGKKVVGALIEAVNHIDD...IAGALSKLSDLHAQKLRVDPV -NFKLLGQCFLVVVAIHHPSVLTPEVHASLDKFLCAVGNVLSAKYR...... ->HBB1_VAREX -...V..........HWTAEEKQLICSLW.GKI..DVGLIGGETLAGLLVIYPWTQRQFSH -FGNLSSPTAIAGNPRVKAHGKKVLTSFGDAIKNLDN...IKDTFAKLSELHCDKLHVDPT -NFKLLGNVLVIVLADHHGKEFTPAHHAAYQKLVNVVSHSLARRYH...... ->HBB2_TRICR -...V..........HLTAEDRKEIAAIL.GKV..NVDSLGGQCLARLIVVNPWSRRYFHD -FGDLSSCDAICRNPKVLAHGAKVMRSIVEATKHLDN...LREYYADLSVTHSLKFYVDPE -NFKLFSGIVIVCLALTLQTDFSCHKQLAFEKLMKGVSHALGHGY....... ->HBB2_XENTR -...V..........HWTAEEKATIASVW.GKV..DIEQDGHDALSRLLVVYPWTQRYFSS -FGNLSNVSAVSGNVKVKAHGNKVLSAVGSAIQHLDD...VKSHLKGLSKSHAEDLHVDPE -NFKRLADVLVIVLAAKLGSAFTPQVQAVWEKLNATLVAALSHGYF...... ->HBBL_RANCA -...V..........HWTAEEKAVINSVW.QKV..DVEQDGHEALTRLFIVYPWTQRYFST -FGDLSSPAAIAGNPKVHAHGKKILGAIDNAIHNLDD...VKGTLHDLSEEHANELHVDPE -NFRRLGEVLIVVLGAKLGKAFSPQVQHVWEKFIAVLVDALSHSYH...... ->HBB_CALAR -...V..........HLTGEEKSAVTALW.GKV..NVDEVGGEALGRLLVVYPWTQRFFES -FGDLSTPDAVMNNPKVKAHGKKVLGAFSDGLTHLDN...LKGTFAHLSELHCDKLHVDPE -NFRLLGNVLVCVLAHHFGKEFTPVVQAAYQKVVAGVANALAHKYH...... ->HBB_COLLI -...V..........HWSAEEKQLITSIW.GKV..NVADCGAEALARLLIVYPWTQRFFSS -FGNLSSATAISGNPNVKAHGKKVLTSFGDAVKNLDN...IKGTFAQLSELHCDKLHVDPE -NFRLLGDILVIILAAHFGKDFTPECQAAWQKLVRVVAHALARKYH...... ->HBB_EQUHE -...V..........QLSGEEKAAVLALW.DKV..NEEEVGGEALGRLLVVYPWTQRFFDS -FGDLSNPAAVMGNPKVKAHGKKVLHSFGEGVHHLDN...LKGTFAQLSELHCDKLHVDPE -NFRLLGNVLVVVLARHFGKDFTPELQASYQKVVAGVANALAHKYH...... 
->HBB_LARRI -...V..........HWSAEEKQLITGLW.GKV..NVADCGAEALARLLIVYPWTQRFFAS -FGNLSSPTAINGNPMVRAHGKKVLTSFGEAVKNLDN...IKNTFAQLSELHCDKLHVDPE -NFRLLGDILIIVLAAHFAKDFTPDSQAAWQKLVRVVAHALARKYH...... ->HBB_MANSP -...V..........HLTPEEKTAVTTLW.GKV..NVDEVGGEALGRLLVVYPWTQRFFDS -FGDLSSPDAVMGNPKVKAHGKKVLGAFSDGLNHLDN...LKGTFAQLSELHCDKLHVDPE -NFKLLGNVLVCVLAHHFGKEFTPQVQAAYQKVVAGVANALAHKYH...... ->HBB_ORNAN -...V..........HLSGGEKSAVTNLW.GKV..NINELGGEALGRLLVVYPWTQRFFEA -FGDLSSAGAVMGNPKVKAHGAKVLTSFGDALKNLDD...LKGTFAKLSELHCDKLHVDPE -NFNRLGNVLIVVLARHFSKDFSPEVQAAWQKLVSGVAHALGHKYH...... ->HBB_RABIT -...V..........HLSSEEKSAVTALW.GKV..NVEEVGGEALGRLLVVYPWTQRFFES -FGDLSSANAVMNNPKVKAHGKKVLAAFSEGLSHLDN...LKGTFAKLSELHCDKLHVDPE -NFRLLGNVLVIVLSHHFGKEFTPQVQAAYQKVVAGVANALAHKYH...... ->HBB_SPECI -...V..........HLSDGEKNAISTAW.GKV..HAAEVGAEALGRLLVVYPWTQRFFDS -FGDLSSASAVMGNAKVKAHGKKVIDSFSNGLKHLDN...LKGTFASLSELHCDKLHVDPE -NFKLLGNMIVIVMAHHLGKDFTPEAQAAFQKVVAGVANALAHKYH...... ->HBB_SPETO -...V..........HLTDGEKNAISTAW.GKV..NAAEIGAEALGRLLVVYPWTQRFFDS -FGDLSSASAVMGNAKVKAHGKKVIDSFSNGLKHLDN...LKGTFASLSELHCDKLHVDPE -NFKLLGNMIVIVMAHHLGKDFTPEAQAAFQKVVAGVANALSHKYH...... ->HBB_SUNMU -...V..........HLSGEEKACVTGLW.GKV..NEDEVGAEALGRLLVVYPWTQRFFDS -FGDLSSASAVMGNPKVKAHGKKVLHSLGEGVANLDN...LKGTFAKLSELHCDKLHVDPE -NFRLLGNVLVVVLASKFGKEFTPPVQAAFQKVVAGVANALAHKYH...... ->HBB_TACAC -...V..........HLSGSEKTAVTNLW.GHV..NVNELGGEALGRLLVVYPWTQRFFES -FGDLSSADAVMGNAKVKAHGAKVLTSFGDALKNLDN...LKGTFAKLSELHCDKLHVDPE -NFNRLGNVLVVVLARHFSKEFTPEAQAAWQKLVSGVSHALAHKYH...... ->HBB_TRIIN -...V..........HLTPEEKALVIGLW.AKV..NVKEYGGEALGRLLVVYPWTQRFFEH -FGDLSSASAIMNNPKVKAHGEKVFTSFGDGLKHLED...LKGAFAELSELHCDKLHVDPE -NFRLLGNVLVCVLARHFGKEFSPEAQAAYQKVVAGVANALAHKYH...... ->HBB_TUPGL -...V..........HLSGEEKAAVTGLW.GKV..DLEKVGGQSLGSLLIVYPWTQRFFDS -FGDLSSPSAVMSNPKVKAHGKKVLTSFSDGLNHLDN...LKGTFAKLSELHCDKLHVDPE -NFRLLGNVLVRVLACNFGPEFTPQVQAAFQKVVAGVANALAHKYH...... 
->HBB_URSMA -...V..........HLTGEEKSLVTGLW.GKV..NVDEVGGEALGRLLVVYPWTQRFFDS -FGDLSSADAIMNNPKVKAHGKKVLNSFSDGLKNLDN...LKGTFAKLSELHCDKLHVDPE -NFKLLGNVLVCVLAHHFGKEFTPQVQAAYQKVVAGVANALAHKYH...... ->HBE_PONPY -...V..........HFTAEEKAAVTSLW.SKM..NVEEAGGEALGRLLVVYPWTQRFFDS -FGNLSSPSAILGNPKVKAHGKKVLTSFGDAIKNMDN...LKTTFAKLSELHCDKLHVDPE -NFKLLGNVMVIILATHFGKEFTPEVQAAWQKLVSAVAIALAHKYH...... ->HBF1_URECA -..............GLTTAQIKAIQDHWFLNIKGCLQAAADSIFFKYLTAYPGDLAFFHK -FSSV.PLYGLRSNPAYKAQTLTVINYLDKVVDALGG..NAGALMKAKVPSH.DAMGITPK -HFGQLLKLVGGVFQEEF..SADPTTVAAWGDAAGVLVAAM..........K ->LGB1_PEA -GFTDKQEALVNSSSE.FKQNLPGYSILFYTIVLEKAP..AAKGL................ -FSFLKDTAGVEDSPKLQAHAEQVFGLVRDSAAQLRTKGEVVLGNATLGAIHVQKGVTNP. -HFVVVKEALLQTIKKASGNNWSEELNTAWEVAYDGLATAIKKAMKT....A ->LGB1_VICFA -GFTEKQEALVNSSSQLFKQNPSNYSVLFYTIILQKAP..TAKAM................ -FSFLKDSAGVVDSPKLGAHAEKVFGMVRDSAVQLRATGEVVLDGKD.GSIHIQKGVLDP. -HFVVVKEALLKTIKEASGDKWSEELSAAWEVAYDGLATAIK....A....A ->MYG_ESCGI -...V...........LSDAEWQLVLNIW.AKVEADVAGHGQDILIRLFKGHPETLEKFDK -FKHLKTEAEMKASEDLKKHGNTVLTALGGILKKKGH...HEAELKPLAQSHATKHKIPIK -YLEFISDAIIHVLHSRHPGDFGADAQAAMNKALELFRKDIAAKYKELGFQG ->MYG_HORSE -...G...........LSDGEWQQVLNVW.GKVEADIAGHGQEVLIRLFTGHPETLEKFDK -FKHLKTEAEMKASEDLKKHGTVVLTALGGILKKKGH...HEAELKPLAQSHATKHKIPIK -YLEFISDAIIHVLHSKHPGNFGADAQGAMTKALELFRNDIAAKYKELGFQG ->MYG_LYCPI -...G...........LSDGEWQIVLNIW.GKVETDLAGHGQEVLIRLFKNHPETLDKFDK -FKHLKTEDEMKGSEDLKKHGNTVLTALGGILKKKGH...HEAELKPLAQSHATKHKIPVK -YLEFISDAIIQVLQNKHSGDFHADTEAAMKKALELFRNDIAAKYKELGFQG ->MYG_MOUSE -...G...........LSDGEWQLVLNVW.GKVEADLAGHGQEVLIGLFKTHPETLDKFDK -FKNLKSEEDMKGSEDLKKHGCTVLTALGTILKKKGQ...HAAEIQPLAQSHATKHKIPVK -YLEFISEIIIEVLKKRHSGDFGADAQGAMSKALELFRNDIAAKYKELGFQG ->MYG_MUSAN -..................VDWEKVNSVW.SAVESDLTAIGQNILLRLFEQYPESQNHFPK -FKN.KSLGELKDTADIKAQADTVLSALGNIVKKKGS...HSQPVKALAATHITTHKIPPH -YFTKITTIAVDVLSEMYPSEMNAQVQAAFSGAFKIICSDIEKEYKAANFQG ->MYG_PROGU -...G...........LSDGEWQLVLNVW.GKVEGDLSGHGQEVLIRLFKGHPETLEKFDK 
-FKHLKAEDEMRASEELKKHGTTVLTALGGILKKKGQ...HAAELAPLAQSHATKHKIPVK -YLEFISEAIIQVLQSKHPGDFGADAQGAMSKALELFRNDIAAKYKELGFQG ->MYG_SAISC -...G...........LSDGEWQLVLNIW.GKVEADIPSHGQEVLISLFKGHPETLEKFDK -FKHLKSEDEMKASEELKKHGTTVLTALGGILKKKGQ...HEAELKPLAQSHATKHKIPVK -YLELISDAIVHVLQKKHPGDFGADAQGAMKKALELFRNDMAAKYKELGFQG diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/clustal b/forester/archive/RIO/others/hmmer/squid/Formats/clustal deleted file mode 100644 index cebd347..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/clustal +++ /dev/null @@ -1,47 +0,0 @@ -CLUSTAL W(1.5) multiple sequence alignment - - -REF -----GCGGATTTAGCTCAGTTGGGAGAGCGCCAGACTGAAAATCTGGAGGTC-CTGTGT -A0380 -----GGGCTCGTAGATCAG-CGGTAGATCGCTTCCTTCGCAAGGAAGAGGCC-CTGGGT -A0500 -----GGGCTCGTAGATCAG-TGGCAGATCGCTTCCTTCGCAAGGAAGAGGCC-CGGGGT -A0501 -----GGGCTCGTAGATCAG-GGGTAGATCACTCCCTTGGCATGGGAGAGGCC-CCGGGT -A0502 -----GGGCCCATAGCTCAG-TGGTAGAGTGCCTCCTTTGCAAGGAGGATGCC-CAGGGT -A1140 -----GGGCCCTAAGCTCAGCTGGGAGAGCACCTGCCTTGCACGCAGGGGGTC-GACGGT -A1180 -----GGGCCCTTAGCTCAGCTGGGAGAGCACCTGCCTTGCACGCAGGGGGTC-GACGGT -A1540 -----GGAGCCTTAGCTCAGCTGGGAGAGCGCCTGCTTTGCACGCAGGAGGTC-AGCGGT -A1660 -----GGGGCTATAGCTCAGCTGGGAGAGCGCTTGCATGGCATGCAAGAGGTC-AGCGGT -A1661 -----GGGGGCATAGCTCAGCTGGGAGAGCGCCTGCTTTGCACGCAGGAGGTC-TGCGGT -A1662 -----GGGGCTATAGCTCAGCTGGGAGAGCGCCTGCTTTGCACGCAGGAGGTC-TGCGGT -A3920 -----GGGGGTATAGTATAATTGGTAGTACAGCAATCTTGCTCAATGCTTGTC--AAGGT -A6360 -----GGGCGTGTGGCGTAGTTGGTAGCGCGTTCGCTTAGCATGCGAAAGGTC-TCCGGT -A6400 -----GGGCGTGTGGCGTAGTCGGTAGCGCGCTCCCTTAGCATGGGAGAGGTC-TCCGGT -A7680 -----GGGGGCGTAGCTCAGATGGTAGAGCGCTCGCTTAGCATGTGAGAGGTA-CCGGGA -A7681 -----GGGGGCGTAGCTCAGATGGTAGAGCGCTCGCTTAGCATGCGAGAGGTA-CCGGGA -A9990 -----GGGGGATTAGCTCAAATGGTAGAGCGCTCGCTTAGCATGCGAGAGGTA-GCGGGA -A9991 -----GGGGAATTAGCTCAAATGGTAGAGCGCTCGCTTAGCATGCGAGAGGTA-GCGGGA -C0500 GCCAAGGTGGCAGAATTCGGC--CCAACGCATCCGCCTGCAGAGCGGAACCCCCGCCGGT -C1140 -----GGCAACAAGGCCAAGCGGCTAAGGCATGGGTCTGCAACACCCTGATC--ATCGGT - * * * * - -REF 
TCGATCCACAGAATTCGCACCA -A0380 TCAAATCCCAGCGAGTCCACCA -A0500 TCAAATCCCCGCGAGTCCACCA -A0501 TCAAATCCCGGCGAGTCCACCA -A0502 TCGAATCCCTGTGGGTCCACCA -A1140 TCGATCCCGTTAGGGTCCACCA -A1180 TCGATCCCGTTAGGGTCCACCA -A1540 TCGATCCCGCTAGGCTCCACCA -A1660 TCGATCCCGCTTAGCTCCACCA -A1661 TCGATCCCGCGCGCTCCCACCA -A1662 TCGATCCCGCATAGCTCCACCA -A3920 TCAAATCCTTGTATCTCCACCA -A6360 TCGACTCCGGACTCGTCCACCA -A6400 TCGATTCCGGACTCGTCCACCA -A7680 TCGATACCCGGCGCCTCCACCA -A7681 TCGATACCCGGCGCCTCCACCA -A9990 TCGATGCCCGCATCCTCCACCA -A9991 TCGATGCCCGCATTCTCCACCA -C0500 TCAAATCCGGCCCTTGGCTCCA -C1140 TCGAATCCGATTGTTGCCTCCA - ** * * * *** - diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/embl b/forester/archive/RIO/others/hmmer/squid/Formats/embl deleted file mode 100644 index 717a99e..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/embl +++ /dev/null @@ -1,155 +0,0 @@ -ID XXPHA21 standard; DNA; PHG; 1635 BP. -XX -AC X02501; M23775; -XX -SV X02501.1 -XX -DT 28-JAN-1986 (Rel. 08, Created) -DT 12-SEP-1993 (Rel. 36, Last updated, Version 3) -XX -DE Bacteriophage 21 DNA for left end sequence with genes 1 and 2 -XX -KW overlapping genes. -XX -OS Bacteriophage 21 -OC Viruses; dsDNA viruses, no RNA stage; Tailed phages; Siphoviridae. -XX -RN [1] -RP 1-1635 -RX MEDLINE; 85237525. -RA Miller G., Feiss M.; -RT "Sequence of the left end of phage 21 DNA"; -RL J. Mol. Biol. 183:246-249(1985). -XX -DR SWISS-PROT; P36693; TERL_BPP21. -DR SWISS-PROT; P36694; TERS_BPP21. -XX -CC Data kindly reviewed (06-MAR-1986) by M. Feiss -XX -FH Key Location/Qualifiers -FH -FT source 1..1635 -FT /db_xref="taxon:10743" -FT /organism="Bacteriophage 21" -FT misc_feature 1..177 -FT /note="bacteriophage 21 cos segment" -FT misc_feature complement(31..41) -FT /note="integrative host factor (IHF) binding sequence 1" -FT misc_feature 75..85 -FT /note="IHF binding sequence 2" -FT misc_feature 175..185 -FT /note="IHF binding sequence 3" -FT RBS 178..181 -FT /note="pot. 
SD-sequence" -FT CDS 189..737 -FT /db_xref="SWISS-PROT:P36694" -FT /note="gp 1 (aa 1-182)" -FT /transl_table=11 -FT /protein_id="CAA26342.1" -FT /translation="MKVNKKRLAEIFNVDPRTIERWQSQGLPCASKGSKGIESVFDTAM -FT AIQWYAQRETDIENEKLRKELDDLRAAAESDLQPGTIDYERYRLTKAQADAQELKNARE -FT DGVVLETELFTFILQRVAQEISGILVRVPLTLQRKYPDISPSHLDVVKTEIAKASNVAA -FT KAGENVGGWIDDFRRAEGS" -FT RBS 699..702 -FT /note="pot. SD-sequence" -FT CDS 709..>1635 -FT /db_xref="SWISS-PROT:P36693" -FT /note="gp 2 (aa 1-309)" -FT /transl_table=11 -FT /protein_id="CAA26343.1" -FT /translation="MISDAQKAANAAGAIATGLLSLIIPVPLTTVQWANKHYYLPKESS -FT YTPGRWETLPFQVGIMNCMGNDLIRTVNLIKSARVGYTKMLLGVEAYFIEHKSRNSLLF -FT QPTDSAAEDFMKSHVEPTIRDVPALLELAPWFGRKHRDNTLTLKRFSSGVGFWCLGGAA -FT AKNYREKSVDVVCYDELSSFEPDVEKEGSPTLLGDKRIEGSVWPKSIRGSTPKIKGSCQ -FT IEKAANESAHFMRFYVPCPHCGEEQYLKFGDDASPFGLKWEKNKPESVFYLCEHHGCVI -FT HQSELDQSNGRWICENTGMWTRDGLMFF" -XX -SQ Sequence 1635 BP; 411 A; 356 C; 436 G; 432 T; 0 other; - gggcggcgac ctcgcggttt ttcactattt atgaaaattt ttcagggaaa atcgtgtcgg 60 - tacttctcga atataacttt ttgttttttt taatattgca tccgtaaagg tccgacatga 120 - aagtgtccga aaatgccttt ttctggcgtt ttcatgtcgg gccttgtatt tgataatggg 180 - ttgttttcat gaaggttaat aaaaagaggc ttgccgaaat tttcaacgtg gacccgcgga 240 - cgattgaacg ctggcagtct cagggactcc cttgcgcctc caaaggtagt aagggcattg 300 - aatctgtatt tgatactgcc atggcaattc agtggtatgc gcagagggaa actgatatcg 360 - aaaacgaaaa gctccgcaaa gaactggacg atttgcgtgc ggcagcggag tcagatttac 420 - aacccggcac cattgactat gaacgctacc ggctcacaaa agcgcaggca gatgcgcagg 480 - aactgaaaaa tgcccgtgaa gacggagtag tgctggaaac tgaactgttt accttcattc 540 - tgcaacgtgt ggcacaggag atttcgggga tacttgtgcg tgtgccgttg acattacagc 600 - gtaaatatcc ggacatttca ccatcacacc ttgatgtggt gaaaactgaa atcgcgaaag 660 - cctccaatgt tgcagctaag gccggtgaaa acgtgggcgg gtggatcgat gatttcagac 720 - gcgcagaagg cagctaatgc agccggtgcg atagctacag ggcttttatc tctcattatt 780 - cctgttccac tgacgacagt tcagtgggcc aataaacatt attaccttcc taaagagtcg 840 - tcttataccc cggggcgatg ggaaacactg 
ccgtttcagg ttggcatcat gaactgtatg 900 - ggcaacgatc tgattcgcac ggttaacctg attaaatctg cccgtgttgg ttatacaaag 960 - atgttgctgg gagtggaggc ttattttatt gagcataaat cacgcaacag ccttcttttt 1020 - cagcccacgg actcagctgc tgaagatttt atgaaatctc atgttgagcc aacgataagg 1080 - gatgttcctg cattgctgga gctggctcca tggttcggaa gaaaacaccg cgataatacg 1140 - ctcaccctga agcgtttttc ctccggtgtg gggttctggt gtctgggtgg tgcggcagca 1200 - aaaaactacc gtgaaaaatc cgtggatgtg gtctgttatg acgagctttc ctcgttcgaa 1260 - ccggatgttg aaaaagaggg ttcgccaacc ctgctggggg ataaacgtat tgagggctct 1320 - gtatggccaa aatccattcg cggctcgacg ccaaaaatca aaggctcctg tcagatcgaa 1380 - aaagccgcta acgagtcggc acacttcatg cgtttttatg tgccctgtcc gcactgtggg 1440 - gaggagcagt atctgaaatt tggcgatgat gcctcgcctt tcggtcttaa gtgggagaag 1500 - aataagccag aaagtgtttt ctacctttgc gagcatcatg gctgtgtgat ccatcagtct 1560 - gagcttgacc agagtaacgg gcggtggatc tgtgaaaaca cgggcatgtg gacccgtgac 1620 - ggcctgatgt ttttc 1635 -// -ID XXPHI80 standard; DNA; PHG; 233 BP. -XX -AC X01639; -XX -SV X01639.1 -XX -DT 02-JUL-1986 (Rel. 09, Created) -DT 02-JUL-1986 (Rel. 09, Last updated, Version 1) -XX -DE Bacteriophage phi 80 DNA-fragment with replication origin -XX -KW origin of replication. -XX -OS Bacteriophage phi-80 -OC Viruses; dsDNA viruses, no RNA stage; Tailed phages; Siphoviridae; -OC Lambda phage group; bacteriophage lambda. -XX -RN [1] -RP 1-233 -RX MEDLINE; 79135017. -RA Grosschedl R., Hobom G.; -RT "DNA sequences and structural homologies of the replication origins of -RT lambdoid bacteriophages"; -RL Nature 277:621-627(1979). -XX -FH Key Location/Qualifiers -FH -FT source 1..233 -FT /db_xref="taxon:10713" -FT /organism="Bacteriophage phi-80" -FT rep_origin 40..187 -FT /note="origin of replication of phi 21" -FT misc_feature 40..128 -FT /note="pot. 
binding site for initiator protein" -FT repeat_region 50..55 -FT /note="multiple repeated sequence I" -FT misc_feature 61..66 -FT /note="inverted repeat of sequence I" -FT repeat_region 71..76 -FT /note="direct repeat of I" -FT misc_feature 82..87 -FT /note="inverted repeat of I" -FT repeat_region 92..97 -FT /note="direct repeat of I" -FT misc_feature 103..108 -FT /note="inverted repeat of I" -FT repeat_region 113..117 -FT /note="imp. direct repeat of I" -FT misc_feature 129..155 -FT /note="pot. region of replicational primer start site" -FT misc_feature 156..187 -FT /note="pot. binding site for initiator protein" -XX -SQ Sequence 233 BP; 91 A; 51 C; 48 G; 43 T; 0 other; - ggaccaaata aaaacatctc agaatggtgc atcctcaaaa cgagggaaaa tcccctaaaa 60 - cgagggataa aacatccctc aaattggggg attgctatcc ctcaaaacag ggggacacaa 120 - aagacactat tacaaaagaa aaaagaaaag attattcgtc agagaattct ggcgaatcct 180 - ctgaccagcc agaaaacgac ctttctgtgg tgaaaccgga tgctgcaatt cag 233 -// diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/fasta b/forester/archive/RIO/others/hmmer/squid/Formats/fasta deleted file mode 100644 index 783cf51..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/fasta +++ /dev/null @@ -1,26 +0,0 @@ ->AC3.1 CE05131 (CAMBRIDGE) -MAQTLLSRHFRGVNCYFIFLNFAGWLMDLHLSTFMQFIPLFPVFGGYCTGLLTQIFRIDDSFQTTYTAFTICLVASALNS -CFVRKHQAISKISSKYLLDNVTYCIVIFLLNIYPVIAASLLYLSMLNKSEQVELVKSVYPNLVDKFASLPNYVVFDSNIW -AIVFFAFIFFGCTYTLVLIVTTTYQMFKILDDNRKHISASNYAKHRATLRSLLAQFTTCFLIVGPASLFSLLVVIRYEHS -QVATHWTIVALTLHSSANAIIIPLRIISISTVYSPEYKNSNGAKYCSNNNNPLTSNFPSFKLVNKLFRLLMILFYYFKLK -VPDLLSFCDFTHFDNPESKFYKIHIAKIRLNCSLIF ->AC3.2 CE05132 UDP-GLUCURONOSYLTRANSFERASE (CAMBRIDGE) -MLHFLSVLRSEETNFLKISKLKKLKTCILNFSIKYGLFEFVKVNHQISILGMYTFLFLLLSLLAVDAGKILVYSPSISRS -HLISNGRIADALVDAGHDVVMFITEYEPLTEFTGTKKAKVITMKGFSTKFAEDMDGIGEYLLSSSRLSFLERLMFEKTCT -GACDDLMTRREELEQLRAYNFDVAFSEQIDLCGVGIVRYLGIKNHLWISTTPIMDAVSYNLGIPAPSSYVPTIEENDNGD 
-KMDFWQRTFNLYMKIGSILIHRYGTDGTTEVFRKYIPDFPNVREIAANSSLCFVNSDEVLDLPRPTITKAIYVGGLGIPK -VSKPLDKKFTNIMSKGKEGVVIISLGSIIPFGDLPAAAKEGVLRAIQEISDYHFLIKIAKGDNNTKKLVEGIKNVDVAEW -LPQVDILSHPRLKLFVMHGGINGLVETAIQAVPTVIVPVFADQFRNGRMVEKRGIGKVLLKLDIGYESFKNTVLTVLNTP -SYKKNAIRIGKMMRDKPFSPEERLTKWTQFAIDHGVLEELHVEGSRLNTIIYYNLDVIAFVLFVFVAVLHVFIYAFKFLC -CDCYDLISYSSPSSCSFSSILVYSPSISRSHLISNGRIADALVDAGHDVVMFITEYEPLTEFTGTKKAKVRSTMIIQWTI -LGSTLLLIQEQIFWKGLCTKNGSLIFVMVICFKILNTKSNILNLDLMARREELEQLRAYNFDVAFSEQIDLCGVRIVRYL -GIKNHLWISTTPIMDAVSYNLGIPAPSSYVPTIEENDNGDKMDFWQRTFSLYMKIGAILIHRYATDSTTEVFRKYIPDFP -NVREIAANSSLCFVNSDEVLDLPRLTITKTIYVGGLGTPNISQHLDNVFAKIMSKGKRGVIIISLGSFVQFGDFPVNIKK -EVFRAISELSEYHFLIKISKDDTNTKTLTKEISNVDLVHWFPQVDLLSNPRLKLFIMHGGINGLVEKFF ->AC3.3 CE05133 (CAMBRIDGE) -MRFIAIAALIASSVLLAEATTIRDKRQSCGCAPRVQPSCSCQRTTYTQPQQYSCSCQNTAPVQKSCSCAQPVQQQTYQIQ -ASQCAPACQQSCQNQCQSAPSVSQCQSTCQQSCQTSSCYTPTTPAPVQCQPSCMPACEQSCVVQTPAPVQCVPQCQQQCQ -QQCVQTQPIQQCQPQCQQQCVQQCAPTTTAAPQIIKINMEISAQCVPQCQQSCQQQCVQQQVPAQQCNQQCTQQCQTTCQ -QAVPQCQQQCAPQCQQPSAPQCQQCQNTCQQAAPVCQQQCAPQCQQQSAPACQQCQTSCQQTQQCQQQCTPQCQQPSAPQ -CQQCQSACQAPVATTAAPQVVTIILEASVSQSAQCEPQCQQSCQQQCVQQQQPMQQCAPACTQSCSQSCSAAQPAQMPCQ -TQSVNSCSCQQNYSPCGNGQCCKRK diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/formattest.pl b/forester/archive/RIO/others/hmmer/squid/Formats/formattest.pl deleted file mode 100755 index 9eafaac..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/formattest.pl +++ /dev/null @@ -1,97 +0,0 @@ -#! /usr/local/bin/perl - -$binpath = shift; - -# Suck in the regression data on our file format test suite. 
-# - -print "Format test suite...\t"; - -open(DAT,"regression.dat") || die "failed to open regression.dat"; -$nfiles = 0; -while () { - if (/^\#/) { next; } - if (/^(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)/) { - $filename[$nfiles] = $1; - $format[$nfiles] = $2; - $seqtype[$nfiles] = $3; - $nseq[$nfiles] = $4; - $nres[$nfiles] = $5; - $shortest[$nfiles] = $6; - $longest[$nfiles] = $7; - if ($8 eq "yes") { $autodetect[$nfiles] = 1; } else { $autodetect[$nfiles] = 0; } - if ($9 eq "yes") { $is_alignment[$nfiles] = 1; } else { $is_alignment[$nfiles] = 0; } - if ($10 eq "yes") { $is_singleseq[$nfiles] = 1; } else { $is_singleseq[$nfiles] = 0; } - $nfiles++; - } -} -close(DAT); - -# Test 1. -# Run seqstat on every file in two modes; -# autodetecting (if format allows it), then forcing a format with --informat. -# -for ($i = 0; $i < $nfiles; $i++) { - if ($autodetect[$i]) { - $output = `$binpath/seqstat $filename[$i]`; - if ($? != 0) { die "seqstat failed, autodetecting, on $filename[$i]"; } - ($ns, $nr, $fr, $to) = &parse_seqstat($output); - if ($ns != $nseq[$i] || - $nr != $nres[$i] || - $fr != $shortest[$i] || - $to != $longest[$i]) - { die "seqstat regression failed, autodetecting, on $filename[$i]"; } - } - $output = `$binpath/seqstat --informat $format[$i] $filename[$i]`; - if ($? != 0) { die "seqstat failed, using --informat, on $filename[$i]"; } - ($ns, $nr, $fr, $to) = &parse_seqstat($output); - if ($ns != $nseq[$i] || - $nr != $nres[$i] || - $fr != $shortest[$i] || - $to != $longest[$i]) - { die "seqstat regression failed, using --informat, on $filename[$i]"; } -} - -# Test 2. -# Reformatting tests. -# -for ($i = 0; $i < $nfiles; $i++) { - for ($j = 0; $j < $nfiles; $j++) { - if (! $is_alignment[$i] && $is_alignment[$j]) { next; } # can't convert unaligned to aligned - if (! 
$is_singleseq[$i] && $is_singleseq[$j]) { next; } # can't convert multiple seqs to single seq format - - `$binpath/sreformat --informat $format[$i] $format[$j] $filename[$i] > formattest.tmp`; - if ($? != 0) { die "sreformat failed ($format[$i] to $format[$j]) on $filename[$i]"; } - $output = `$binpath/seqstat --informat $format[$j] formattest.tmp`; - if ($? != 0) { die "seqstat failed after sreformat ($format[$i] to $format[$j]) on $filename[$i]"; } - ($ns, $nr, $fr, $to) = &parse_seqstat($output); - if ($ns != $nseq[$i] || - $nr != $nres[$i] || - $fr != $shortest[$i] || - $to != $longest[$i]) - { die "seqstat regression failed after sreformat ($format[$i] to $format[$j]) on $filename[$i]"; } - } -} - -print "passed.\n"; -unlink "formattest.tmp"; - - -# Function: parse_seqstat(file) -# -# Returns the number of sequences in the file, -# and their maximum and minimum length, and their avg. len. -# Dies if 'seqstat' fails. -# -sub parse_seqstat { - local($output) = shift; - my ($nseq, $nres, $fromlen, $tolen); - - if ($output =~ /Number of sequences:\s+(\d+)/) {$nseq = $1; } - if ($output =~ /Total # residues:\s+(\d+)/) {$nres = $1; } - if ($output =~ /Smallest:\s+(\d+)/) {$fromlen = $1; } - if ($output =~ /Largest:\s+(\d+)/) {$tolen = $1; } - ($nseq, $nres, $fromlen, $tolen); -} - - diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/gcg b/forester/archive/RIO/others/hmmer/squid/Formats/gcg deleted file mode 100644 index 0affa1f..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/gcg +++ /dev/null @@ -1,397 +0,0 @@ - Free format documentation can precede a GCG file. - In principle it's possible for this documentation to confuse - Babelfish autodetection, if it looks like the header of - a different sequence file format. - -HIVHXB2CG Length: 9718 March 10, 1993 14:11 Type: N Check: 1730 .. 
- - 1 TGGAAGGGCT AATTCACTCC CAACGAAGAC AAGATATCCT TGATCTGTGG - - 51 ATCTACCACA CACAAGGCTA CTTCCCTGAT TAGCAGAACT ACACACCAGG - - 101 GCCAGGGATC AGATATCCAC TGACCTTTGG ATGGTGCTAC AAGCTAGTAC - - 151 CAGTTGAGCC AGAGAAGTTA GAAGAAGCCA ACAAAGGAGA GAACACCAGC - - 201 TTGTTACACC CTGTGAGCCT GCATGGAATG GATGACCCGG AGAGAGAAGT - - 251 GTTAGAGTGG AGGTTTGACA GCCGCCTAGC ATTTCATCAC ATGGCCCGAG - - 301 AGCTGCATCC GGAGTACTTC AAGAACTGCT GACATCGAGC TTGCTACAAG - - 351 GGACTTTCCG CTGGGGACTT TCCAGGGAGG CGTGGCCTGG GCGGGACTGG - - 401 GGAGTGGCGA GCCCTCAGAT CCTGCATATA AGCAGCTGCT TTTTGCCTGT - - 451 ACTGGGTCTC TCTGGTTAGA CCAGATCTGA GCCTGGGAGC TCTCTGGCTA - - 501 ACTAGGGAAC CCACTGCTTA AGCCTCAATA AAGCTTGCCT TGAGTGCTTC - - 551 AAGTAGTGTG TGCCCGTCTG TTGTGTGACT CTGGTAACTA GAGATCCCTC - - 601 AGACCCTTTT AGTCAGTGTG GAAAATCTCT AGCAGTGGCG CCCGAACAGG - - 651 GACCTGAAAG CGAAAGGGAA ACCAGAGCTC TCTCGACGCA GGACTCGGCT - - 701 TGCTGAAGCG CCCGCACGGC AAGAGGCGAG GGGCGGCGAC TGGTGAGTAC - - 751 GCCAAAAATT TTGACTAGCG GAGGCTAGAA GGAGAGAGAT GGGTGCGAGA - - 801 GCGTCAGTAT TAAGCGGGGG AGAATTAGAT CGATGGGAAA AAATTCGGTT - - 851 AAGGCCAGGG GGAAAGAAAA AATATAAATT AAAACATATA GTATGGGCAA - - 901 GCAGGGAGCT AGAACGATTC GCAGTTAATC CTGGCCTGTT AGAAACATCA - - 951 GAAGGCTGTA GACAAATACT GGGACAGCTA CAACCATCCC TTCAGACAGG - - 1001 ATCAGAAGAA CTTAGATCAT TATATAATAC AGTAGCAACC CTCTATTGTG - - 1051 TGCATCAAAG GATAGAGATA AAAGACACCA AGGAAGCTTT AGACAAGATA - - 1101 GAGGAAGAGC AAAACAAAAG TAAGAAAAAA GCACAGCAAG CAGCAGCTGA - - 1151 CACAGGACAC AGCAATCAGG TCAGCCAAAA TTACCCTATA GTGCAGAACA - - 1201 TCCAGGGGCA AATGGTACAT CAGGCCATAT CACCTAGAAC TTTAAATGCA - - 1251 TGGGTAAAAG TAGTAGAAGA GAAGGCTTTC AGCCCAGAAG TGATACCCAT - - 1301 GTTTTCAGCA TTATCAGAAG GAGCCACCCC ACAAGATTTA AACACCATGC - - 1351 TAAACACAGT GGGGGGACAT CAAGCAGCCA TGCAAATGTT AAAAGAGACC - - 1401 ATCAATGAGG AAGCTGCAGA ATGGGATAGA GTGCATCCAG TGCATGCAGG - - 1451 GCCTATTGCA CCAGGCCAGA TGAGAGAACC AAGGGGAAGT GACATAGCAG - - 1501 GAACTACTAG TACCCTTCAG GAACAAATAG GATGGATGAC AAATAATCCA - - 1551 CCTATCCCAG TAGGAGAAAT 
TTATAAAAGA TGGATAATCC TGGGATTAAA - - 1601 TAAAATAGTA AGAATGTATA GCCCTACCAG CATTCTGGAC ATAAGACAAG - - 1651 GACCAAAGGA ACCCTTTAGA GACTATGTAG ACCGGTTCTA TAAAACTCTA - - 1701 AGAGCCGAGC AAGCTTCACA GGAGGTAAAA AATTGGATGA CAGAAACCTT - - 1751 GTTGGTCCAA AATGCGAACC CAGATTGTAA GACTATTTTA AAAGCATTGG - - 1801 GACCAGCGGC TACACTAGAA GAAATGATGA CAGCATGTCA GGGAGTAGGA - - 1851 GGACCCGGCC ATAAGGCAAG AGTTTTGGCT GAAGCAATGA GCCAAGTAAC - - 1901 AAATTCAGCT ACCATAATGA TGCAGAGAGG CAATTTTAGG AACCAAAGAA - - 1951 AGATTGTTAA GTGTTTCAAT TGTGGCAAAG AAGGGCACAC AGCCAGAAAT - - 2001 TGCAGGGCCC CTAGGAAAAA GGGCTGTTGG AAATGTGGAA AGGAAGGACA - - 2051 CCAAATGAAA GATTGTACTG AGAGACAGGC TAATTTTTTA GGGAAGATCT - - 2101 GGCCTTCCTA CAAGGGAAGG CCAGGGAATT TTCTTCAGAG CAGACCAGAG - - 2151 CCAACAGCCC CACCAGAAGA GAGCTTCAGG TCTGGGGTAG AGACAACAAC - - 2201 TCCCCCTCAG AAGCAGGAGC CGATAGACAA GGAACTGTAT CCTTTAACTT - - 2251 CCCTCAGGTC ACTCTTTGGC AACGACCCCT CGTCACAATA AAGATAGGGG - - 2301 GGCAACTAAA GGAAGCTCTA TTAGATACAG GAGCAGATGA TACAGTATTA - - 2351 GAAGAAATGA GTTTGCCAGG AAGATGGAAA CCAAAAATGA TAGGGGGAAT - - 2401 TGGAGGTTTT ATCAAAGTAA GACAGTATGA TCAGATACTC ATAGAAATCT - - 2451 GTGGACATAA AGCTATAGGT ACAGTATTAG TAGGACCTAC ACCTGTCAAC - - 2501 ATAATTGGAA GAAATCTGTT GACTCAGATT GGTTGCACTT TAAATTTTCC - - 2551 CATTAGCCCT ATTGAGACTG TACCAGTAAA ATTAAAGCCA GGAATGGATG - - 2601 GCCCAAAAGT TAAACAATGG CCATTGACAG AAGAAAAAAT AAAAGCATTA - - 2651 GTAGAAATTT GTACAGAGAT GGAAAAGGAA GGGAAAATTT CAAAAATTGG - - 2701 GCCTGAAAAT CCATACAATA CTCCAGTATT TGCCATAAAG AAAAAAGACA - - 2751 GTACTAAATG GAGAAAATTA GTAGATTTCA GAGAACTTAA TAAGAGAACT - - 2801 CAAGACTTCT GGGAAGTTCA ATTAGGAATA CCACATCCCG CAGGGTTAAA - - 2851 AAAGAAAAAA TCAGTAACAG TACTGGATGT GGGTGATGCA TATTTTTCAG - - 2901 TTCCCTTAGA TGAAGACTTC AGGAAGTATA CTGCATTTAC CATACCTAGT - - 2951 ATAAACAATG AGACACCAGG GATTAGATAT CAGTACAATG TGCTTCCACA - - 3001 GGGATGGAAA GGATCACCAG CAATATTCCA AAGTAGCATG ACAAAAATCT - - 3051 TAGAGCCTTT TAGAAAACAA AATCCAGACA TAGTTATCTA TCAATACATG - - 3101 GATGATTTGT ATGTAGGATC TGACTTAGAA 
ATAGGGCAGC ATAGAACAAA - - 3151 AATAGAGGAG CTGAGACAAC ATCTGTTGAG GTGGGGACTT ACCACACCAG - - 3201 ACAAAAAACA TCAGAAAGAA CCTCCATTCC TTTGGATGGG TTATGAACTC - - 3251 CATCCTGATA AATGGACAGT ACAGCCTATA GTGCTGCCAG AAAAAGACAG - - 3301 CTGGACTGTC AATGACATAC AGAAGTTAGT GGGGAAATTG AATTGGGCAA - - 3351 GTCAGATTTA CCCAGGGATT AAAGTAAGGC AATTATGTAA ACTCCTTAGA - - 3401 GGAACCAAAG CACTAACAGA AGTAATACCA CTAACAGAAG AAGCAGAGCT - - 3451 AGAACTGGCA GAAAACAGAG AGATTCTAAA AGAACCAGTA CATGGAGTGT - - 3501 ATTATGACCC ATCAAAAGAC TTAATAGCAG AAATACAGAA GCAGGGGCAA - - 3551 GGCCAATGGA CATATCAAAT TTATCAAGAG CCATTTAAAA ATCTGAAAAC - - 3601 AGGAAAATAT GCAAGAATGA GGGGTGCCCA CACTAATGAT GTAAAACAAT - - 3651 TAACAGAGGC AGTGCAAAAA ATAACCACAG AAAGCATAGT AATATGGGGA - - 3701 AAGACTCCTA AATTTAAACT GCCCATACAA AAGGAAACAT GGGAAACATG - - 3751 GTGGACAGAG TATTGGCAAG CCACCTGGAT TCCTGAGTGG GAGTTTGTTA - - 3801 ATACCCCTCC CTTAGTGAAA TTATGGTACC AGTTAGAGAA AGAACCCATA - - 3851 GTAGGAGCAG AAACCTTCTA TGTAGATGGG GCAGCTAACA GGGAGACTAA - - 3901 ATTAGGAAAA GCAGGATATG TTACTAATAG AGGAAGACAA AAAGTTGTCA - - 3951 CCCTAACTGA CACAACAAAT CAGAAGACTG AGTTACAAGC AATTTATCTA - - 4001 GCTTTGCAGG ATTCGGGATT AGAAGTAAAC ATAGTAACAG ACTCACAATA - - 4051 TGCATTAGGA ATCATTCAAG CACAACCAGA TCAAAGTGAA TCAGAGTTAG - - 4101 TCAATCAAAT AATAGAGCAG TTAATAAAAA AGGAAAAGGT CTATCTGGCA - - 4151 TGGGTACCAG CACACAAAGG AATTGGAGGA AATGAACAAG TAGATAAATT - - 4201 AGTCAGTGCT GGAATCAGGA AAGTACTATT TTTAGATGGA ATAGATAAGG - - 4251 CCCAAGATGA ACATGAGAAA TATCACAGTA ATTGGAGAGC AATGGCTAGT - - 4301 GATTTTAACC TGCCACCTGT AGTAGCAAAA GAAATAGTAG CCAGCTGTGA - - 4351 TAAATGTCAG CTAAAAGGAG AAGCCATGCA TGGACAAGTA GACTGTAGTC - - 4401 CAGGAATATG GCAACTAGAT TGTACACATT TAGAAGGAAA AGTTATCCTG - - 4451 GTAGCAGTTC ATGTAGCCAG TGGATATATA GAAGCAGAAG TTATTCCAGC - - 4501 AGAAACAGGG CAGGAAACAG CATATTTTCT TTTAAAATTA GCAGGAAGAT - - 4551 GGCCAGTAAA AACAATACAT ACTGACAATG GCAGCAATTT CACCGGTGCT - - 4601 ACGGTTAGGG CCGCCTGTTG GTGGGCGGGA ATCAAGCAGG AATTTGGAAT - - 4651 TCCCTACAAT CCCCAAAGTC AAGGAGTAGT AGAATCTATG 
AATAAAGAAT - - 4701 TAAAGAAAAT TATAGGACAG GTAAGAGATC AGGCTGAACA TCTTAAGACA - - 4751 GCAGTACAAA TGGCAGTATT CATCCACAAT TTTAAAAGAA AAGGGGGGAT - - 4801 TGGGGGGTAC AGTGCAGGGG AAAGAATAGT AGACATAATA GCAACAGACA - - 4851 TACAAACTAA AGAATTACAA AAACAAATTA CAAAAATTCA AAATTTTCGG - - 4901 GTTTATTACA GGGACAGCAG AAATTCACTT TGGAAAGGAC CAGCAAAGCT - - 4951 CCTCTGGAAA GGTGAAGGGG CAGTAGTAAT ACAAGATAAT AGTGACATAA - - 5001 AAGTAGTGCC AAGAAGAAAA GCAAAGATCA TTAGGGATTA TGGAAAACAG - - 5051 ATGGCAGGTG ATGATTGTGT GGCAAGTAGA CAGGATGAGG ATTAGAACAT - - 5101 GGAAAAGTTT AGTAAAACAC CATATGTATG TTTCAGGGAA AGCTAGGGGA - - 5151 TGGTTTTATA GACATCACTA TGAAAGCCCT CATCCAAGAA TAAGTTCAGA - - 5201 AGTACACATC CCACTAGGGG ATGCTAGATT GGTAATAACA ACATATTGGG - - 5251 GTCTGCATAC AGGAGAAAGA GACTGGCATT TGGGTCAGGG AGTCTCCATA - - 5301 GAATGGAGGA AAAAGAGATA TAGCACACAA GTAGACCCTG AACTAGCAGA - - 5351 CCAACTAATT CATCTGTATT ACTTTGACTG TTTTTCAGAC TCTGCTATAA - - 5401 GAAAGGCCTT ATTAGGACAC ATAGTTAGCC CTAGGTGTGA ATATCAAGCA - - 5451 GGACATAACA AGGTAGGATC TCTACAATAC TTGGCACTAG CAGCATTAAT - - 5501 AACACCAAAA AAGATAAAGC CACCTTTGCC TAGTGTTACG AAACTGACAG - - 5551 AGGATAGATG GAACAAGCCC CAGAAGACCA AGGGCCACAG AGGGAGCCAC - - 5601 ACAATGAATG GACACTAGAG CTTTTAGAGG AGCTTAAGAA TGAAGCTGTT - - 5651 AGACATTTTC CTAGGATTTG GCTCCATGGC TTAGGGCAAC ATATCTATGA - - 5701 AACTTATGGG GATACTTGGG CAGGAGTGGA AGCCATAATA AGAATTCTGC - - 5751 AACAACTGCT GTTTATCCAT TTTCAGAATT GGGTGTCGAC ATAGCAGAAT - - 5801 AGGCGTTACT CGACAGAGGA GAGCAAGAAA TGGAGCCAGT AGATCCTAGA - - 5851 CTAGAGCCCT GGAAGCATCC AGGAAGTCAG CCTAAAACTG CTTGTACCAA - - 5901 TTGCTATTGT AAAAAGTGTT GCTTTCATTG CCAAGTTTGT TTCATAACAA - - 5951 AAGCCTTAGG CATCTCCTAT GGCAGGAAGA AGCGGAGACA GCGACGAAGA - - 6001 GCTCATCAGA ACAGTCAGAC TCATCAAGCT TCTCTATCAA AGCAGTAAGT - - 6051 AGTACATGTA ACGCAACCTA TACCAATAGT AGCAATAGTA GCATTAGTAG - - 6101 TAGCAATAAT AATAGCAATA GTTGTGTGGT CCATAGTAAT CATAGAATAT - - 6151 AGGAAAATAT TAAGACAAAG AAAAATAGAC AGGTTAATTG ATAGACTAAT - - 6201 AGAAAGAGCA GAAGACAGTG GCAATGAGAG TGAAGGAGAA ATATCAGCAC - - 
6251 TTGTGGAGAT GGGGGTGGAG ATGGGGCACC ATGCTCCTTG GGATGTTGAT - - 6301 GATCTGTAGT GCTACAGAAA AATTGTGGGT CACAGTCTAT TATGGGGTAC - - 6351 CTGTGTGGAA GGAAGCAACC ACCACTCTAT TTTGTGCATC AGATGCTAAA - - 6401 GCATATGATA CAGAGGTACA TAATGTTTGG GCCACACATG CCTGTGTACC - - 6451 CACAGACCCC AACCCACAAG AAGTAGTATT GGTAAATGTG ACAGAAAATT - - 6501 TTGACATGTG GAAAAATGAC ATGGTAGAAC AGATGCATGA GGATATAATC - - 6551 AGTTTATGGG ATCAAAGCCT AAAGCCATGT GTAAAATTAA CCCCACTCTG - - 6601 TGTTAGTTTA AAGTGCACTG ATTTGAAGAA TGATACTAAT ACCAATAGTA - - 6651 GTAGCGGGAG AATGATAATG GAGAAAGGAG AGATAAAAAA CTGCTCTTTC - - 6701 AATATCAGCA CAAGCATAAG AGGTAAGGTG CAGAAAGAAT ATGCATTTTT - - 6751 TTATAAACTT GATATAATAC CAATAGATAA TGATACTACC AGCTATAGCT - - 6801 TGACAAGTTG TAACACCTCA GTCATTACAC AGGCCTGTCC AAAGGTATCC - - 6851 TTTGAGCCAA TTCCCATACA TTATTGTGCC CCGGCTGGTT TTGCGATTCT - - 6901 AAAATGTAAT AATAAGACGT TCAATGGAAC AGGACCATGT ACAAATGTCA - - 6951 GCACAGTACA ATGTACACAT GGAATTAGGC CAGTAGTATC AACTCAACTG - - 7001 CTGTTAAATG GCAGTCTAGC AGAAGAAGAG GTAGTAATTA GATCTGTCAA - - 7051 TTTCACGGAC AATGCTAAAA CCATAATAGT ACAGCTGAAC ACATCTGTAG - - 7101 AAATTAATTG TACAAGACCC AACAACAATA CAAGAAAAAG AATCCGTATC - - 7151 CAGAGAGGAC CAGGGAGAGC ATTTGTTACA ATAGGAAAAA TAGGAAATAT - - 7201 GAGACAAGCA CATTGTAACA TTAGTAGAGC AAAATGGAAT AACACTTTAA - - 7251 AACAGATAGA TAGCAAATTA AGAGAACAAT TCGGAAATAA TAAAACAATA - - 7301 ATCTTTAAGC AATCCTCAGG AGGGGACCCA GAAATTGTAA CGCACAGTTT - - 7351 TAATTGTGGA GGGGAATTTT TCTACTGTAA TTCAACACAA CTGTTTAATA - - 7401 GTACTTGGTT TAATAGTACT TGGAGTACTG AAGGGTCAAA TAACACTGAA - - 7451 GGAAGTGACA CAATCACCCT CCCATGCAGA ATAAAACAAA TTATAAACAT - - 7501 GTGGCAGAAA GTAGGAAAAG CAATGTATGC CCCTCCCATC AGTGGACAAA - - 7551 TTAGATGTTC ATCAAATATT ACAGGGCTGC TATTAACAAG AGATGGTGGT - - 7601 AATAGCAACA ATGAGTCCGA GATCTTCAGA CTTGGAGGAG GAGATATGAG - - 7651 GGACAATTGG AGAAGTGAAT TATATAAATA TAAAGTAGTA AAAATTGAAC - - 7701 CATTAGGAGT AGCACCCACC AAGGCAAAGA GAAGAGTGGT GCAGAGAGAA - - 7751 AAAAGAGCAG TGGGAATAGG AGCTTTGTTC CTTGGGTTCT TGGGAGCAGC - - 7801 AGGAAGCACT 
ATGGGCGCAG CCTCAATGAC GCTGACGGTA CAGGCCAGAC - - 7851 AATTATTGTC TGGTATAGTG CAGCAGCAGA ACAATTTGCT GAGGGCTATT - - 7901 GAGGCGCAAC AGCATCTGTT GCAACTCACA GTCTGGGGCA TCAAGCAGCT - - 7951 CCAAGCAAGA ATCCTAGCTG TGGAAAGATA CCTAAAGGAT CAACAGCTCC - - 8001 TAGGGATTTG GGGTTGCTCT GGAAAACTCA TTTGCACCAC TGCTGTGCCT - - 8051 TGGAATGCTA GTTGGAGTAA TAAATCTCTG GAACAGATCT GGAATCACAC - - 8101 GACCTGGATG GAGTGGGACA GAGAAATTAA CAATTACACA AGCTTAATAC - - 8151 ACTCCTTAAT TGAAGAATCG CAAAACCAGC AAGAAAAGAA TGAACAAGAA - - 8201 TTATTGGAAT TAGATAAATG GGCAAGTTTG TGGAATTGGT TTAACATAAC - - 8251 AAATTGGCTG TGGTATATAA AATTATTCAT AATGATAGTA GGAGGCTTGG - - 8301 TAGGTTTAAG AATAGTTTTT GCTGTACTTT CTATAGTGAA TAGAGTTAGG - - 8351 CAGGGATATT CACCATTATC GTTTCAGACC CACCTCCCAA TCCCGAGGGG - - 8401 ACCCGACAGG CCCGAAGGAA TAGAAGAAGA AGGTGGAGAG AGAGACAGAG - - 8451 ACAGATCCAT TCGATTAGTG AACGGATCCT TGGCACTTAT CTGGGACGAT - - 8501 CTGCGGAGCC TGTGCCTCTT CAGCTACCAC CGCTTGAGAG ACTTACTCTT - - 8551 GATTGTAACG AGGATTGTGG AACTTCTGGG ACGCAGGGGG TGGGAAGCCC - - 8601 TCAAATATTG GTGGAATCTC CTACAGTATT GGAGTCAGGA ACTAAAGAAT - - 8651 AGTGCTGTTA GCTTGCTCAA TGCCACAGCC ATAGCAGTAG CTGAGGGGAC - - 8701 AGATAGGGTT ATAGAAGTAG TACAAGGAGC TTGTAGAGCT ATTCGCCACA - - 8751 TACCTAGAAG AATAAGACAG GGCTTGGAAA GGATTTTGCT ATAAGATGGG - - 8801 TGGCAAGTGG TCAAAAAGTA GTGTGATTGG ATGGCTTACT GTAAGGGAAA - - 8851 GAATGAGACG AGCTGAGCCA GCAGCAGATG GGGTGGGAGC AGCATCTCGA - - 8901 GACCTGGAAA AACATGGAGC AATCACAAGT AGCAACACAG CAGCTACCAA - - 8951 TGCTGCTTGT GCCTGGCTAG AAGCACAAGA GGAGGAGGAG GTGGGTTTTC - - 9001 CAGTCACACC TCAGGTACCT TTAAGACCAA TGACTTACAA GGCAGCTGTA - - 9051 GATCTTAGCC ACTTTTTAAA AGAAAAGGGG GGACTGGAAG GGCTAATTCA - - 9101 CTCCCAAAGA AGACAAGATA TCCTTGATCT GTGGATCTAC CACACACAAG - - 9151 GCTACTTCCC TGATTGACAG AACTACACAC CAGGGCCAGG GGTCAGATAT - - 9201 CCACTGACCT TTGGATGGTG CTACAAGCTA GTACCAGTTG AGCCAGATAA - - 9251 GATAGAAGAG GCCAATAAAG GAGAGAACAC CAGCTTGTTA CACCCTGTGA - - 9301 GCCTGCATGG GATGGATGAC CCGGAGAGAG AAGTGTTAGA GTGGAGGTTT - - 9351 GACAGCCGCC TAGCATTTCA 
TCACGTGGCC CGAGAGCTGC ATCCGGAGTA - - 9401 CTTCAAGAAC TGCTGACATC GAGCTTGCTA CAAGGGACTT TCCGCTGGGG - - 9451 ACTTTCCAGG GAGGCGTGGC CTGGGCGGGA CTGGGGAGTG GCGAGCCCTC - - 9501 AGATCCTGCA TATAAGCAGC TGCTTTTTGC CTGTACTGGG TCTCTCTGGT - - 9551 TAGACCAGAT CTGAGCCTGG GAGCTCTCTG GCTAACTAGG GAACCCACTG - - 9601 CTTAAGCCTC AATAAAGCTT GCCTTGAGTG CTTCAAGTAG TGTGTGCCCG - - 9651 TCTGTTGTGT GACTCTGGTA ACTAGAGATC CCTCAGACCC TTTTAGTCAG - - 9701 TGTGGAAAAT CTCTAGCA - diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/gcgdata.1 b/forester/archive/RIO/others/hmmer/squid/Formats/gcgdata.1 deleted file mode 100644 index 93c01ff..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/gcgdata.1 +++ /dev/null @@ -1,60 +0,0 @@ ->>>>104K_THEPA 8/92 ASCII Len: 924 -P15711 theileria parva. 104 kd microneme-rhoptry antigen. 8/92 -MKFLILLFNILCLFPVLAADNHGVGPQGASGVDPITFDINSNQTGPAFLTAVEMAGVKYLQVQHGSNVNIHRLVEGNVVIWENASTPLYTGAIVTNNDGPYMAYVEVLGDPNLQFFIKSGDAWVTLSEHEYLAKLQEIRQAVHIESVFSLNMAFQLENNKYEVETHAKNGANMVTFIPRNGHICKMVYHKNVRIYKATGNDTVTSVVGFFRGLRLLLINVFSIDDNGMMSNRYFQHVDDKYVPISQKNYETGIVKLKDYKHAYHPVDLDIKDIDYTMFHLADATYHEPCFKIIPNTGFCITKLFDGDQVLYESFNPLIHCINEVHIYDRNNGSIICLHLNYSPPSYKAYLVLKDTGWEATTHPLLEEKIEELQDQRACELDVNFISDKDLYVAALTNADLNYTMVTPRPHRDVIRVSDGSEVLWYYEGLDNFLVCAWIYVSDGVASLVHLRIKDRIPANNDIYVLKGDLYWTRITKIQFTQEIKRLVKKSKKKLAPITEEDSDKHDEPPEGPGASGLPPKAPGDKEGSEGHKGPSKGSDSSKEGKKPGSGKKPGPAREHKPSKIPTLSKKPSGPKDPKHPRDPKEPRKSKSPRTASPTRRPSPKLPQLSKLPKSTSPRSPPPPTRPSSPERPEGTKIIKTSKPPSPKPPFDPSFKEKFYDDYSKAASRSKETKTTVVLDESFESILKETLPETPGTPFTTPRPVPPKRPRTPESPFEPPKDPDSPSTSPSEFFTPPESKRTRFHETPADTPLPDVTAELFKEPDVTAETKSPDEAMKRPRSPSEYEDTSPGDYPSLPMKRHRLERLRLTTTEMETDPGRMAKDASGKPVKLKRSKSFDDLTTVELAPEPKASRIVVDDEGTEADDEETHPPEERQKTEVRRRRPPKKPSKSPRPSKPKKPKKPDSAYIPSILAILVVSLIVGIL ->>>>10KD_VIGUN 2/95 ASCII Len: 75 -P18646 vigna unguiculata (cowpea). 10 kd protein precursor (clone psas10). 
2/95 -MEKKSIAGLCFLFLVLFVAQEVVVQSEAKTCENLVDTYRGPCFTTGSCDDHCKNKEHLLSGRCRDDVRCWCTRNC ->>>>110K_PLAKN 2/94 ASCII Len: 296 -P13813 plasmodium knowlesi. 110 kd antigen (pk110) (fragment). 2/94 -FNSNMLRGSVCEEDVSLMTSIDNMIEEIDFYEKEIYKGSHSGGVIKGMDYDLEDDENDEDEMTEQMVEEVADHITQDMIDEVAHHVLDNITHDMAHMEEIVHGLSGDVTQIKEIVQKVNVAVEKVKHIVETEETQKTVEPEQIEETQNTVEPEQTEETQKTVEPEQTEETQNTVEPEQIEETQKTVEPEQTEEAQKTVEPEQTEETQKTVEPEQTEETQKTVEPEQTEETQKTVEPEQTEETQKTVEPEQTEETQKTVEPEQTEETQKTVEPEQTEETQNTVEPEPTQETQNTVEP ->>>>11S3_HELAN 2/94 ASCII Len: 493 -P19084 helianthus annuus (common sunflower). 11s globulin seed storage protein g3 precursor (helianthinin g3). 2/94 -MASKATLLLAFTLLFATCIARHQQRQQQQNQCQLQNIEALEPIEVIQAEAGVTEIWDAYDQQFQCAWSILFDTGFNLVAFSCLPTSTPLFWPSSREGVILPGCRRTYEYSQEQQFSGEGGRRGGGEGTFRTVIRKLENLKEGDVVAIPTGTAHWLHNDGNTELVVVFLDTQNHENQLDENQRRFFLAGNPQAQAQSQQQQQRQPRQQSPQRQRQRQRQGQGQNAGNIFNGFTPELIAQSFNVDQETAQKLQGQNDQRGHIVNVGQDLQIVRPPQDRRSPRQQQEQATSPRQQQEQQQGRRGGWSNGVEETICSMKFKVNIDNPSQADFVNPQAGSIANLNSFKFPILEHLRLSVERGELRPNAIQSPHWTINAHNLLYVTEGALRVQIVDNQGNSVFDNELREGQVVVIPQNFAVIKRANEQGSRWVSFKTNDNAMIANLAGRVSASAASPLTLWANRYQLSREEAQQLKFSQRETVLFAPSFSRGQGIRASR ->>>>11SB_CUCMA 11/90 ASCII Len: 480 -P13744 cucurbita maxima (pumpkin) (winter squash). 11s globulin beta subunit precursor. 11/90 -MARSSLFTFLCLAVFINGCLSQIEQQSPWEFQGSEVWQQHRYQSPRACRLENLRAQDPVRRAEAEAIFTEVWDQDNDEFQCAGVNMIRHTIRPKGLLLPGFSNAPKLIFVAQGFGIRGIAIPGCAETYQTDLRRSQSAGSAFKDQHQKIRPFREGDLLVVPAGVSHWMYNRGQSDLVLIVFADTRNVANQIDPYLRKFYLAGRPEQVERGVEEWERSSRKGSSGEKSGNIFSGFADEFLEEAFQIDGGLVRKLKGEDDERDRIVQVDEDFEVLLPEKDEEERSRGRYIESESESENGLEETICTLRLKQNIGRSVRADVFNPRGGRISTANYHTLPILRQVRLSAERGVLYSNAMVAPHYTVNSHSVMYATRGNARVQVVDNFGQSVFDGEVREGQVLMIPQNFVVIKRASDRGFEWIAFKTNDNAITNLLAGRVSQMRMLPLGVLSNMYRISREEAQRLKYGQQEMRVLSPGRSQGRRE ->>>>120K_RICRI 10/94 ASCII Len: 1299 -P14914 rickettsia rickettsii. 120 kd surface-exposed protein. 
10/94 -MVIQSANATGQVNFRHIVDVGADGTTAFKTAASKVTITQDSNFGNTDFGNLAAQIKVPNAITLTGNFTGDASNPGNTAGVITFDANGTLESASADANVAVTNNITAIEASGAGVVQLSGTHAAELRLGNAGSIFKLADGTVINGKVNQTALVGGALAAGTITLDGSATITGDIGNAGGAAALQRITLANDAKKTLTLGGANIIGAGGGTIDLQANGGTIKLTSTQNNIVVDFDLAIATDQTGVVDASSLTNAQTLTINGKIGTIGANNKTLGQFNIGSSKTVLSNGNVAINELVIGNDGAVQFAHDTYLITRTTNAAGQGKIIFNPVVNNGTTLAAGTNLGSATNPLAEINFGSKGVNVDTVLNVGEGVNLYATNITTTDANVGSFVFNAGGTNIVSGTVGGQQGNKFNTVALENGTTVKFLGNATFNGNTTIAANSTLQIGGNYTADCVASADGTGIVEFVNTGPITVTLNKEAAPVNALKQITVSGPGNVVINEIGNAGNHHGAVTDTIAFENSSLGAVVFLPRGIPFNDAGNTMPLTIKSTVGNKTAKGFDVPSVVVLGVDSVIADGQVIVDQNNIVGLGLGSDNGIIVNATTLYAGISTLNNNQGTVTLSGGVPNTPGTVYGLGTGIGASKFKQVTFTTDYNNLGNIIATNATINDGVTVTTGGIAGIGFDGKITLGSVNGNGNVRFADGILSNSTSMIGTTKANNGTVTYLGNAFVGNIGDSDTPVASVRFTGSDSGAGLQGNIYSQVIDFGTYNLGIVNSNIILGGGTTAINGKIDLVTNTLTFASGTSTWGNNTSIETTLTLANGNIGHIVILEGAQVNTTTTGTTTIKVQDNANANFSGTQTYTLIQGGARFNGTLGSPNFAVTGSNRFVNYSLIRAANQDYVITRTNNAENVVTNDIANSPFGGAPGVDQNVTTFVNATNTAAYNNLLLAKNSANSANFVGAIVTDTSAAITNVQLDLAKDIQAQLGNRLGALRYLGTPETAEMADLKLEHIGSVAAGDEAIDNVAYGIWAKPFYTDAHQSKKGGLAGYKAKTTGVVIGLDTLANDNLMIGAAIGITKTDIKHQDYKKGDKTDVNGFSFSLYGAQQLVKNFFAQGSAIFSLNQVKNKSQRYFFDANGNMSKQIAAGHYDNMTFGGNLTVGYDYNAMQGVLVTPMAGLSYLKSSDENYKETGTTVANKQVNSKFSDRTDLIVGAKVAGSTMNRTDLAVYPEVHAFVVHKVTGRLSKTQSVLDGQVTPCINQPDRTTKTSYNLGLSASIRSDAKMEYGIGYDAQISSKYTAHQGTLKVRVNF ->>>>128U_DROME 2/94 ASCII Len: 368 -P32234 drosophila melanogaster (fruit fly). gtp-binding protein 128up. 2/94 -MITILEKISAIESEMARTQKNKATSAHLGLLKANVAKLRRELISPKGGGGGTGEAGFEVAKTGDARVGFVGFPSVGKSTLLSNLAGVYSEVAAYEFTTLTTVPGCIKYKGAKIQLLDLPGIIEGAKDGKGRGRQVIAVARTCNLIFMVLDCLKPLGHKKLLEHELEGFGIRLNKKPPNIYYKRKDKGGINLNSMVPQSELDTDLVKTILSEYKIHNADITLRYDATSDDLIDVIEGNRIYIPCIYLLNKIDQISIEELDVIYKIPHCVPISAHHHWNFDDLLELMWEYLRLQRIYTKPKGQLPDYNSPVVLHNERTSIEDFCNKLHRSIAKEFKYALVWGSSVKHQPQKVGIEHVLNDEDVVQIVKKV ->>>>12AH_CLOS4 8/91 ASCII Len: 29 -P21215 clostridium sp. (strain c 48-50). 12-alpha-hydroxysteroid dehydrogenase (ec 1.1.1.176) (fragment). 
8/91 -MIFDGKVAIITGGGKAKSIGYGIAVAYAK ->>>>12KD_MYCLE 2/95 ASCII Len: 156 -P15878 mycobacterium leprae. 12 kd protein. 2/95 -MNDIIALKFHISLNATTWIGRIGMVILPLLVYFITYRWCIGLQRSDRAVLEHGIETGIIKRLPHGAYIELHQPLGPVDDHGHPIPLEYQGTAVPKRMNKLGSAGSPSSGSFLFADPVSEDAALREATHVAEQRALTALREHQDSIASSPNGERGKH ->>>>12S1_ARATH 4/90 ASCII Len: 472 -P15455 arabidopsis thaliana (mouse-ear cress). 12s seed storage protein. 4/90 -MARVSSLLSFCLTLLILFHGYAAQQGQQGQQFPNECQLDQLNALEPSHVLKSEAGRIEVWDHHAPQLRCSGVSFARYIIESKGLYLPSFFNTAKLSFVAKGRGLMGKVIPGCAETFQDSSEFQPRFEGQGQSQRFRDMHQKVEHIRSGDTIATTPGVAQWFYNDGQQPLVIVSVFDLASHQNQLDRNPRPFYLAGNNPQGQVWLQGREQQPQKNIFNGFGPEVIAQALKIDLQTAQQLQNQDDNRGNIVRVQGPFGVIRPPLRGQRPQEEEEEEGRHGRHGNGLEETICSARCTDNLDDPSRADVYKPQLGYISTLNSYDLPILRFIRLSALRGSIRQNAMVLPQWNANANAILYETDGEAQIQIVNDNGNRVFDGQVSQGQLIAVPQGFSVVKRATSNRFQWVEFKTNANAQINTLAGRTSVLRGLPLEVITNGFQISPEEARRVKFNTLETTLTHSSGPASYGRPRVAAA ->>>>12S2_ARATH 4/90 ASCII Len: 455 -P15456 arabidopsis thaliana (mouse-ear cress). 12s seed storage protein. 4/90 -MGRVSSIISFSLTLLILFNGYTAQQWPNECQLDQLNALEPSQIIKSEGGRIEVWDHHAPQLRCSGFAFERFVIEPQGLFLPTFLNAGKLTFVVHGRGLMGRVIPGCAETFMESPVFGEGQGQGQSQGFRDMHQKVEHLRCGDTIATPSGVAQWFYNNGNEPLILVAAADLASNQNQLDRNLRPFLIAGNNPQGQEWLQGRKQQKQNNIFNGFAPEILAQAFKINVETAQQLQNQQDNRGNIVKVNGPFGVIRPPLRRGEGGQQPHEIANGLEETLCTMRCTENLDDPSDADVYKPSLGYISTLNSYNLPILRLLRLSALRGSIRKNAMVLPQWNVNANAALYVTNGKAHIQMVNDNGERVFDQEISSGQLLVVPQGFSVMKHRIGEQFEWIEFKTNENAQVNTLAGRTSVMRGLPLEVITNGYQISPEEAKRVKFSTIETTLTHSSPMSYGRPRA ->>>>1433_DROME 12/92 ASCII Len: 248 -P29310 drosophila melanogaster (fruit fly). 14-3-3-like protein. 12/92 -MSTVDKEELVQKAKLAEQSERYDDMAQAMKSVTETGVELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTEASARKQQLAREYRERVEKELREICYEVLGLLDKYLIPKASNPESKVFYLKMKGDYYRYLAEVATGDARNTVVDDSQTAYQDAFDISKGKMQPTHPIRLGLALNFSVFYYEILNSPDKACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDTQGDEAEPQEGGDN ->>>>1433_HORVU 2/94 ASCII Len: 262 -P29305 hordeum vulgare (barley). 14-3-3-like protein. 
2/94 -MSTAEATREENVYMAKLAEQAERYEEMVEFMEKVAKTADVGELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEESRGNEAYVASIKEYRTRIETELSKICDGILKLLDSHLVPSATAAESKVFYLKMKGDYHRYLAEFKAGAERKEAAENTLVAYKSAQDIALADLPTTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAIAELDSLGEESYKDSTLIMQLLRDNLTLWTSDNAEEGGDEIKEAASKPEGEGHS ->>>>1433_MAIZE 6/94 ASCII Len: 61 -P29306 zea mays (maize). 14-3-3-like protein (fragment). 6/94 -ILNSPDRACNLAKQAFDEAISELDSLGEESYKDSTLIMQLLXDNLTLWTSDTNEDGGDEIK ->>>>1433_OENHO 12/92 ASCII Len: 260 -P29307 oenothera hookeri (hooker's evening primrose). 14-3-3-like protein. 12/92 -MATAPSPREENVYLAKLAEQAERYEEMVEFMEKVCAAADSEELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEESRGNDDHVSTIRDYRSKIETELSNICGGILKLLDSRLIPSAASGDSKVFYLKMKGDYHRYLAEFKTGAERKEAAESTLSAYKAAQDIANAELAPTHPIRLGLALNFSVFYYEILNSPDRACNLANEAFDEAIAELDTLEEESYKDSTLIMQLLRDNLTLWTSDNQDDGGDEIKEAAPKPDEQY ->>>>1433_ORYSA 6/94 ASCII Len: 260 -Q06967 oryza sativa (rice). 14-3-3-like protein s94. 6/94 -MSPAEASREENVYMAKLAEQAERYEEMVEFMEKVAKTTDVGELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEESRGNEAYVASIKEYRSRIETELSKICDGILKLLDSHLVPSATAAESNVFYLKMKGDYHRYLAEFKSGAERKEAAENTLVAYKSAQDIALADLPTTHPIRLGLALNLSVFYYEILNSPDRACNLAKQAFDDAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDNAEDGGDEIKEAAKPEGEGH ->>>>1433_SPIOL 12/92 ASCII Len: 220 -P29308 spinacia oleracea (spinach). 14-3-3-like protein (fragment). 12/92 -RNLLSVAYKNVVGARRASWRIISSIEQKEESRGNEDHVSVIRDYRSRIEKELSDNCDGILKLLDTKLVPAASSGDSKVFYLKMKGDYHRYLAEFKTGAQRKEAAESTLTAYKAAQDIANAELAPTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFVEAIAELDTLGEDSYKDSTLIMQLLRDNLTLWTSDMQDEAADEITEEAAKQQKAVNNNKIAY ->>>>1433_XENLA 12/92 ASCII Len: 235 -P29309 xenopus laevis (african clawed frog). 14-3-3-like protein (fragment). 12/92 -AKLSEQAERYDDMAASMKAVTELGAELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTEGNDKRQQMAREYREKVETELQDICKDVLDLLDRFLVPNATPPESKVFYLKMKGDYYRYLSEVASGDSKQETVASSQQAYQEAFEISKSEMQPTHPIRLGLALNFSVFYYEILNSPEKACSLAKSAFDEAIRELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGEEADNVEGDN ->>>>1434_ARATH 2/94 ASCII Len: 259 -Q01525 arabidopsis thaliana (mouse-ear cress). 14-3-3-like protein gf14. 
2/94 -MASGREELVYMAKLAEQAERYEEMVEFMEKVSAAVDGDELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEESRGNDDHVTAIREYRSKIETELSGICDGILKLLDSRLIPAAASGDSKVFYLKMKGDYHRYLAEFKTGQERKDAAEHTLAAYKSAQDIANAELAPTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDMQDDAADEIKEAAAPKPTEEQQ ->>>>1434_MAIZE 6/94 ASCII Len: 248 -Q01526 zea mays (maize). 14-3-3-like protein gf14-12. 6/94 -MAKLAEQAERYEEMVEFMEKVAKTVDSEELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEEGRGNEDRVTLIKDYRGKIETELTKICDGILKLLESHLVPSSTAPESKVFYLKMKGDYYRYLAEFKTGAERKDAAENTMVAYKAAQDIALAELAPTHPIRLGLALNFSVFYYEILNSPDRACSLAKQAFDEAISELDTLSEESYKDSTLIMQLLHDNLTLWTSDISEDPAEEIREAPKHDLSEGQ diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/gcgdata.2 b/forester/archive/RIO/others/hmmer/squid/Formats/gcgdata.2 deleted file mode 100644 index b51258533c9e23a2cfcf4237e17b3409524b9195..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2192 zcmbW2do-JQ9>&LzG13qv+UagqUV}p`$X!BQQVg27RP4|l7Ms39IwX;}Rz;~ZGwlp* zUCKJj?CPE=MRaI8Gzf1a*}6>`dR3I760Hc;x^CF*IqjaA-Lw1O`_J$De7@&>p6Bb~ z;o-sX4hX(xNeT02|l!Dy);fiKjWg7pBJCe27?9Bh4$jd!^EVbD1b)mHte> zU}VWZul0H#u?xW1!|Ie_X?uqaI@clf#;x*orE=Xr+}>Gh{UAqvrDyPF*A&JA8CjU` zY*)Er^xEyKg}Q7vNTigl{?v0-Sa+E@T3_8sR%Forky_7u0LZoGZaAb z)JnGcEHkleNq?K}nf_mx6G?{S2eyJm^yiTt8{aa6-` zQh72z?&`owlq)&fCHFI1CTD>Y>8;+w-UY4Ww}LS1{XOczDGAa>9OV5%UtIvBPp}aA zr+Zjh6%m>|HH*kTrFfoegC=6sUk95%w#iwh4&_B-f3%wkdOZkjS@oG)CR-P#YaubQ zq0M^aq=F3%5cF5`ku=5oy;6eer)sKJjYPH+&oLHeHFTuo#<`1%TtqQ6j>z8Zz;r5! 
zXROg#y|_lqmgh}&ntq|T%~1P2=sHu~?`D15LGscDt;y_not^CAcutC45M)^IRRul= zJF&2MGWdMBwK0-uq-AY>HAa9TkuDjp`qw&$XFdA}z$P)f@hKXkcp}oG*A>=Z&p3Ka zX^*cVPW=3Wz6A_Hn}-H$3soj`ZAGseRlJF5pFX_0Ugheto;(?~)ptjaDKRCg`%Dk` zc-2hBdRAsz%Pf&@t$#eIKDGOUx{V=c_9e9E;x4P)v54$5>$?Wk*Zex&VLlc5vFJ$O zj2H{WMA138E;Jqt7SWr{004?JmHJL(@HjF&5Ran}@xZ}YPBJLu{wp)F3A*e!5&=#E z7@N8sA`v$k1U}~rfq!O75YKgtz`KxuIFJVd3_u{{Cxcv`(~bmu-fTj4cy;{Gix{>l z4GrI0p7_^&KLYpkqAT8e!{Z;ajYGq~i{qk8a3Fz9A>n?7$N!DejxZMB@Dl|Ipbm|5 zRJ1Kl!^4+GpT0E07nm&wOK}L6j$*S}_;)T5m zI?C{>z0qdALY*c5P_V%}?`QT5+j_I&_{@GW*Qo)#UDH!lzCW@h>1F7hE>z%wGS$=H z9g($WR)pTaRGrC^etVa6)EsU#?mux^8e>;u*M0(a@)7enhH0A~@6!?|7tcvP5za-| z78kbU)$YbuCHU0GLg=}fh-Z}%Gziruw~QV(>k$|K{AqhuJ>0!SkDEJJ9+h7F!FPKy z^CCkFs_WbD{66k7HURfYfMHUW@A7cHj2y>4GOrh4J!Ej9El|Nux6mj3aJLlBs4caa zjIvdq@v{QRTiN{%ZU?kjF&cq`-7Q7JxcHGcie#kbC@a)$3-%$OEe!7BeYaJ{+e za!S!$N$GA)X{2vOs<_lJym)Qccn;@(12esED2A8AHoqx zB+@S#MA!2|ZXB1lUH9AVE(8S$Tuy8HSJDf})&hr@BWq!3-Is(D;41Xi5Z6CqTeD;WZcg(gEyQh0hAa}hG{$e+F?3&+^ z9*0M6e#p`<&(gI289 - /number=1 -BASE COUNT 30 a 99 c 80 g 80 t -ORIGIN - 1 gtccccgcgg gccttgtcct gattggctgt ccctgcgggc cttgtcctga ttggctgtgc - 61 ccgactccgt ataacataaa tagaggcgtc gagtcgcgcg ggcattactg cagcggacta - 121 cacttgggtc gagatggctc gcttcgtggt ggtggccctg ctcgtgctac tctctctgtc - 181 tggcctggag gctatccagc gtaagtctct cctcccgtcc ggcgctggtc cttcccctcc - 241 cgctcccacc ctctgtagcc gtctctgtgc tctctggttt cgttacctc -// - -LOCUS AAB2MCG2 1276 bp DNA PRI 06-JUL-1998 -DEFINITION Aotus azarai beta-2-microglobulin precursor, gene, exons 2 and 3 - and complete cds. -ACCESSION AF032093 AF032094 -VERSION AF032093.1 GI:3287308 -KEYWORDS . -SEGMENT 2 of 2 -SOURCE Azara's night monkey. - ORGANISM Aotus azarai - Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; - Mammalia; Eutheria; Primates; Platyrrhini; Cebidae; Aotinae; Aotus. -REFERENCE 1 (bases 1 to 1276) - AUTHORS Canavez,F.C., Ladasky,J.J., Muniz,J.A.C., Seuanez,H.N., Parham,P. - and Cavanez,F.C. 
- TITLE beta2-Microglobulin in neotropical primates (Platyrrhini) - JOURNAL Immunogenetics 48 (2), 133-140 (1998) - MEDLINE 98298008 -REFERENCE 2 (bases 1 to 1276) - AUTHORS Canavez,F.C., Ladasky,J.J., Seuanez,H.N. and Parham,P. - TITLE Direct Submission - JOURNAL Submitted (31-OCT-1997) Structural Biology, Stanford University, - Fairchild Building Campus West Dr. Room D-100, Stanford, CA - 94305-5126, USA -COMMENT On Jul 2, 1998 this sequence version replaced gi:3265029 - gi:3265028. -FEATURES Location/Qualifiers - source 1..1276 - /organism="Aotus azarai" - /db_xref="taxon:30591" - mRNA join(AF032092.1:<134..200,66..344,1023..>1050) - /product="beta-2-microglobulin precursor" - CDS join(AF032092.1:134..200,66..344,1023..1036) - /codon_start=1 - /product="beta-2-microglobulin precursor" - /protein_id="AAC52107.1" - /db_xref="GI:3289965" - /translation="MARFVVVALLVLLSLSGLEAIQRXPKIQVYSRHPAENGKPNFLN - CYVSGFHPSDIEVDLLKNGKKIEKVEHSDLSFSKDWSFYLLYYTEFTPNEKDEYACRV - SHVTLSTPKTVKWDRNM" - mat_peptide join(AF032092.1:194..200,66..344,1023..1033) - /product="beta-2-microglobulin" - intron <1..65 - /number=1 - variation 3 - /note="allele 1" - /replace="g" - exon 66..344 - /number=2 - intron 345..1022 - /number=2 - exon 1023..1050 - /number=3 - intron 1051..>1276 - /number=3 -BASE COUNT 353 a 253 c 269 g 400 t 1 others -ORIGIN - 1 caagttatcc gtaattgaaa taccctggta attaatattc atttgtcttt tcctgatttt - 61 ttcaggtrct ccaaagattc aggtttactc acgtcatccg gcagagaatg gaaagccaaa - 121 ttttctgaat tgctatgtgt ctgggtttca tccgtccgac attgaagttg acttactgaa - 181 gaatggaaag aaaattgaaa aagtggagca ttcagacttg tctttcagca aggactggtc - 241 tttctatctc ttgtactaca ccgagtttac ccccaatgaa aaagatgagt atgcctgccg - 301 tgtgagccat gtgactttat caacacccaa gacagtaaag tggggtaagt cttacgttct - 361 tttgtaggct gctgaaagtt gtgtatgggt agtcatgtca taaagctgct ttgatataaa - 421 aaaaattcgt ctatggccat actgccctga atgagtccca tcccgtctga taaaaaaaaa - 481 tcttcatatt gggattgtca gggaatgtgc ttaaagatca gattagagac aacggctgag - 541 agagcgctgc 
acagcattct tctgaaccag cagtttccct gcagctgagc agggagcagc - 601 agcagcagtt gcacaaatac atatgcactc ctaacacttc ttacctactg acttcctcag - 661 ctttcgtggc agctttaggt atatttagca ctaatgaaca tcaggaaggt ataggccttt - 721 ctttgtaaat ccttctatcc tagcatccta taatcctgga ctcctccagt actctctggc - 781 tggattggta tctgaggcta gtaggtgggg cttgttcctg ctgggtagct ccaaacaagg - 841 tattcatgga taggaacagc agcctatttt gccagcctta tttcttaata gttttagaaa - 901 tctgttagta cgtggtgttt tttgttttgt tttgttttaa cacagtgtaa acaaaaagta - 961 catgtatttt aaaagtaaaa cttaatgtct tcctttttct ttctccactg tctttttcat - 1021 agatcgaaac atgtaaccag catcatggag gtaagttctt gaccttaatt aaatgttttt - 1081 tgtttcactg gggactattt atagacagcc ctaacatgat aaccctcact atgtggagaa - 1141 cattgacaga gtagcatttt agcaggcaaa gaggaatcct atagggttac attccctttt - 1201 cctgtggagt ggcatgaaaa aggtatgtgg ccccagctgt ggccacatta ctgactctac - 1261 agggagggca aaggaa -// -LOCUS AACCOSIV1 168 bp DNA PRI 26-JAN-1998 -DEFINITION Aotus azarai cytochrome c oxidase subunit IV gene, exon 3. -ACCESSION AF042765 -VERSION AF042765.1 GI:2809514 -KEYWORDS . -SEGMENT 1 of 3 -SOURCE Azara's night monkey. - ORGANISM Aotus azarai - Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; - Mammalia; Eutheria; Primates; Platyrrhini; Cebidae; Aotinae; Aotus. -REFERENCE 1 (bases 1 to 168) - AUTHORS Wu,W., Goodman,M., Lomax,M.I. and Grossman,L.I. - TITLE Molecular evolution of cytochrome c oxidase subunit IV: evidence - for positive selection in simian primates - JOURNAL J. Mol. Evol. 44 (5), 477-491 (1997) - MEDLINE 97277139 -REFERENCE 2 (bases 1 to 168) - AUTHORS Wu,W., Goodman,M., Lomax,M.I. and Grossman,L.I. - TITLE Direct Submission - JOURNAL Submitted (14-JAN-1998) CMMG, Wayne State University, 540 E. 
- Canfield, Detroit, MI 48201, USA -FEATURES Location/Qualifiers - source 1..168 - /organism="Aotus azarai" - /db_xref="taxon:30591" - exon 1..168 - /number=3 -BASE COUNT 40 a 42 c 54 g 32 t -ORIGIN - 1 gaagtgttgt gaagagcgaa gactatgcgc tcccaagtta tgtggatcgg cgtgactatc - 61 ccttgcccga cgtggcccat gtcaggcacc tgtcggccag ccagaaggcc ttgaaggaga - 121 aggagaaggc ctcctggagc agcctctcca tggatgagaa agtcgagt -// - -LOCUS AACCOSIV2 132 bp DNA PRI 26-JAN-1998 -DEFINITION Aotus azarai cytochrome c oxidase subunit IV gene, exon 4. -ACCESSION AF042766 -VERSION AF042766.1 GI:2809515 -KEYWORDS . -SEGMENT 2 of 3 -SOURCE Azara's night monkey. - ORGANISM Aotus azarai - Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; - Mammalia; Eutheria; Primates; Platyrrhini; Cebidae; Aotinae; Aotus. -REFERENCE 1 (bases 1 to 132) - AUTHORS Wu,W., Goodman,M., Lomax,M.I. and Grossman,L.I. - TITLE Molecular evolution of cytochrome c oxidase subunit IV: evidence - for positive selection in simian primates - JOURNAL J. Mol. Evol. 44 (5), 477-491 (1997) - MEDLINE 97277139 -REFERENCE 2 (bases 1 to 132) - AUTHORS Wu,W., Goodman,M., Lomax,M.I. and Grossman,L.I. - TITLE Direct Submission - JOURNAL Submitted (14-JAN-1998) CMMG, Wayne State University, 540 E. - Canfield, Detroit, MI 48201, USA -FEATURES Location/Qualifiers - source 1..132 - /organism="Aotus azarai" - /db_xref="taxon:30591" - exon 1..132 - /number=4 -BASE COUNT 30 a 25 c 38 g 39 t -ORIGIN - 1 tgtatcgtat tcagttcaag gagagctttg ctgagatgaa caggggctcc aatgagtgga - 61 agacggttgt gggtgctgcc atgttcttca tcggcttcac agcaattctt atcatcttgg - 121 agaagcgcta tg -// - -LOCUS AACCOSIV3 137 bp DNA PRI 26-JAN-1998 -DEFINITION Aotus azarai cytochrome c oxidase subunit IV gene, exon 5; and - partial cds. -ACCESSION AF042767 -VERSION AF042767.1 GI:2809516 -KEYWORDS . -SEGMENT 3 of 3 -SOURCE Azara's night monkey. 
- ORGANISM Aotus azarai - Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; - Mammalia; Eutheria; Primates; Platyrrhini; Cebidae; Aotinae; Aotus. -REFERENCE 1 (bases 1 to 137) - AUTHORS Wu,W., Goodman,M., Lomax,M.I. and Grossman,L.I. - TITLE Molecular evolution of cytochrome c oxidase subunit IV: evidence - for positive selection in simian primates - JOURNAL J. Mol. Evol. 44 (5), 477-491 (1997) - MEDLINE 97277139 -REFERENCE 2 (bases 1 to 137) - AUTHORS Wu,W., Goodman,M., Lomax,M.I. and Grossman,L.I. - TITLE Direct Submission - JOURNAL Submitted (14-JAN-1998) CMMG, Wayne State University, 540 E. - Canfield, Detroit, MI 48201, USA -FEATURES Location/Qualifiers - source 1..137 - /organism="Aotus azarai" - /db_xref="taxon:30591" - mRNA join(AF042765.1:<1..168,AF042766.1:1..132,1..>137) - /product="cytochrome c oxidase subunit IV" - CDS join(AF042765.1:<1..168,AF042766.1:1..132,1..137) - /codon_start=3 - /product="cytochrome c oxidase subunit IV" - /protein_id="AAB97755.1" - /db_xref="GI:2809518" - /translation="SVVKSEDYALPSYVDRRDYPLPDVAHVRHLSASQKALKEKEKAS - WSSLSMDEKVELYRIQFKESFAEMNRGSNEWKTVVGAAMFFIGFTAILIILEKRYVYG - PLPHTFDKEWVAMQTKRMLDLKVNPVDGLASKWDYDKKEWKK" - exon 1..137 - /number=5 -BASE COUNT 36 a 36 c 43 g 22 t -ORIGIN - 1 tgtacggccc cctcccgcac acctttgaca aagagtgggt ggccatgcag accaagagga - 61 tgctggacct gaaggtgaac cctgtcgatg gcctcgcctc caagtgggac tacgacaaga - 121 aggagtggaa gaagtga -// -LOCUS AAU18601 1771 bp DNA PRI 17-JAN-1997 -DEFINITION Aotus azarae interphotoreceptor retinoid-binding protein (IRBP) - gene, intron 1, complete sequence. -ACCESSION U18601 -VERSION U18601.1 GI:624187 -KEYWORDS . -SOURCE Azara's night monkey. - ORGANISM Aotus azarai - Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; - Mammalia; Eutheria; Primates; Platyrrhini; Cebidae; Aotinae; Aotus. -REFERENCE 1 (bases 1 to 1771) - AUTHORS Harada,M.L., Schneider,H., Schneider,M.P., Sampaio,I., - Czelusniak,J. and Goodman,M. 
- TITLE DNA evidence on the phylogenetic systematics of New World monkeys: - support for the sister-grouping of Cebus and Saimiri from two - unlinked nuclear genes - JOURNAL Mol. Phylogenet. Evol. 4 (3), 331-349 (1995) - MEDLINE 96111507 -REFERENCE 2 (bases 1 to 1771) - AUTHORS Harada,M.L. - TITLE Direct Submission - JOURNAL Submitted (13-DEC-1994) Universidade Federal do Para, Departamento - de Genetica, Campus Universitario do Guama, Belem, Para, 66075-900, - Brazil -FEATURES Location/Qualifiers - source 1..1771 - /organism="Aotus azarai" - /db_xref="taxon:30591" - /tissue_type="lymphocytes" - /dev_stage="adult" - gene 1..1771 - /gene="IRBP" - intron 1..1771 - /gene="IRBP" - /note="interphotoreceptor retinoid-binding protein" - /number=1 -BASE COUNT 384 a 443 c 533 g 411 t -ORIGIN - 1 gtgagaccca agggagacct ggccgggccc agtcctggga gtgagttgac ctgtcgtttg - 61 cacatgcagg gctctgtgca caatgcgtga caatggcttt tagatttgtt ctcatgctta - 121 agttgtggcc agttgagtcc tttcctcttt ccatccactg ttccatccac tctctgggac - 181 cctggtgctg ctgtagaacc tccgtagaac attcatgtta ggttggtgtg aaagtacttt - 241 taattgcaaa acccacaatt acttttgcat caacctcacg ggaagccagt ttggaagcct - 301 cgggatagac agagtttcag ccttggctgg gtggaaggtg agcgttggcg gggcttctca - 361 tcgtcagtgt gggagaagag gccaacatgt ggcagaggtg gcggtgggct tcaccgcgtg - 421 ccccaccgca ggccgagagc tccgcccggg cagcactcac tccacgctgt tctcctacct - 481 gtggctttgc tgcattgtca cagttgggca gggcagcatg tgtcatgaat cccttgcaag - 541 gagggtctga gactggggtt gggtgcaggc agtttgtctg ggaggtggtt gctgaagcag - 601 gtgtgaagga gggagcaggg agagtgagat aggaaggtga caggcaggtc cctcaaagct - 661 gttctgctga agccaggacg ctgacaagtg tggggatgct cccaggcaca gttctctgcg - 721 ggcgggcccc agggctcctg tcccgctttg gccaagagtt gccctgagga cataactcgg - 781 ggtggggcag gctcccctct cttggagaag gcctgagctg agggtggaaa gacaggatgg - 841 tgctgtggga gagcctgtca gtggggccag gtgcagctga aatcagaggg ggctgagagt - 901 gccaacggca tctgttacag aattctcatc cccattttgc ataactgagg cccagagagg - 961 tgcagagggg agtggcctgg agccagagag ctgtgactga 
aggcagggca gggcctggag - 1021 ggcagtgtct ctgtcagcac aggctccttg ccccagtcca gctcaccaag tcctgccgcc - 1081 ctcccgcagc cttagagagg gaggaagagg tgcatccaca tggaagtagc ctgtgctagg - 1141 ctttcagaat acccagtttc caaattaatt gcttcttcct ttctggtata gccaaggttc - 1201 acaatttgga gtcagatgtg gattcagatg ctggctccac cacttattga ctgtgtaacc - 1261 tgggactagt tacttaatct cactgtgctt cagtttttcc gtggaaaaga tggggaccat - 1321 gttatctcct gtacaggtgg ctgtgaggat gacgataagc tctgcaaagt gcttagtaca - 1381 gggccaggca cctgttaaag gtaactaaca tcttccaatc ctgccccagt ggaggggaag - 1441 ataagcttag agatgttggg aagtatctgg cgaggttgga cgaatcagag aggagaccat - 1501 tcctgggcct tccagctctg aacaccagag cagacaggag catcctctgc aaggaggctt - 1561 cccatggatc acacatgtcc cagtggcatg tcacatccca gacatgccac tgggaaagtc - 1621 ccaggtgcct actgactcct tcagaaatgt cagttcctgt cccatgccct taatatttcc - 1681 catgacataa aggcgatcca tggcacctgc tttcctgggc tcgaaaaccg gctgccctcc - 1741 tgacactgag caggacctcc aactcttgca g -// diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/msf b/forester/archive/RIO/others/hmmer/squid/Formats/msf deleted file mode 100644 index 8ef0bcd..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/msf +++ /dev/null @@ -1,266 +0,0 @@ - - - MSF: 171 Type: P Check: 4694 .. 
- - Name: GLB2_MORMR oo Len: 171 Check: 6522 Weight: 2.7687 - Name: GLBZ_CHITH oo Len: 171 Check: 6733 Weight: 2.9329 - Name: HBA2_BOSMU oo Len: 171 Check: 5006 Weight: 0.6394 - Name: HBA2_GALCR oo Len: 171 Check: 6652 Weight: 0.5183 - Name: HBA4_SALIR oo Len: 171 Check: 5434 Weight: 1.9511 - Name: HBAD_CHLME oo Len: 171 Check: 6621 Weight: 1.2927 - Name: HBAD_PASMO oo Len: 171 Check: 8118 Weight: 1.2927 - Name: HBAZ_HORSE oo Len: 171 Check: 8382 Weight: 1.6223 - Name: HBA_AILME oo Len: 171 Check: 5402 Weight: 0.4145 - Name: HBA_ANSSE oo Len: 171 Check: 3688 Weight: 0.8315 - Name: HBA_COLLI oo Len: 171 Check: 4420 Weight: 0.8557 - Name: HBA_ERIEU oo Len: 171 Check: 5528 Weight: 0.8390 - Name: HBA_FRAPO oo Len: 171 Check: 4136 Weight: 0.5014 - Name: HBA_MACFA oo Len: 171 Check: 5986 Weight: 0.2233 - Name: HBA_MACSI oo Len: 171 Check: 6064 Weight: 0.2233 - Name: HBA_MESAU oo Len: 171 Check: 5499 Weight: 0.6722 - Name: HBA_PAGLA oo Len: 171 Check: 6189 Weight: 0.5388 - Name: HBA_PHACO oo Len: 171 Check: 5129 Weight: 0.5014 - Name: HBA_PONPY oo Len: 171 Check: 5894 Weight: 0.3907 - Name: HBA_PROLO oo Len: 171 Check: 5810 Weight: 0.4145 - Name: HBA_TRIOC oo Len: 171 Check: 6427 Weight: 0.6883 - Name: HBB1_VAREX oo Len: 171 Check: 7239 Weight: 1.1252 - Name: HBB2_TRICR oo Len: 171 Check: 7790 Weight: 1.9629 - Name: HBB2_XENTR oo Len: 171 Check: 9537 Weight: 1.4685 - Name: HBBL_RANCA oo Len: 171 Check: 7490 Weight: 1.4685 - Name: HBB_CALAR oo Len: 171 Check: 6568 Weight: 0.4226 - Name: HBB_COLLI oo Len: 171 Check: 5043 Weight: 0.7672 - Name: HBB_EQUHE oo Len: 171 Check: 6101 Weight: 0.6734 - Name: HBB_LARRI oo Len: 171 Check: 5673 Weight: 0.7672 - Name: HBB_MANSP oo Len: 171 Check: 7148 Weight: 0.4226 - Name: HBB_ORNAN oo Len: 171 Check: 6249 Weight: 0.6615 - Name: HBB_RABIT oo Len: 171 Check: 7043 Weight: 0.5259 - Name: HBB_SPECI oo Len: 171 Check: 3680 Weight: 0.5422 - Name: HBB_SPETO oo Len: 171 Check: 4246 Weight: 0.5422 - Name: HBB_SUNMU oo Len: 171 Check: 5601 
Weight: 0.6734 - Name: HBB_TACAC oo Len: 171 Check: 7133 Weight: 0.6615 - Name: HBB_TRIIN oo Len: 171 Check: 4009 Weight: 0.8445 - Name: HBB_TUPGL oo Len: 171 Check: 7197 Weight: 0.7375 - Name: HBB_URSMA oo Len: 171 Check: 7200 Weight: 0.4695 - Name: HBE_PONPY oo Len: 171 Check: 5902 Weight: 1.0101 - Name: HBF1_URECA oo Len: 171 Check: 6462 Weight: 2.9329 - Name: LGB1_PEA oo Len: 171 Check: 4791 Weight: 2.0005 - Name: LGB1_VICFA oo Len: 171 Check: 7426 Weight: 2.0005 - Name: MYG_ESCGI oo Len: 171 Check: 9170 Weight: 0.7432 - Name: MYG_HORSE oo Len: 171 Check: 1290 Weight: 0.7432 - Name: MYG_LYCPI oo Len: 171 Check: 1107 Weight: 0.8773 - Name: MYG_MOUSE oo Len: 171 Check: 1320 Weight: 1.0018 - Name: MYG_MUSAN oo Len: 171 Check: 5461 Weight: 2.3158 - Name: MYG_PROGU oo Len: 171 Check: 1450 Weight: 0.7629 - Name: MYG_SAISC oo Len: 171 Check: 1728 Weight: 0.7629 - -// - - - -GLB2_MORMR ...PIVD..S GSVSPLSDAE KNKIRAAW.D IVYKNYEKNG VDILVKFFTG -GLBZ_CHITH MKFIILALCV AAASALSGDQ IGLVQST.YG KVKG....DS VGILYAVFKA -HBA2_BOSMU ...V...... .....LSAAD KGNVKAAW.G KVGGHAAEYG AEALERMFLS -HBA2_GALCR ...V...... .....LSPTD KSNVKAAW.E KVGAHAGDYG AEALERMFLS -HBA4_SALIR ...S...... .....LSAKD KANVKAIW.G KILPKSDEIG EQALSRMLVV -HBAD_CHLME ...M...... .....LTADD KKLLTQLW.E KVAGHQEEFG SEALQRMFLT -HBAD_PASMO ...M...... .....LTAED KKLIQQIW.G KLGGAEEEIG ADALWRMFHS -HBAZ_HORSE ...S...... .....LTKAE RTMVVSIW.G KISMQADAVG TEALQRLFSS -HBA_AILME ...V...... .....LSPAD KTNVKATW.D KIGGHAGEYG GEALERTFAS -HBA_ANSSE ...V...... .....LSAAD KGNVKTVF.G KIGGHAEEYG AETLQRMFQT -HBA_COLLI ...V...... .....LSAND KSNVKAVF.A KIGGQAGDLG GEALERLFIT -HBA_ERIEU ...V...... .....LSATD KANVKTFW.G KLGGHGGEYG GEALDRMFQA -HBA_FRAPO ...V...... .....LSAAD KNNVKGIF.G KISSHAEDYG AEALERMFIT -HBA_MACFA ...V...... .....LSPAD KTNVKAAW.G KVGGHAGEYG AEALERMFLS -HBA_MACSI ...V...... .....LSPAD KTNVKDAW.G KVGGHAGEYG AEALERMFLS -HBA_MESAU ...V...... .....LSAKD KTNISEAW.G KIGGHAGEYG AEALERMFFV -HBA_PAGLA ...V...... 
.....LSSAD KNNIKATW.D KIGSHAGEYG AEALERTFIS -HBA_PHACO ...V...... .....LSAAD KNNVKGIF.T KIAGHAEEYG AEALERMFIT -HBA_PONPY ...V...... .....LSPAD KTNVKTAW.G KVGAHAGDYG AEALERMFLS -HBA_PROLO ...V...... .....LSPAD KANIKATW.D KIGGHAGEYG GEALERTFAS -HBA_TRIOC ...V...... .....LSAND KTNVKTVF.T KITGHAEDYG AETLERMFIT -HBB1_VAREX ...V...... ....HWTAEE KQLICSLW.G KI..DVGLIG GETLAGLLVI -HBB2_TRICR ...V...... ....HLTAED RKEIAAIL.G KV..NVDSLG GQCLARLIVV -HBB2_XENTR ...V...... ....HWTAEE KATIASVW.G KV..DIEQDG HDALSRLLVV -HBBL_RANCA ...V...... ....HWTAEE KAVINSVW.Q KV..DVEQDG HEALTRLFIV -HBB_CALAR ...V...... ....HLTGEE KSAVTALW.G KV..NVDEVG GEALGRLLVV -HBB_COLLI ...V...... ....HWSAEE KQLITSIW.G KV..NVADCG AEALARLLIV -HBB_EQUHE ...V...... ....QLSGEE KAAVLALW.D KV..NEEEVG GEALGRLLVV -HBB_LARRI ...V...... ....HWSAEE KQLITGLW.G KV..NVADCG AEALARLLIV -HBB_MANSP ...V...... ....HLTPEE KTAVTTLW.G KV..NVDEVG GEALGRLLVV -HBB_ORNAN ...V...... ....HLSGGE KSAVTNLW.G KV..NINELG GEALGRLLVV -HBB_RABIT ...V...... ....HLSSEE KSAVTALW.G KV..NVEEVG GEALGRLLVV -HBB_SPECI ...V...... ....HLSDGE KNAISTAW.G KV..HAAEVG AEALGRLLVV -HBB_SPETO ...V...... ....HLTDGE KNAISTAW.G KV..NAAEIG AEALGRLLVV -HBB_SUNMU ...V...... ....HLSGEE KACVTGLW.G KV..NEDEVG AEALGRLLVV -HBB_TACAC ...V...... ....HLSGSE KTAVTNLW.G HV..NVNELG GEALGRLLVV -HBB_TRIIN ...V...... ....HLTPEE KALVIGLW.A KV..NVKEYG GEALGRLLVV -HBB_TUPGL ...V...... ....HLSGEE KAAVTGLW.G KV..DLEKVG GQSLGSLLIV -HBB_URSMA ...V...... ....HLTGEE KSLVTGLW.G KV..NVDEVG GEALGRLLVV -HBE_PONPY ...V...... ....HFTAEE KAAVTSLW.S KM..NVEEAG GEALGRLLVV -HBF1_URECA .......... ....GLTTAQ IKAIQDHWFL NIKGCLQAAA DSIFFKYLTA -LGB1_PEA GFTDKQEALV NSSSE.FKQN LPGYSILFYT IVLEKAP..A AKGL...... -LGB1_VICFA GFTEKQEALV NSSSQLFKQN PSNYSVLFYT IILQKAP..T AKAM...... -MYG_ESCGI ...V...... .....LSDAE WQLVLNIW.A KVEADVAGHG QDILIRLFKG -MYG_HORSE ...G...... .....LSDGE WQQVLNVW.G KVEADIAGHG QEVLIRLFTG -MYG_LYCPI ...G...... .....LSDGE WQIVLNIW.G KVETDLAGHG QEVLIRLFKN -MYG_MOUSE ...G...... 
.....LSDGE WQLVLNVW.G KVEADLAGHG QEVLIGLFKT -MYG_MUSAN .......... ........VD WEKVNSVW.S AVESDLTAIG QNILLRLFEQ -MYG_PROGU ...G...... .....LSDGE WQLVLNVW.G KVEGDLSGHG QEVLIRLFKG -MYG_SAISC ...G...... .....LSDGE WQLVLNIW.G KVEADIPSHG QEVLISLFKG - - -GLB2_MORMR TPAAQAFFPK FKGLTTADAL KKSSDVRWHA ERIINAVNDA VKSMDDTEKM -GLBZ_CHITH DPTIQAAFPQ FVGK.DLDAI KGGAEFSTHA GRIVGFLGGV IDDLP...NI -HBA2_BOSMU FPTTKTYFPH FD.LSH.... .GSAQVKGHG AKVAAALTKA VGHLDD...L -HBA2_GALCR FPTTKTYFPH FD.LSH.... .GSTQVKGHG KKVADALTNA VLHVDD...M -HBA4_SALIR YPQTKAYFSH WASVAP.... .GSAPVKKHG ITIMNQIDDC VGHMDD...L -HBAD_CHLME YPQTKTYFPH FD.LHP.... .GSEQVRGHG KKVAAALGNA VKSLDN...L -HBAD_PASMO YPSTKTYFPH FD.LSQ.... .GSDQIRGHG KKVVAALSNA IKNLDN...L -HBAZ_HORSE YPQTKTYFPH FD.LHE.... .GSPQLRAHG SKVAAAVGDA VKSIDN...V -HBA_AILME FPTTKTYFPH FD.LSP.... .GSAQVKAHG KKVADALTTA VGHLDD...L -HBA_ANSSE FPQTKTYFPH FD.LQP.... .GSAQIKAHG KKVAAALVEA ANHIDD...I -HBA_COLLI YPQTKTYFPH FD.LSH.... .GSAQIKGHG KKVAEALVEA ANHIDD...I -HBA_ERIEU HPTTKTYFPH FD.LNP.... .GSAQVKGHG KKVADALTTA VNNLDD...V -HBA_FRAPO YPSTKTYFPH FD.LSH.... .GSAQVKGHG KKVVAALIEA ANHIDD...I -HBA_MACFA FPTTKTYFPH FD.LSH.... .GSAQVKGHG KKVADALTLA VGHVDD...M -HBA_MACSI FPTTKTYFPH FD.LSH.... .GSAQVKGHG KKVADALTLA VGHVDD...M -HBA_MESAU YPTTKTYFPH FD.VSH.... .GSAQVKGHG KKVADALTNA VGHLDD...L -HBA_PAGLA FPTTKTYFPH FD.LSH.... .GSAQVKAHG KKVADALTLA VGHLED...L -HBA_PHACO YPSTKTYFPH FD.LSH.... .GSAQIKGHG KKVVAALIEA VNHIDD...I -HBA_PONPY FPTTKTYFPH FD.LSH.... .GSAQVKDHG KKVADALTNA VAHVDD...M -HBA_PROLO FPTTKTYFPH FD.LSP.... .GSAQVKAHG KKVADALTLA VGHLDD...L -HBA_TRIOC YPPTKTYFPH FD.LHH.... 
.GSAQIKAHG KKVVGALIEA VNHIDD...I -HBB1_VAREX YPWTQRQFSH FGNLSSPTAI AGNPRVKAHG KKVLTSFGDA IKNLDN...I -HBB2_TRICR NPWSRRYFHD FGDLSSCDAI CRNPKVLAHG AKVMRSIVEA TKHLDN...L -HBB2_XENTR YPWTQRYFSS FGNLSNVSAV SGNVKVKAHG NKVLSAVGSA IQHLDD...V -HBBL_RANCA YPWTQRYFST FGDLSSPAAI AGNPKVHAHG KKILGAIDNA IHNLDD...V -HBB_CALAR YPWTQRFFES FGDLSTPDAV MNNPKVKAHG KKVLGAFSDG LTHLDN...L -HBB_COLLI YPWTQRFFSS FGNLSSATAI SGNPNVKAHG KKVLTSFGDA VKNLDN...I -HBB_EQUHE YPWTQRFFDS FGDLSNPAAV MGNPKVKAHG KKVLHSFGEG VHHLDN...L -HBB_LARRI YPWTQRFFAS FGNLSSPTAI NGNPMVRAHG KKVLTSFGEA VKNLDN...I -HBB_MANSP YPWTQRFFDS FGDLSSPDAV MGNPKVKAHG KKVLGAFSDG LNHLDN...L -HBB_ORNAN YPWTQRFFEA FGDLSSAGAV MGNPKVKAHG AKVLTSFGDA LKNLDD...L -HBB_RABIT YPWTQRFFES FGDLSSANAV MNNPKVKAHG KKVLAAFSEG LSHLDN...L -HBB_SPECI YPWTQRFFDS FGDLSSASAV MGNAKVKAHG KKVIDSFSNG LKHLDN...L -HBB_SPETO YPWTQRFFDS FGDLSSASAV MGNAKVKAHG KKVIDSFSNG LKHLDN...L -HBB_SUNMU YPWTQRFFDS FGDLSSASAV MGNPKVKAHG KKVLHSLGEG VANLDN...L -HBB_TACAC YPWTQRFFES FGDLSSADAV MGNAKVKAHG AKVLTSFGDA LKNLDN...L -HBB_TRIIN YPWTQRFFEH FGDLSSASAI MNNPKVKAHG EKVFTSFGDG LKHLED...L -HBB_TUPGL YPWTQRFFDS FGDLSSPSAV MSNPKVKAHG KKVLTSFSDG LNHLDN...L -HBB_URSMA YPWTQRFFDS FGDLSSADAI MNNPKVKAHG KKVLNSFSDG LKNLDN...L -HBE_PONPY YPWTQRFFDS FGNLSSPSAI LGNPKVKAHG KKVLTSFGDA IKNMDN...L -HBF1_URECA YPGDLAFFHK FSSV.PLYGL RSNPAYKAQT LTVINYLDKV VDALGG..NA -LGB1_PEA .......... FSFLKDTAGV EDSPKLQAHA EQVFGLVRDS AAQLRTKGEV -LGB1_VICFA .......... 
FSFLKDSAGV VDSPKLGAHA EKVFGMVRDS AVQLRATGEV -MYG_ESCGI HPETLEKFDK FKHLKTEAEM KASEDLKKHG NTVLTALGGI LKKKGH...H -MYG_HORSE HPETLEKFDK FKHLKTEAEM KASEDLKKHG TVVLTALGGI LKKKGH...H -MYG_LYCPI HPETLDKFDK FKHLKTEDEM KGSEDLKKHG NTVLTALGGI LKKKGH...H -MYG_MOUSE HPETLDKFDK FKNLKSEEDM KGSEDLKKHG CTVLTALGTI LKKKGQ...H -MYG_MUSAN YPESQNHFPK FKN.KSLGEL KDTADIKAQA DTVLSALGNI VKKKGS...H -MYG_PROGU HPETLEKFDK FKHLKAEDEM RASEELKKHG TTVLTALGGI LKKKGQ...H -MYG_SAISC HPETLEKFDK FKHLKSEDEM KASEELKKHG TTVLTALGGI LKKKGQ...H - - -GLB2_MORMR SMKLQELSVK HAQSFYVDRQ YFKVLAGII. ........AD TTAPGDAGFE -GLBZ_CHITH GKHVDALVAT H.KPRGVTHA QFNNFRAAFI AYLKGHV..D YTAAVEAAWG -HBA2_BOSMU PGALSELSDL HAHKLRVDPV NFKLLSHSLL VTLASHLPSD FTPAVHASLD -HBA2_GALCR PSALSALSDL HAHKLRVDPV NFKLLRHCLL VTLACHHPAE FTPAVHASLD -HBA4_SALIR FGFLTKLSEL HATKLRVDPT NFKILAHNLI VVIAAYFPAE FTPEIHLSVD -HBAD_CHLME SQALSELSNL HAYNLRVDPA NFKLLAQCFQ VVLATHLGKD YSPEMHAAFD -HBAD_PASMO SQALSELSNL HAYNLRVDPV NFKFLSQCLQ VSLATRLGKE YSPEVHSAVD -HBAZ_HORSE AGALAKLSEL HAYILRVDPV NFKFLSHCLL VTLASRLPAD FTADAHAAWD -HBA_AILME PGALSALSDL HAHKLRVDPV NFKLLSHCLL VTLASHHPAE FTPAVHASLD -HBA_ANSSE AGALSKLSDL HAQKLRVDPV NFKFLGHCFL VVLAIHHPSL LTPEVHASMD -HBA_COLLI AGALSKLSDL HAQKLRVDPV NFKLLGHCFL VVVAVHFPSL LTPEVHASLD -HBA_ERIEU PGALSALSDL HAHKLRVDPV NFKLLSHCLL VTLALHHPAD FTPAVHASLD -HBA_FRAPO AGTLSKLSDL HAHKLRVDPV NFKLLGQCFL VVVAIHHPSA LTPEVHASLD -HBA_MACFA PQALSALSDL HAHKLRVDPV NFKLLSHCLL VTLAAHLPAE FTPAVHASLD -HBA_MACSI PQALSALSDL HAHKLRVDPV NFKLLSHCLL VTLAAHLPAE FTPAVHASLD -HBA_MESAU PGALSALSDL HAHKLRVDPV NFKLLSHCLL VTLANHHPAD FTPAVHASLD -HBA_PAGLA PNALSALSDL HAYKLRVDPV NFKLLSHCLL VTLACHHPAE FTPAVHSALD -HBA_PHACO TGTLSKLSDL HAHKLRVDPV NFKLLGQCFL VVVAIHHPSA LTPEVHASLD -HBA_PONPY PNALSALSDL HAHKLRVDPV NFKLLSHCLL VTLAAHLPAE FTPAVHASLD -HBA_PROLO PGALSALSDL HAYKLRVDPV NFKLLSHCLL VTLACHHPAE FTPAVHASLD -HBA_TRIOC AGALSKLSDL HAQKLRVDPV NFKLLGQCFL VVVAIHHPSV LTPEVHASLD -HBB1_VAREX KDTFAKLSEL HCDKLHVDPT NFKLLGNVLV IVLADHHGKE FTPAHHAAYQ -HBB2_TRICR REYYADLSVT 
HSLKFYVDPE NFKLFSGIVI VCLALTLQTD FSCHKQLAFE -HBB2_XENTR KSHLKGLSKS HAEDLHVDPE NFKRLADVLV IVLAAKLGSA FTPQVQAVWE -HBBL_RANCA KGTLHDLSEE HANELHVDPE NFRRLGEVLI VVLGAKLGKA FSPQVQHVWE -HBB_CALAR KGTFAHLSEL HCDKLHVDPE NFRLLGNVLV CVLAHHFGKE FTPVVQAAYQ -HBB_COLLI KGTFAQLSEL HCDKLHVDPE NFRLLGDILV IILAAHFGKD FTPECQAAWQ -HBB_EQUHE KGTFAQLSEL HCDKLHVDPE NFRLLGNVLV VVLARHFGKD FTPELQASYQ -HBB_LARRI KNTFAQLSEL HCDKLHVDPE NFRLLGDILI IVLAAHFAKD FTPDSQAAWQ -HBB_MANSP KGTFAQLSEL HCDKLHVDPE NFKLLGNVLV CVLAHHFGKE FTPQVQAAYQ -HBB_ORNAN KGTFAKLSEL HCDKLHVDPE NFNRLGNVLI VVLARHFSKD FSPEVQAAWQ -HBB_RABIT KGTFAKLSEL HCDKLHVDPE NFRLLGNVLV IVLSHHFGKE FTPQVQAAYQ -HBB_SPECI KGTFASLSEL HCDKLHVDPE NFKLLGNMIV IVMAHHLGKD FTPEAQAAFQ -HBB_SPETO KGTFASLSEL HCDKLHVDPE NFKLLGNMIV IVMAHHLGKD FTPEAQAAFQ -HBB_SUNMU KGTFAKLSEL HCDKLHVDPE NFRLLGNVLV VVLASKFGKE FTPPVQAAFQ -HBB_TACAC KGTFAKLSEL HCDKLHVDPE NFNRLGNVLV VVLARHFSKE FTPEAQAAWQ -HBB_TRIIN KGAFAELSEL HCDKLHVDPE NFRLLGNVLV CVLARHFGKE FSPEAQAAYQ -HBB_TUPGL KGTFAKLSEL HCDKLHVDPE NFRLLGNVLV RVLACNFGPE FTPQVQAAFQ -HBB_URSMA KGTFAKLSEL HCDKLHVDPE NFKLLGNVLV CVLAHHFGKE FTPQVQAAYQ -HBE_PONPY KTTFAKLSEL HCDKLHVDPE NFKLLGNVMV IILATHFGKE FTPEVQAAWQ -HBF1_URECA GALMKAKVPS H.DAMGITPK HFGQLLKLVG GVFQEEF..S ADPTTVAAWG -LGB1_PEA VLGNATLGAI HVQKGVTNP. HFVVVKEALL QTIKKASGNN WSEELNTAWE -LGB1_VICFA VLDGKD.GSI HIQKGVLDP. HFVVVKEALL KTIKEASGDK WSEELSAAWE -MYG_ESCGI EAELKPLAQS HATKHKIPIK YLEFISDAII HVLHSRHPGD FGADAQAAMN -MYG_HORSE EAELKPLAQS HATKHKIPIK YLEFISDAII HVLHSKHPGN FGADAQGAMT -MYG_LYCPI EAELKPLAQS HATKHKIPVK YLEFISDAII QVLQNKHSGD FHADTEAAMK -MYG_MOUSE AAEIQPLAQS HATKHKIPVK YLEFISEIII EVLKKRHSGD FGADAQGAMS -MYG_MUSAN SQPVKALAAT HITTHKIPPH YFTKITTIAV DVLSEMYPSE MNAQVQAAFS -MYG_PROGU AAELAPLAQS HATKHKIPVK YLEFISEAII QVLQSKHPGD FGADAQGAMS -MYG_SAISC EAELKPLAQS HATKHKIPVK YLELISDAIV HVLQKKHPGD FGADAQGAMK - - -GLB2_MORMR KLMSMICILL SSAY...... . -GLBZ_CHITH ATFDAFFGAV FAK....... M -HBA2_BOSMU KFLANVSTVL TSKYR..... . -HBA2_GALCR KFMASVSTVL TSKYR..... . 
-HBA4_SALIR KFLQQLALAL AEKYR..... . -HBAD_CHLME KFLSAVAAVL AEKYR..... . -HBAD_PASMO KFMSAVASVL AEKYR..... . -HBAZ_HORSE KFLSIVSSVL TEKYR..... . -HBA_AILME KFFSAVSTVL TSKYR..... . -HBA_ANSSE KFLCAVATVL TAKYR..... . -HBA_COLLI KFVLAVGTVL TAKYR..... . -HBA_ERIEU KFLATVATVL TSKYR..... . -HBA_FRAPO KFLCAVGNVL TAKYR..... . -HBA_MACFA KFLASVSTVL TSKYR..... . -HBA_MACSI KFLASVSTVL TSKYR..... . -HBA_MESAU KFFASVSTVL TSKYR..... . -HBA_PAGLA KFFSAVSTVL TSKYR..... . -HBA_PHACO KFLCAVGTVL TAKYR..... . -HBA_PONPY KFLASVSTVL TSKYR..... . -HBA_PROLO KFFTSVSTVL TSKYR..... . -HBA_TRIOC KFLCAVGNVL SAKYR..... . -HBB1_VAREX KLVNVVSHSL ARRYH..... . -HBB2_TRICR KLMKGVSHAL GHGY...... . -HBB2_XENTR KLNATLVAAL SHGYF..... . -HBBL_RANCA KFIAVLVDAL SHSYH..... . -HBB_CALAR KVVAGVANAL AHKYH..... . -HBB_COLLI KLVRVVAHAL ARKYH..... . -HBB_EQUHE KVVAGVANAL AHKYH..... . -HBB_LARRI KLVRVVAHAL ARKYH..... . -HBB_MANSP KVVAGVANAL AHKYH..... . -HBB_ORNAN KLVSGVAHAL GHKYH..... . -HBB_RABIT KVVAGVANAL AHKYH..... . -HBB_SPECI KVVAGVANAL AHKYH..... . -HBB_SPETO KVVAGVANAL SHKYH..... . -HBB_SUNMU KVVAGVANAL AHKYH..... . -HBB_TACAC KLVSGVSHAL AHKYH..... . -HBB_TRIIN KVVAGVANAL AHKYH..... . -HBB_TUPGL KVVAGVANAL AHKYH..... . -HBB_URSMA KVVAGVANAL AHKYH..... . -HBE_PONPY KLVSAVAIAL AHKYH..... . -HBF1_URECA DAAGVLVAAM .......... K -LGB1_PEA VAYDGLATAI KKAMKT.... A -LGB1_VICFA VAYDGLATAI K....A.... 
A -MYG_ESCGI KALELFRKDI AAKYKELGFQ G -MYG_HORSE KALELFRNDI AAKYKELGFQ G -MYG_LYCPI KALELFRNDI AAKYKELGFQ G -MYG_MOUSE KALELFRNDI AAKYKELGFQ G -MYG_MUSAN GAFKIICSDI EKEYKAANFQ G -MYG_PROGU KALELFRNDI AAKYKELGFQ G -MYG_SAISC KALELFRNDM AAKYKELGFQ G - diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/phylip b/forester/archive/RIO/others/hmmer/squid/Formats/phylip deleted file mode 100644 index 32646bd..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/phylip +++ /dev/null @@ -1,204 +0,0 @@ - 50 171 -GLB2_MORMR...PIVD..SGSVSPLSDAEKNKIRAAW.DIVYKNYEKNGVDILVKFFTG -GLBZ_CHITHMKFIILALCVAAASALSGDQIGLVQST.YGKVKG....DSVGILYAVFKA -HBA2_BOSMU...V...........LSAADKGNVKAAW.GKVGGHAAEYGAEALERMFLS -HBA2_GALCR...V...........LSPTDKSNVKAAW.EKVGAHAGDYGAEALERMFLS -HBA4_SALIR...S...........LSAKDKANVKAIW.GKILPKSDEIGEQALSRMLVV -HBAD_CHLME...M...........LTADDKKLLTQLW.EKVAGHQEEFGSEALQRMFLT -HBAD_PASMO...M...........LTAEDKKLIQQIW.GKLGGAEEEIGADALWRMFHS -HBAZ_HORSE...S...........LTKAERTMVVSIW.GKISMQADAVGTEALQRLFSS -HBA_AILME ...V...........LSPADKTNVKATW.DKIGGHAGEYGGEALERTFAS -HBA_ANSSE ...V...........LSAADKGNVKTVF.GKIGGHAEEYGAETLQRMFQT -HBA_COLLI ...V...........LSANDKSNVKAVF.AKIGGQAGDLGGEALERLFIT -HBA_ERIEU ...V...........LSATDKANVKTFW.GKLGGHGGEYGGEALDRMFQA -HBA_FRAPO ...V...........LSAADKNNVKGIF.GKISSHAEDYGAEALERMFIT -HBA_MACFA ...V...........LSPADKTNVKAAW.GKVGGHAGEYGAEALERMFLS -HBA_MACSI ...V...........LSPADKTNVKDAW.GKVGGHAGEYGAEALERMFLS -HBA_MESAU ...V...........LSAKDKTNISEAW.GKIGGHAGEYGAEALERMFFV -HBA_PAGLA ...V...........LSSADKNNIKATW.DKIGSHAGEYGAEALERTFIS -HBA_PHACO ...V...........LSAADKNNVKGIF.TKIAGHAEEYGAEALERMFIT -HBA_PONPY ...V...........LSPADKTNVKTAW.GKVGAHAGDYGAEALERMFLS -HBA_PROLO ...V...........LSPADKANIKATW.DKIGGHAGEYGGEALERTFAS -HBA_TRIOC ...V...........LSANDKTNVKTVF.TKITGHAEDYGAETLERMFIT -HBB1_VAREX...V..........HWTAEEKQLICSLW.GKI..DVGLIGGETLAGLLVI -HBB2_TRICR...V..........HLTAEDRKEIAAIL.GKV..NVDSLGGQCLARLIVV 
-HBB2_XENTR...V..........HWTAEEKATIASVW.GKV..DIEQDGHDALSRLLVV -HBBL_RANCA...V..........HWTAEEKAVINSVW.QKV..DVEQDGHEALTRLFIV -HBB_CALAR ...V..........HLTGEEKSAVTALW.GKV..NVDEVGGEALGRLLVV -HBB_COLLI ...V..........HWSAEEKQLITSIW.GKV..NVADCGAEALARLLIV -HBB_EQUHE ...V..........QLSGEEKAAVLALW.DKV..NEEEVGGEALGRLLVV -HBB_LARRI ...V..........HWSAEEKQLITGLW.GKV..NVADCGAEALARLLIV -HBB_MANSP ...V..........HLTPEEKTAVTTLW.GKV..NVDEVGGEALGRLLVV -HBB_ORNAN ...V..........HLSGGEKSAVTNLW.GKV..NINELGGEALGRLLVV -HBB_RABIT ...V..........HLSSEEKSAVTALW.GKV..NVEEVGGEALGRLLVV -HBB_SPECI ...V..........HLSDGEKNAISTAW.GKV..HAAEVGAEALGRLLVV -HBB_SPETO ...V..........HLTDGEKNAISTAW.GKV..NAAEIGAEALGRLLVV -HBB_SUNMU ...V..........HLSGEEKACVTGLW.GKV..NEDEVGAEALGRLLVV -HBB_TACAC ...V..........HLSGSEKTAVTNLW.GHV..NVNELGGEALGRLLVV -HBB_TRIIN ...V..........HLTPEEKALVIGLW.AKV..NVKEYGGEALGRLLVV -HBB_TUPGL ...V..........HLSGEEKAAVTGLW.GKV..DLEKVGGQSLGSLLIV -HBB_URSMA ...V..........HLTGEEKSLVTGLW.GKV..NVDEVGGEALGRLLVV -HBE_PONPY ...V..........HFTAEEKAAVTSLW.SKM..NVEEAGGEALGRLLVV -HBF1_URECA..............GLTTAQIKAIQDHWFLNIKGCLQAAADSIFFKYLTA -LGB1_PEA GFTDKQEALVNSSSE.FKQNLPGYSILFYTIVLEKAP..AAKGL...... -LGB1_VICFAGFTEKQEALVNSSSQLFKQNPSNYSVLFYTIILQKAP..TAKAM...... 
-MYG_ESCGI ...V...........LSDAEWQLVLNIW.AKVEADVAGHGQDILIRLFKG -MYG_HORSE ...G...........LSDGEWQQVLNVW.GKVEADIAGHGQEVLIRLFTG -MYG_LYCPI ...G...........LSDGEWQIVLNIW.GKVETDLAGHGQEVLIRLFKN -MYG_MOUSE ...G...........LSDGEWQLVLNVW.GKVEADLAGHGQEVLIGLFKT -MYG_MUSAN ..................VDWEKVNSVW.SAVESDLTAIGQNILLRLFEQ -MYG_PROGU ...G...........LSDGEWQLVLNVW.GKVEGDLSGHGQEVLIRLFKG -MYG_SAISC ...G...........LSDGEWQLVLNIW.GKVEADIPSHGQEVLISLFKG - -TPAAQAFFPKFKGLTTADALKKSSDVRWHAERIINAVNDAVKSMDDTEKM -DPTIQAAFPQFVGK.DLDAIKGGAEFSTHAGRIVGFLGGVIDDLP...NI -FPTTKTYFPHFD.LSH.....GSAQVKGHGAKVAAALTKAVGHLDD...L -FPTTKTYFPHFD.LSH.....GSTQVKGHGKKVADALTNAVLHVDD...M -YPQTKAYFSHWASVAP.....GSAPVKKHGITIMNQIDDCVGHMDD...L -YPQTKTYFPHFD.LHP.....GSEQVRGHGKKVAAALGNAVKSLDN...L -YPSTKTYFPHFD.LSQ.....GSDQIRGHGKKVVAALSNAIKNLDN...L -YPQTKTYFPHFD.LHE.....GSPQLRAHGSKVAAAVGDAVKSIDN...V -FPTTKTYFPHFD.LSP.....GSAQVKAHGKKVADALTTAVGHLDD...L -FPQTKTYFPHFD.LQP.....GSAQIKAHGKKVAAALVEAANHIDD...I -YPQTKTYFPHFD.LSH.....GSAQIKGHGKKVAEALVEAANHIDD...I -HPTTKTYFPHFD.LNP.....GSAQVKGHGKKVADALTTAVNNLDD...V -YPSTKTYFPHFD.LSH.....GSAQVKGHGKKVVAALIEAANHIDD...I -FPTTKTYFPHFD.LSH.....GSAQVKGHGKKVADALTLAVGHVDD...M -FPTTKTYFPHFD.LSH.....GSAQVKGHGKKVADALTLAVGHVDD...M -YPTTKTYFPHFD.VSH.....GSAQVKGHGKKVADALTNAVGHLDD...L -FPTTKTYFPHFD.LSH.....GSAQVKAHGKKVADALTLAVGHLED...L -YPSTKTYFPHFD.LSH.....GSAQIKGHGKKVVAALIEAVNHIDD...I -FPTTKTYFPHFD.LSH.....GSAQVKDHGKKVADALTNAVAHVDD...M -FPTTKTYFPHFD.LSP.....GSAQVKAHGKKVADALTLAVGHLDD...L -YPPTKTYFPHFD.LHH.....GSAQIKAHGKKVVGALIEAVNHIDD...I -YPWTQRQFSHFGNLSSPTAIAGNPRVKAHGKKVLTSFGDAIKNLDN...I -NPWSRRYFHDFGDLSSCDAICRNPKVLAHGAKVMRSIVEATKHLDN...L -YPWTQRYFSSFGNLSNVSAVSGNVKVKAHGNKVLSAVGSAIQHLDD...V -YPWTQRYFSTFGDLSSPAAIAGNPKVHAHGKKILGAIDNAIHNLDD...V -YPWTQRFFESFGDLSTPDAVMNNPKVKAHGKKVLGAFSDGLTHLDN...L -YPWTQRFFSSFGNLSSATAISGNPNVKAHGKKVLTSFGDAVKNLDN...I -YPWTQRFFDSFGDLSNPAAVMGNPKVKAHGKKVLHSFGEGVHHLDN...L -YPWTQRFFASFGNLSSPTAINGNPMVRAHGKKVLTSFGEAVKNLDN...I -YPWTQRFFDSFGDLSSPDAVMGNPKVKAHGKKVLGAFSDGLNHLDN...L 
-YPWTQRFFEAFGDLSSAGAVMGNPKVKAHGAKVLTSFGDALKNLDD...L -YPWTQRFFESFGDLSSANAVMNNPKVKAHGKKVLAAFSEGLSHLDN...L -YPWTQRFFDSFGDLSSASAVMGNAKVKAHGKKVIDSFSNGLKHLDN...L -YPWTQRFFDSFGDLSSASAVMGNAKVKAHGKKVIDSFSNGLKHLDN...L -YPWTQRFFDSFGDLSSASAVMGNPKVKAHGKKVLHSLGEGVANLDN...L -YPWTQRFFESFGDLSSADAVMGNAKVKAHGAKVLTSFGDALKNLDN...L -YPWTQRFFEHFGDLSSASAIMNNPKVKAHGEKVFTSFGDGLKHLED...L -YPWTQRFFDSFGDLSSPSAVMSNPKVKAHGKKVLTSFSDGLNHLDN...L -YPWTQRFFDSFGDLSSADAIMNNPKVKAHGKKVLNSFSDGLKNLDN...L -YPWTQRFFDSFGNLSSPSAILGNPKVKAHGKKVLTSFGDAIKNMDN...L -YPGDLAFFHKFSSV.PLYGLRSNPAYKAQTLTVINYLDKVVDALGG..NA -..........FSFLKDTAGVEDSPKLQAHAEQVFGLVRDSAAQLRTKGEV -..........FSFLKDSAGVVDSPKLGAHAEKVFGMVRDSAVQLRATGEV -HPETLEKFDKFKHLKTEAEMKASEDLKKHGNTVLTALGGILKKKGH...H -HPETLEKFDKFKHLKTEAEMKASEDLKKHGTVVLTALGGILKKKGH...H -HPETLDKFDKFKHLKTEDEMKGSEDLKKHGNTVLTALGGILKKKGH...H -HPETLDKFDKFKNLKSEEDMKGSEDLKKHGCTVLTALGTILKKKGQ...H -YPESQNHFPKFKN.KSLGELKDTADIKAQADTVLSALGNIVKKKGS...H -HPETLEKFDKFKHLKAEDEMRASEELKKHGTTVLTALGGILKKKGQ...H -HPETLEKFDKFKHLKSEDEMKASEELKKHGTTVLTALGGILKKKGQ...H - -SMKLQELSVKHAQSFYVDRQYFKVLAGII.........ADTTAPGDAGFE -GKHVDALVATH.KPRGVTHAQFNNFRAAFIAYLKGHV..DYTAAVEAAWG -PGALSELSDLHAHKLRVDPVNFKLLSHSLLVTLASHLPSDFTPAVHASLD -PSALSALSDLHAHKLRVDPVNFKLLRHCLLVTLACHHPAEFTPAVHASLD -FGFLTKLSELHATKLRVDPTNFKILAHNLIVVIAAYFPAEFTPEIHLSVD -SQALSELSNLHAYNLRVDPANFKLLAQCFQVVLATHLGKDYSPEMHAAFD -SQALSELSNLHAYNLRVDPVNFKFLSQCLQVSLATRLGKEYSPEVHSAVD -AGALAKLSELHAYILRVDPVNFKFLSHCLLVTLASRLPADFTADAHAAWD -PGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLASHHPAEFTPAVHASLD -AGALSKLSDLHAQKLRVDPVNFKFLGHCFLVVLAIHHPSLLTPEVHASMD -AGALSKLSDLHAQKLRVDPVNFKLLGHCFLVVVAVHFPSLLTPEVHASLD -PGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLALHHPADFTPAVHASLD -AGTLSKLSDLHAHKLRVDPVNFKLLGQCFLVVVAIHHPSALTPEVHASLD -PQALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLD -PQALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLD -PGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLANHHPADFTPAVHASLD -PNALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTPAVHSALD -TGTLSKLSDLHAHKLRVDPVNFKLLGQCFLVVVAIHHPSALTPEVHASLD 
-PNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLD -PGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTPAVHASLD -AGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHHPSVLTPEVHASLD -KDTFAKLSELHCDKLHVDPTNFKLLGNVLVIVLADHHGKEFTPAHHAAYQ -REYYADLSVTHSLKFYVDPENFKLFSGIVIVCLALTLQTDFSCHKQLAFE -KSHLKGLSKSHAEDLHVDPENFKRLADVLVIVLAAKLGSAFTPQVQAVWE -KGTLHDLSEEHANELHVDPENFRRLGEVLIVVLGAKLGKAFSPQVQHVWE -KGTFAHLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPVVQAAYQ -KGTFAQLSELHCDKLHVDPENFRLLGDILVIILAAHFGKDFTPECQAAWQ -KGTFAQLSELHCDKLHVDPENFRLLGNVLVVVLARHFGKDFTPELQASYQ -KNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFAKDFTPDSQAAWQ -KGTFAQLSELHCDKLHVDPENFKLLGNVLVCVLAHHFGKEFTPQVQAAYQ -KGTFAKLSELHCDKLHVDPENFNRLGNVLIVVLARHFSKDFSPEVQAAWQ -KGTFAKLSELHCDKLHVDPENFRLLGNVLVIVLSHHFGKEFTPQVQAAYQ -KGTFASLSELHCDKLHVDPENFKLLGNMIVIVMAHHLGKDFTPEAQAAFQ -KGTFASLSELHCDKLHVDPENFKLLGNMIVIVMAHHLGKDFTPEAQAAFQ -KGTFAKLSELHCDKLHVDPENFRLLGNVLVVVLASKFGKEFTPPVQAAFQ -KGTFAKLSELHCDKLHVDPENFNRLGNVLVVVLARHFSKEFTPEAQAAWQ -KGAFAELSELHCDKLHVDPENFRLLGNVLVCVLARHFGKEFSPEAQAAYQ -KGTFAKLSELHCDKLHVDPENFRLLGNVLVRVLACNFGPEFTPQVQAAFQ -KGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFGKEFTPQVQAAYQ -KTTFAKLSELHCDKLHVDPENFKLLGNVMVIILATHFGKEFTPEVQAAWQ -GALMKAKVPSH.DAMGITPKHFGQLLKLVGGVFQEEF..SADPTTVAAWG -VLGNATLGAIHVQKGVTNP.HFVVVKEALLQTIKKASGNNWSEELNTAWE -VLDGKD.GSIHIQKGVLDP.HFVVVKEALLKTIKEASGDKWSEELSAAWE -EAELKPLAQSHATKHKIPIKYLEFISDAIIHVLHSRHPGDFGADAQAAMN -EAELKPLAQSHATKHKIPIKYLEFISDAIIHVLHSKHPGNFGADAQGAMT -EAELKPLAQSHATKHKIPVKYLEFISDAIIQVLQNKHSGDFHADTEAAMK -AAEIQPLAQSHATKHKIPVKYLEFISEIIIEVLKKRHSGDFGADAQGAMS -SQPVKALAATHITTHKIPPHYFTKITTIAVDVLSEMYPSEMNAQVQAAFS -AAELAPLAQSHATKHKIPVKYLEFISEAIIQVLQSKHPGDFGADAQGAMS -EAELKPLAQSHATKHKIPVKYLELISDAIVHVLQKKHPGDFGADAQGAMK - -KLMSMICILLSSAY....... -ATFDAFFGAVFAK.......M -KFLANVSTVLTSKYR...... -KFMASVSTVLTSKYR...... -KFLQQLALALAEKYR...... -KFLSAVAAVLAEKYR...... -KFMSAVASVLAEKYR...... -KFLSIVSSVLTEKYR...... -KFFSAVSTVLTSKYR...... -KFLCAVATVLTAKYR...... -KFVLAVGTVLTAKYR...... -KFLATVATVLTSKYR...... -KFLCAVGNVLTAKYR...... -KFLASVSTVLTSKYR...... 
-KFLASVSTVLTSKYR...... -KFFASVSTVLTSKYR...... -KFFSAVSTVLTSKYR...... -KFLCAVGTVLTAKYR...... -KFLASVSTVLTSKYR...... -KFFTSVSTVLTSKYR...... -KFLCAVGNVLSAKYR...... -KLVNVVSHSLARRYH...... -KLMKGVSHALGHGY....... -KLNATLVAALSHGYF...... -KFIAVLVDALSHSYH...... -KVVAGVANALAHKYH...... -KLVRVVAHALARKYH...... -KVVAGVANALAHKYH...... -KLVRVVAHALARKYH...... -KVVAGVANALAHKYH...... -KLVSGVAHALGHKYH...... -KVVAGVANALAHKYH...... -KVVAGVANALAHKYH...... -KVVAGVANALSHKYH...... -KVVAGVANALAHKYH...... -KLVSGVSHALAHKYH...... -KVVAGVANALAHKYH...... -KVVAGVANALAHKYH...... -KVVAGVANALAHKYH...... -KLVSAVAIALAHKYH...... -DAAGVLVAAM..........K -VAYDGLATAIKKAMKT....A -VAYDGLATAIK....A....A -KALELFRKDIAAKYKELGFQG -KALELFRNDIAAKYKELGFQG -KALELFRNDIAAKYKELGFQG -KALELFRNDIAAKYKELGFQG -GAFKIICSDIEKEYKAANFQG -KALELFRNDIAAKYKELGFQG -KALELFRNDMAAKYKELGFQG diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/pir b/forester/archive/RIO/others/hmmer/squid/Formats/pir deleted file mode 100644 index 0999703..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/pir +++ /dev/null @@ -1,249 +0,0 @@ - - P R O T E I N S E Q U E N C E D A T A B A S E - of PIR-International - - Section 1. Fully Classified Entries - Release 63.00, December 30, 1999 - 20032 sequences, 7820966 residues - - Protein Information Resource (PIR)* - National Biomedical Research Foundation - 3900 Reservoir Road, N.W., - Washington, DC 20007, USA - -International Protein Information Munich Information Center for - Database in Japan (JIPID) Protein Sequences (MIPS) - Science University of Tokyo GSF-Forschungszentrum f. Umwelt und Gesundheit - 2669 Yamazaki, Noda 278, Japan am Max-Planck-Instut f. Biochemie - Am Klopferspitz 18, D-82152 Martinsried, FRG - - This database may be redistributed without prior consent, provided that - this notice be given to each user and that the words "Derived from" shall - precede this notice if the database has been altered by the redistributor. - - *PIR is a registered mark of NBRF. 
- - -\\\ -ENTRY CCHU #type complete -TITLE cytochrome c - human -ORGANISM #formal_name Homo sapiens #common_name man -DATE 24-Apr-1984 #sequence_revision 30-Sep-1991 #text_change - 28-Jun-1999 -ACCESSIONS A31764; A05676; I55192; A00001 -REFERENCE A31764 - #authors Evans, M.J.; Scarpulla, R.C. - #journal Proc. Natl. Acad. Sci. U.S.A. (1988) 85:9625-9629 - #title The human somatic cytochrome c gene: two classes of processed - pseudogenes demarcate a period of rapid molecular - evolution. - #cross-references MUID:89071748 - #accession A31764 - ##molecule_type DNA - ##residues 1-105 ##label EVA - ##cross-references GB:M22877; NID:g181241; PIDN:AAA35732.1; PID:g181242 -REFERENCE A05676 - #authors Matsubara, H.; Smith, E.L. - #journal J. Biol. Chem. (1963) 238:2732-2753 - #title Human heart cytochrome c. Chymotryptic peptides, tryptic - peptides, and the complete amino acid sequence. - #accession A05676 - ##molecule_type protein - ##residues 2-28;29-46;47-100;101-105 ##label MATS -REFERENCE A00001 - #authors Matsubara, H.; Smith, E.L. - #journal J. Biol. Chem. (1962) 237:3575-3576 - #title The amino acid sequence of human heart cytochrome c. - #contents annotation - #note 66-Leu is found in 10% of the molecules in pooled protein -REFERENCE I55192 - #authors Tanaka, Y.; Ashikari, T.; Shibano, Y.; Amachi, T.; Yoshizumi, - H.; Matsubara, H. - #journal J. Biochem. (1988) 103:954-961 - #title Construction of a human cytochrome c gene and its functional - expression in Saccharomyces cerevisiae. 
- #cross-references MUID:89008207 - #accession I55192 - ##status translated from GB/EMBL/DDBJ - ##molecule_type mRNA - ##residues 78-105 ##label RES - ##cross-references GB:D00265; NID:g2897691; PIDN:BAA00187.1; - PID:d1000635; PID:g219557 -GENETICS - #introns 57/1 -CLASSIFICATION #superfamily cytochrome c; cytochrome c homology -KEYWORDS acetylated amino end; chromoprotein; electron transfer; heme; - iron; mitochondrion; oxidative phosphorylation; - polymorphism; respiratory chain -FEATURE - 2-105 #product cytochrome c #status experimental #label MAT\ - 5-99 #domain cytochrome c homology #label CYC\ - 2 #modified_site acetylated amino end (Gly) (in mature - form) #status experimental\ - 15,18 #binding_site heme (Cys) (covalent) #status - experimental\ - 19,81 #binding_site heme iron (His, Met) (axial ligands) - #status predicted -SUMMARY #length 105 #molecular-weight 11749 #checksum 3247 -SEQUENCE - 5 10 15 20 25 30 - 1 M G D V E K G K K I F I M K C S Q C H T V E K G G K H K T G - 31 P N L H G L F G R K T G Q A P G Y S Y T A A N K N K G I I W - 61 G E D T L M E Y L E N P K K Y I P G T K M I F V G I K K K E - 91 E R A D L I A Y L K K A T N E -/// -ENTRY CCCZ #type complete -TITLE cytochrome c - chimpanzee (tentative sequence) -ORGANISM #formal_name Pan troglodytes #common_name chimpanzee -DATE 17-Mar-1987 #sequence_revision 17-Mar-1987 #text_change - 25-Apr-1997 -ACCESSIONS A00002 -REFERENCE A94601 - #authors Needleman, S.B. - #submission submitted to the Atlas, October 1968 - #accession A00002 - ##molecule_type protein - ##residues 1-104 ##label NEE -REFERENCE A94455 - #authors Needleman, S.B.; Margoliash, E. - #citation unpublished results, 1966, cited by Margoliash, E., and - Fitch, W.M., Ann. N.Y. Acad. Sci. 
151, 359-381, 1968 - #contents annotation; compositions of chymotryptic peptides -CLASSIFICATION #superfamily cytochrome c; cytochrome c homology -KEYWORDS acetylated amino end; chromoprotein; electron transfer; heme; - iron; mitochondrion; oxidative phosphorylation; respiratory - chain -FEATURE - 4-98 #domain cytochrome c homology #label CYC\ - 1 #modified_site acetylated amino end (Gly) #status - predicted\ - 14,17 #binding_site heme (Cys) (covalent) #status predicted\ - 18,80 #binding_site heme iron (His, Met) (axial ligands) - #status predicted -SUMMARY #length 104 #molecular-weight 11617 #checksum 9501 -SEQUENCE - 5 10 15 20 25 30 - 1 G D V E K G K K I F I M K C S Q C H T V E K G G K H K T G P - 31 N L H G L F G R K T G Q A P G Y S Y T A A N K N K G I I W G - 61 E D T L M E Y L E N P K K Y I P G T K M I F V G I K K K E E - 91 R A D L I A Y L K K A T N E -/// -ENTRY CCMQR #type complete -TITLE cytochrome c - rhesus macaque (tentative sequence) -ORGANISM #formal_name Macaca mulatta #common_name rhesus macaque -DATE 17-Mar-1987 #sequence_revision 17-Mar-1987 #text_change - 25-Apr-1997 -ACCESSIONS A00003 -REFERENCE A00003 - #authors Rothfus, J.A.; Smith, E.L. - #journal J. Biol. Chem. (1965) 240:4277-4283 - #title Amino acid sequence of rhesus monkey heart cytochrome c. 
- #cross-references MUID:66045191 - #contents compositions of chymotryptic peptides; sequences of residues - 55-61 and 68-70 - #accession A00003 - ##molecule_type protein - ##residues 1-104 ##label ROT -CLASSIFICATION #superfamily cytochrome c; cytochrome c homology -KEYWORDS acetylated amino end; chromoprotein; electron transfer; heme; - iron; mitochondrion; oxidative phosphorylation; respiratory - chain -FEATURE - 4-98 #domain cytochrome c homology #label CYC\ - 1 #modified_site acetylated amino end (Gly) #status - experimental\ - 14,17 #binding_site heme (Cys) (covalent) #status predicted\ - 18,80 #binding_site heme iron (His, Met) (axial ligands) - #status predicted -SUMMARY #length 104 #molecular-weight 11605 #checksum 9512 -SEQUENCE - 5 10 15 20 25 30 - 1 G D V E K G K K I F I M K C S Q C H T V E K G G K H K T G P - 31 N L H G L F G R K T G Q A P G Y S Y T A A N K N K G I T W G - 61 E D T L M E Y L E N P K K Y I P G T K M I F V G I K K K E E - 91 R A D L I A Y L K K A T N E -/// -ENTRY CCMKP #type complete -TITLE cytochrome c - spider monkey -ORGANISM #formal_name Ateles sp. #common_name spider monkey -DATE 17-Dec-1982 #sequence_revision 17-Dec-1982 #text_change - 25-Apr-1997 -ACCESSIONS A00004 -REFERENCE A00004 - #authors Margoliash, E. 
- #citation unpublished results, cited by Shelnutt, J.A., Rousseau, D.L., - Dethmers, J.K., and Margoliash, E., Biochemistry 20, - 6485-6497, 1981 - #accession A00004 - ##molecule_type protein - ##residues 1-104 ##label MAR -CLASSIFICATION #superfamily cytochrome c; cytochrome c homology -KEYWORDS acetylated amino end; chromoprotein; electron transfer; heme; - iron; mitochondrion; oxidative phosphorylation; respiratory - chain -FEATURE - 4-98 #domain cytochrome c homology #label CYC\ - 1 #modified_site acetylated amino end (Gly) #status - predicted\ - 14,17 #binding_site heme (Cys) (covalent) #status predicted\ - 18,80 #binding_site heme iron (His, Met) (axial ligands) - #status predicted -SUMMARY #length 104 #molecular-weight 11710 #checksum 9066 -SEQUENCE - 5 10 15 20 25 30 - 1 G D V F K G K R I F I M K C S Q C H T V E K G G K H K T G P - 31 N L H G L F G R K T G Q A S G F T Y T E A N K N K G I I W G - 61 E D T L M E Y L E N P K K Y I P G T K M I F V G I K K K E E - 91 R A D L I A Y L K K A T N E -/// -ENTRY CCMS #type complete -TITLE cytochrome c - mouse -ORGANISM #formal_name Mus musculus #common_name house mouse -DATE 31-Dec-1990 #sequence_revision 30-Sep-1991 #text_change - 11-Jun-1999 -ACCESSIONS A23057; A04604; A00009 -REFERENCE A23057 - #authors Limbach, K.J.; Wu, R. - #journal Nucleic Acids Res. (1985) 13:617-630 - #title Characterization of a mouse somatic cytochrome c gene and - three cytochrome c pseudogenes. - #cross-references MUID:85215501 - #accession A23057 - ##molecule_type DNA - ##residues 1-105 ##label LIM - ##cross-references EMBL:X01756; NID:g50618; PIDN:CAA25899.1; PID:g50619 - ##experimental_source strain BALB/c -REFERENCE A04604 - #authors Carlson, S.S.; Mross, G.A.; Wilson, A.C.; Mead, R.T.; Wolin, - L.D.; Bowers, S.F.; Foley, N.T.; Muijsers, A.O.; - Margoliash, E. - #journal Biochemistry (1977) 16:1437-1442 - #title Primary structure of mouse, rat, and guinea pig cytochrome c. 
- #cross-references MUID:77134768 - #accession A04604 - ##molecule_type protein - ##residues 2-105 ##label CAR - ##experimental_source strain BALB/c -GENETICS - #introns 57/1 -CLASSIFICATION #superfamily cytochrome c; cytochrome c homology -KEYWORDS acetylated amino end; chromoprotein; electron transfer; heme; - iron; mitochondrion; oxidative phosphorylation; respiratory - chain -FEATURE - 2-105 #product cytochrome c #status experimental #label MAT\ - 5-99 #domain cytochrome c homology #label CYC\ - 2 #modified_site acetylated amino end (Gly) (in mature - form) #status experimental\ - 15,18 #binding_site heme (Cys) (covalent) #status - experimental\ - 19,81 #binding_site heme iron (His, Met) (axial ligands) - #status predicted -SUMMARY #length 105 #molecular-weight 11605 #checksum 1273 -SEQUENCE - 5 10 15 20 25 30 - 1 M G D V E K G K K I F V Q K C A Q C H T V E K G G K H K T G - 31 P N L H G L F G R K T G Q A A G F S Y T D A N K N K G I T W - 61 G E D T L M E Y L E N P K K Y I P G T K M I F A G I K K K G - 91 E R A D L I A Y L K K A T N E -/// diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/regression.dat b/forester/archive/RIO/others/hmmer/squid/Formats/regression.dat deleted file mode 100644 index 5743930..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/regression.dat +++ /dev/null @@ -1,20 +0,0 @@ -# filename format type nseq nres short long autodetect? alignment? singleseq? 
-# -------- -------- ------- ---- ---- ----- ---- ---------- ---------- ---------- -fasta fasta Protein 3 1730 356 949 yes no no -genbank genbank DNA 6 3773 132 1771 yes no no -embl embl DNA 2 1868 233 1635 yes no no -swissprot embl Protein 6 2779 75 924 yes no no -gcg gcg DNA 1 9718 9718 9718 yes no yes -gcgdata.1 gcgdata Protein 20 7100 29 1299 yes no no -gcgdata.2 gcgdata DNA 7 5867 105 2886 yes no no -pir pir Protein 5 522 104 105 yes no no -stockholm.1 stockholm Protein 7 266 38 38 yes yes no -stockholm.2 stockholm Protein 16 3769 227 239 yes yes no -msf msf Protein 50 7251 141 153 yes yes no -clustal clustal DNA 20 1518 75 80 yes yes no -selex.1 selex RNA 11 201 11 29 yes yes no -selex.2 selex RNA 11 201 11 29 yes yes no -phylip phylip Protein 50 7251 141 153 yes yes no -a2m a2m Protein 50 7251 141 153 no yes no - - diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/selex.1 b/forester/archive/RIO/others/hmmer/squid/Formats/selex.1 deleted file mode 100644 index bf2f905..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/selex.1 +++ /dev/null @@ -1,37 +0,0 @@ -#=ID r17 -#=AC PF99999 -#=DE Test of extended SELEX format -#=AU SRE, Tue Dec 1 19:17:48 1998 -#=GA -1.0 -2.0 -#=TC -3.0 -4.0 -#=NC -5.0 -6.0 - -# Derived from r17.slx -#tag name weight source src acc # from,to,olen description -#=SQ 28 1.0000 IDENT: ACCESSION 1..29::29 Sequence one -#=SQ longname 102.0000 SWISS NUMBER; 3..19::100 A long name -#=SQ 2 2.0000 OR OR 0..0::0 two -#=SQ 3 3.0000 PIR - 0..0::0 three -#=SQ 4 4.0000 OR FOR 0..0::0 - means no data -#=SQ 5 5.0000 WHATEVER MISSING 0..0::0 - -#=SQ 6 6.0000 - DATA 0..0::0 foo -#=SQ 7 7 - - 0..0::0 bar -#=SQ 8 8.0000 - - 0..0::0 baz -#=SQ 9 9.0000 - - 0..0::0 a description of several words. 
-#=SQ 10 10.0000 - - 0..0::0 - - -#=RF xxxxxxx xxxx xxxxxx -#=CS >>>>+>> ^^^^ <<<<<< -28 gGAGUAAGAUAGC AUCA GCAUCUUGUUCC -#=SS +++++>>>>>+>> ^^^^ <<<<<<<+++++ -longname GUUCACC AUCA GGGGAc -#=SS >>>>+>> ^^^^ <<<<<< -2 AUGGAUGCGCACC AUCA GGGCGUaucuau -3 GAUCACC AUCA GGGauc -4 GGUCACC AUCA GGGauc -5 GGACACC AUCA GGGucu -6 CACC AUCA GGG -7 GAUCACC AUCA GGGauc -8 CUCACC AUCA GGGGG -9 AUGCACC AUCA GGGCAU -10 CUCACC AUCA GGGGG diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/selex.2 b/forester/archive/RIO/others/hmmer/squid/Formats/selex.2 deleted file mode 100644 index 1f8f6c7..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/selex.2 +++ /dev/null @@ -1,11 +0,0 @@ -28 gGAGUAAGAUAGC AUCA GCAUCUUGUUCC -longname GUUCACC AUCA GGGGAc -2 AUGGAUGCGCACC AUCA GGGCGUaucuau -3 GAUCACC AUCA GGGauc -4 GGUCACC AUCA GGGauc -5 GGACACC AUCA GGGucu -6 CACC AUCA GGG -7 GAUCACC AUCA GGGauc -8 CUCACC AUCA GGGGG -9 AUGCACC AUCA GGGCAU -10 CUCACC AUCA GGGGG diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/stockholm.1 b/forester/archive/RIO/others/hmmer/squid/Formats/stockholm.1 deleted file mode 100644 index c557d53..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/stockholm.1 +++ /dev/null @@ -1,94 +0,0 @@ -# STOCKHOLM 1.0 -# -# This is an example of a Stockholm multiple sequence alignment -# file. It is deliberately designed to exercise many of the -# features of Stockholm format, in order to test a parser. -# -#=GF ID 14-3-3 -#=GF AC PF00244 -#=GF DE 14-3-3 proteins -#=GF AU Finn RD -#=GF AL Clustalw -#=GF SE Prosite -#=GF GA 25 25 -#=GF TC 35.40 35.40 -#=GF NC 8.80 8.80 -#=GF BM hmmbuild -f HMM SEED -#=GF BM hmmcalibrate --seed 0 HMM -#=GF RN [1] -#=GF RM 95327195 -#=GF RT Structure of a 14-3-3 protein and implications for -#=GF RT coordination of multiple signalling pathways. -#=GF RA Xiao B, Smerdon SJ, Jones DH, Dodson GG, Soneji Y, Aitken -#=GF RA A, Gamblin SJ; -#=GF RL Nature 1995;376:188-191. 
-#=GF RN [2] -#=GF RM 95327196 -#=GF RT Crystal structure of the zeta isoform of the 14-3-3 -#=GF RT protein. -#=GF RA Liu D, Bienkowska J, Petosa C, Collier RJ, Fu H, Liddington -#=GF RA R; -#=GF RL Nature 1995;376:191-194. -#=GF DR PROSITE; PDOC00633; -#=GF DR SMART; 14_3_3; -#=GF DR PRINTS; PR00305; -#=GF SQ 119 - -#=GS 1431_ENTHI/4-239 WT 0.42 -#=GS seq1 WT 0.40 -#=GS seq2 WT 0.41 -#=GS seq3 WT 0.43 -#=GS seq4 WT 0.44 -#=GS seq5 WT 0.45 -#=GS seq6 WT 0.46 - -#=GS seq4 AC PF00001 -#=GS seq4 DE A description of seq4. - -#=GS seq1 NEWTAG foo -#=GS seq2 NEWTAG bar -#=GS seq3 NEWTAG baz - -#=GS seq3 TAG2 foo2 -#=GS seq4 TAG2 foo3 -#=GS seq5 TAG2 foo4 - -#=GC SS_cons xxxxxxxxxxxxxxxxxxx -#=GC SA_cons xxxxxxxxxxxxxxxxxxx -#=GC New_long_tag_thingie xxxxxxxxxxxxxxxxxxx -1431_ENTHI/4-239 ACDEFGHKLMNPQRSTVWY -#=GR seq1 SS ................... -#=GR seq1 SA 0000000000000000000 -seq1 ACDEFGHKLMNPQRSTVWY -seq2 ACDEFGHKLMNPQRSTVWY -seq3 ACDEFGHKLMNPQRSTVWY -seq4 ACDEFGHKLMNPQRSTVWY -seq5 ACDEFGHKLMNPQRSTVWY -seq6 ACDEFGHKLMNPQRSTVWY -#=GR seq6 SS ................... -#=GR seq6 SA 9999999999999999999 -#=GR seq6 Invented_tag ******************* - - -#=GC SS_cons xxxxxxxxxxxxxxxxxxx -#=GC SA_cons xxxxxxxxxxxxxxxxxxx -#=GC New_long_tag_thingie xxxxxxxxxxxxxxxxxxx -1431_ENTHI/4-239 ACDEFGHKLMNPQRSTVWY -#=GR seq1 SS ................... -#=GR seq1 SA 0000000000000000000 -seq1 ACDEFGHKLMNPQRSTVWY -seq2 ACDEFGHKLMNPQRSTVWY -seq3 ACDEFGHKLMNPQRSTVWY -seq4 ACDEFGHKLMNPQRSTVWY -seq5 ACDEFGHKLMNPQRSTVWY -seq6 ACDEFGHKLMNPQRSTVWY -#=GR seq6 SS ................... -#=GR seq6 SA 9999999999999999999 -#=GR seq6 Invented_tag ******************* - -# -# And here's some trailing comments, just to -# try to confuse a parser. 
-# - -// \ No newline at end of file diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/stockholm.2 b/forester/archive/RIO/others/hmmer/squid/Formats/stockholm.2 deleted file mode 100644 index ba224d4..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/stockholm.2 +++ /dev/null @@ -1,366 +0,0 @@ -# STOCKHOLM 1.0 -#=GF ID 14-3-3 -#=GF AC PF00244 -#=GF DE 14-3-3 proteins -#=GF AU Finn RD -#=GF AL Clustalw -#=GF SE Prosite -#=GF GA 25 25 -#=GF TC 35.40 35.40 -#=GF NC 8.80 8.80 -#=GF BM hmmbuild -f HMM SEED -#=GF BM hmmcalibrate --seed 0 HMM -#=GF RN [1] -#=GF RM 95327195 -#=GF RT Structure of a 14-3-3 protein and implications for -#=GF RT coordination of multiple signalling pathways. -#=GF RA Xiao B, Smerdon SJ, Jones DH, Dodson GG, Soneji Y, Aitken A, -#=GF RA Gamblin SJ; -#=GF RL Nature 1995;376:188-191. -#=GF RN [2] -#=GF RM 95327196 -#=GF RT Crystal structure of the zeta isoform of the 14-3-3 protein. -#=GF RA Liu D, Bienkowska J, Petosa C, Collier RJ, Fu H, Liddington -#=GF RA R; -#=GF RL Nature 1995;376:191-194. -#=GF RN [3] -#=GF RM 96182649 -#=GF RT Interaction of 14-3-3 with signaling proteins is mediated by -#=GF RT the recognition of phosphoserine. -#=GF RA Muslin AJ, Tanner JW, Allen PM, Shaw AS; -#=GF RL Cell 1996;84:889-897. -#=GF RN [4] -#=GF RM 97424374 -#=GF RT The 14-3-3 protein binds its target proteins with a common -#=GF RT site located towards the C-terminus. -#=GF RA Ichimura T, Ito M, Itagaki C, Takahashi M, Horigome T, Omata -#=GF RA S, Ohno S, Isobe T -#=GF RL FEBS Lett 1997;413:273-276. -#=GF RN [5] -#=GF RM 96394689 -#=GF RT Molecular evolution of the 14-3-3 protein family. -#=GF RA Wang W, Shakes DC -#=GF RL J Mol Evol 1996;43:384-398. -#=GF RN [6] -#=GF RM 96300316 -#=GF RT Function of 14-3-3 proteins. -#=GF RA Jin DY, Lyu MS, Kozak CA, Jeang KT -#=GF RL Nature 1996;382:308-308. 
-#=GF DR PROSITE; PDOC00633; -#=GF DR SMART; 14_3_3; -#=GF DR PRINTS; PR00305; -#=GF SQ 16 -1431_ENTHI/4-239 REDCVYTAKLAEQSERYDEMVQCMKQVAEMEA...ELSIEERNLLSVAYKNVIGAKRASWRIISSLEQKEQAKG.NDKHVEIIKGYRAKIEKELSTCCDDVLKVIQENLLPKA..STSESKVFFKKMEGDYYRYFAEFTVDEKRKEVADKSLAAYTEATEISNAELAPTHPIRLGLALNFSVFYFEIMNDADKACQLAKQAFDDAIAKLDEVPENMYKDSTLIMQLLRDNLTLWTSDACDEE -#=GS 1431_ENTHI/4-239 AC P42648 -1432_ENTHI/4-238 REDLVYLSKLAEQSERYEEMVQYMKQVAEMGT...ELSVEERNLISVAYKNVVGSRRASWRIISSLEQKEQAKG.NTQRVELIKTYRAKIEQELSQKCDDVLKIITEFLLKNS..TSIESKVFFKKMEGDYYRYYAEFTVDEKRKEVADKSLAAYQEATDTA.ASLVPTHPIRLGLALNFSVFYYQIMNDADKACQLAKEAFDEAIQKLDEVPEESYKESTLIMQLLRDNLTLWTSDMGDDE -#=GS 1432_ENTHI/4-238 AC P42649 -1433_CAEEL/5-237 VEELVQRAKLAEQAERYDDMAAAMKKVTEQGQ...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTEG...SEKKQQLAKEYRVKVEQELNDICQDVLKLLDEFLIVKA..GAAESKAFYLKMKGDYYRYLAEVAS.EDRAAVVEKSQKAYQEALDIAKDKMQPTHPIRLGLALNFSVFYYEILNTPEHACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDVGAED -#=GS 1433_CAEEL/5-237 AC P41932 -1433_LYCES/9-246 REENVYMAKLADRAESDEEMVEFMEKVSNSLGS.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEESRG.NEEHVNSIREYRSKIENELSKICDGILKLLDSKLIPSA..TSGDSKVFYLKMKGDYHRYLAEFKTGAERKEAAESTLTAYKAAQDIASAELAPTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDMQDDG -#=GS 1433_LYCES/9-246 AC P93209 -1433_XENLA/1-227 .......AKLSEQAERYDDMAASMKAVTELGA...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTEG...NDKRQQMAREYREKVETELQDICKDVLDLLDRFLVPNA..TPPESKVFYLKMKGDYYRYLSEVASGDSKQETVASSQQAYQEAFEISKSEMQPTHPIRLGLALNFSVFYYEILNSPEKACSLAKSAFDEAIRELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGEE -#=GS 1433_XENLA/1-227 AC P29309 -1434_LYCES/6-243 REENVYLAKLAEQAERYEEMIEFMEKVAKTADV.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEESRG.NEDHVNTIKEYRSKIEADLSKICDGILSLLESNLIPSA..STAESKVFHLKMKGDYHRYLAEFKTGTERKEAAENTLLAYKSAQDIALAELAPTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAISELDTLGEESYKDSTLIMQLLRDNLTLWTSDNADDV -#=GS 1434_LYCES/6-243 AC P42652 -143B_VICFA/7-242 
RENFVYIAKLAEQAERYEEMVDSMKNVANLDV...ELTIEERNLLSVGYKNVIGARRASWRILSSIEQKEESKG.NDVNAKRIKEYRHKVETELSNICIDVMRVIDEHLIPSA..AAGESTVFYYKMKGDYYRYLAEFKTGNEKKEAGDQSMKAYESATTAAEAELPPTHPIRLGLALNFSVFYYEILNSPERACHLAKQAFDEAISELDTLNEESYKDSTLIMQLLRDNLTLWTSDIPEDG -#=GS 143B_VICFA/7-242 AC P42654 -143E_HUMAN/4-239 REDLVYQAKLAEQAERYDEMVESMKKVAGMDV...ELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEENKG.GEDKLKMIREYRQMVETELKLICCDILDVLDKHLIPAA..NTGESKVFYYKMKGDYHRYLAEFATGNDRKEAAENSLVAYKAASDIAMTELPPTHPIRLGLALNFSVFYYEILNSPDRACRLAKAAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMQGDG -#=GS 143E_HUMAN/4-239 AC P42655 -143F_MOUSE/3-240 REQLLQRARLAEQAERYDDMASAMKAVTELNE...PLSNEDRNLLSVAYKNVVGARRSSWRVISSIEQKTMADG.NEKKLEKVKAYREKIEKELETVCNDVLALLDKFLIKNCNDFQYESKVFYLKMKGDYYRYLAEVASGEKKNSVVEASEAAYKEAFEISKEHMQPTHPIRLGLALNFSVFYYEIQNAPEQACLLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDQQDEE -#=GS 143F_MOUSE/3-240 AC P11576 -143R_ARATH/7-245 RDQYVYMAKLAEQAERYEEMVQFMEQLVTGATPAEELTVEERNLLSVAYKNVIGSLRAAWRIVSSIEQKEESRK.NDEHVSLVKDYRSKVESELSSVCSGILKLLDSHLIPSA..GASESKVFYLKMKGDYHRYMAEFKSGDERKTAAEDTMLAYKAAQDIAAADMAPTHPIRLGLALNFSVFYYEILNSSDKACNMAKQAFEEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDYAGAD -#=GS 143R_ARATH/7-245 AC P42647 -143S_HUMAN/3-238 RASLIQKAKLAEQAERYEDMAAFMKGAVEKGE...ELSCEERNLLSVAYKNVVGGQRAAWRVLSSIEQKSNEEG.SEEKGPEVREYREKVETELQGVCDTVLGLLDSHLIKEA..GDAESRVFYLKMKGDYYRYLAEVATGDDKKRIIDSARSAYQEAMDISKKEMPPTNPIRLGLALNFSVFHYEIANSPEEAISLAKTTFDEAMADLHTLSEDSYKDSTLIMQLLRDNLTLWTADNAGEE -#=GS 143S_HUMAN/3-238 AC P31947 -143T_HUMAN/3-236 KTELIQKAKLAEQAERYDDMATCMKAVTEQGA...ELSNEERNLLSVAYKNVVGGRRSAWRVISSIEQKTDT...SDKKLQLIKDYREKVESELRSICTTVLELLDKYLIANA..TNPESKVFYLKMKGDYFRYLAEVACGDDRKQTIDNSQGAYQEAFDISKKEMQPTHPIRLGLALNFSVFYYEILNNPELACTLAKTAFDEAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDSAGEE -#=GS 143T_HUMAN/3-236 AC P27348 -143Z_DROME/6-239 
KEELVQKAKLAEQSERYDDMAQAMKSVTETGV...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTEA...SARKQQLAREYRERVEKELREICYEVLGLLDKYLIPKA..SNPESKVFYLKMKGDYYRYLAEVATGDARNTVVDDSQTAYQDAFDISKGKMQPTHPIRLGLALNFSVFYYEILNSPDKACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDTQGDE -#=GS 143Z_DROME/6-239 AC P29310 -BMH1_YEAST/4-240 REDSVYLAKLAEQAERYEEMVENMKTVASSGQ...ELSVEERNLLSVAYKNVIGARRASWRIVSSIEQKEESKEKSEHQVELICSYRSKIETELTKISDDILSVLDSHLIPSA..TTGESKVFYYKMKGDYHRYLAEFSSGDAREKATNASLEAYKTASEIATTELPPTHPIRLGLALNFSVFYYEIQNSPDKACHLAKQAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMSESG -#=GS BMH1_YEAST/4-240 AC P29311 -RA24_SCHPO/6-241 REDAVYLAKLAEQAERYEGMVENMKSVASTDQ...ELTVEERNLLSVAYKNVIGARRASWRIVSSIEQKEESKG.NTAQVELIKEYRQKIEQELDTICQDILTVLEKHLIPNA..ASAESKVFYYKMKGDYYRYLAEFAVGEKRQHSADQSLEGYKAASEIATAELAPTHPIRLGLALNFSVFYYEILNSPDRACYLAKQAFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYSA -#=GS RA24_SCHPO/6-241 AC P42656 -RA25_SCHPO/5-240 RENSVYLAKLAEQAERYEEMVENMKKVACSND...KLSVEERNLLSVAYKNIIGARRASWRIISSIEQKEESRG.NTRQAALIKEYRKKIEDELSDICHDVLSVLEKHLIPAA..TTGESKVFYYKMKGDYYRYLAEFTVGEVCKEAADSSLEAYKAASDIAVAELPPTDPMRLGLALNFSVFYYEILDSPESACHLAKQVFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYNQ -#=GS RA25_SCHPO/5-240 AC P42657 -// -# STOCKHOLM 1.0 -#=GF ID rrm -#=GF AC PF00076 -#=GF DE RNA recognition motif. (a.k.a. RRM, RBD, or RNP domain) -#=GF GA 14.6 0.0 -#=GF TC 14.5 14.5 -#=GF TC 14.6 0.1 - -ARP2_PLAFA/364-438 VEVTYLF....STYLVNGQTL..IYS.....N.ISVV....LVILY.... -CABA_MOUSE/77-147 MFVGGL......SWDTSKKDLKDYFT.....K.FGEV..VDCTIKMD... -GR10_BRANA/8-79 CFVGGL......AWATGDAELERTFS.....Q.FGEV..IDSKIIND... -NSR1_YEAST/170-241 IFVGRL......SWSIDDEWLKKEFE.....H.IGGV..IGARVIYE... -RT19_ARATH/33-104 LYIGGL......SPGTDEHSLKDAFS.....S.FNGV..TEARVMTN... -RO28_NICSY/99-170 LFVGNL......PYDIDSEGLAQLFQ.....Q.AGVV..EIAEVIYN... -RO33_NICSY/116-187 LYVGNL......PFSMTSSQLSEIFA.....E.AGTV..ANVEIVYD... -RO33_NICSY/219-290 LYVANL......SWALTSQGLRDAFA.....D.QPGF..MSAKVIYD... -GBP2_YEAST/221-291 VFIINL......PYSMNWQSLKDMFK.....E.CGHV..LRADVELD... 
-HUD_HUMAN/48-119 LIVNYL......PQNMTQEEFRSLFG.....S.IGEI..ESCKLVRD... -SXLF_DROME/127-198 LIVNYL......PQDMTDRELYALFR.....A.IGPI..NTCRIMRD... -PABP_DROME/4-75 LYVGDL......PQDVNESGLFDKFS.....S.AGPV..LSIRVCRD... -NAM8_YEAST/165-237 IFVGDL......APNVTESQLFELFI.....NRYAST..SHAKIVHD... -PUB1_YEAST/163-234 LFVGDL......NVNVDDETLRNAFK.....D.FPSY..LSGHVMWD... -TIA1_HUMAN/108-179 VFVGDL......SPQITTEDIKAAFA.....P.FGRI..SDARVVKD... -PES4_YEAST/93-164 LFIGDL......HETVTEETLKGIFK.....K.YPSF..VSAKVCLD... -NOP4_YEAST/28-98 LFVRSI......PQDVTDEQLADFFS.....N.FAPI..KHAVVVKD... -CST2_HUMAN/18-89 VFVGNI......PYEATEEQLKDIFS.....E.VGPV..VSFRLVYD... -RN15_YEAST/20-91 VYLGSI......PYDQTEEQILDLCS.....N.VGPV..INLKMMFD... -YIS1_YEAST/66-136 IFVGNI......TPDVTPEQIEDHFK.....D.CGQI..KRITLLYD... -IF4B_HUMAN/98-168 AFLGNL......PYDVTEESIKEFFR.....G.LNIS...AVRLPR.... -NSR1_YEAST/269-340 LFLGNL......SFNADRDAIFELFA.....K.HGEV..VSVRIPTH... -GBP2_YEAST/124-193 IFVRNL......TFDCTPEDLKELFG.....T.VGEV..VEADIIT.... -NOP3_YEAST/127-190 LFVRPF......PLDVQESELNEIFG.....P.FGPM..KEVKILN.... -U2AF_HUMAN/261-332 LFIGGL......PNYLNDDQVKELLT.....S.FGPL..KAFNLVKD... -U2AF_SCHPO/312-383 IYISNL......PLNLGEDQVVELLK.....P.FGDL..LSFQLIKN... -ELAV_DROME/250-322 LYVSGL......PKTMTQQELEAIFA.....P.FGAI..ITSRILQN... -SXLF_DROME/213-285 LYVTNL......PRTITDDQLDTIFG.....K.YGSI..VQKNILRD... -ELAV_DROME/404-475 IFIYNL......APETEEAALWQLFG.....P.FGAV..QSVKIVKD... -MSSP_HUMAN/31-102 LYIRGL......PPHTTDQDLVKLCQ.....P.YGKI..VSTKAILD... -NONA_DROME/304-369 LYVGNL......TNDITDDELREMFK.....P.YGEI..SEIFSNLD... -PABP_DROME/92-162 VFIKNL......DRAIDNKAIYDTFS.....A.FGNI..LSCKVATD... -PABP_DROME/183-254 VYVKNF......TEDFDDEKLKEFFE.....P.YGKI..TSYKVMS.... -PABP_SCHPO/263-333 VYIKNL......DTEITEQEFSDLFG.....Q.FGEI..TSLSLVKD... -PUB1_YEAST/342-407 AYIGNI......PHFATEADLIPLFQ.....N.FGFI..LDFKHYPE... -PUB1_YEAST/76-146 LYVGNL......DKAITEDILKQYFQ.....V.GGPI..ANIKIMID... -TIA1_HUMAN/9-78 LYVGNL......SRDVTEALILQLFS.....Q.IGPC..KNCKMIMD... 
-TIA1_HUMAN/216-281 VYCGGV......TSGLTEQLMRQTFS.....P.FGQI..MEIRVFPD... -EWS_HUMAN/363-442 IYVQGL......NDSVTLDDLADFFK.....Q.CGVV..K.MNKRTG... -PTB_HUMAN/186-253 IIVENL......FYPVTLDVLHQIFS.....K.FGTV....LKIIT.... -ROC_HUMAN/18-82 VFIGNL.....NTLVVKKSDVEAIFS.....K.YGKI..VGCSVHK.... -YIS5_YEAST/33-104 IYIGNL......NRELTEGDILTVFS.....E.YGVP..VDVILSRD... -RU1A_HUMAN/12-84 IYINNLNE..KIKKDELKKSLYAIFS.....Q.FGQI..LDILVSR.... -RU2B_HUMAN/9-81 IYINNMND..KIKKEELKRSLYALFS.....Q.FGHV..VDIVALK.... -CABA_MOUSE/161-231 IFVGGL......NPEATEEKIREYFG.....Q.FGEI..EAIELPID... -ROA1_BOVIN/106-176 IFVGGI......KEDTEEHHLRDYFE.....Q.YGKI..EVIEIMTD... -SQD_DROME/138-208 IFVGGL......TTEISDEEIKTYFG.....Q.FGNI..VEVEMPLD... -RB97_DROME/34-104 LFIGGL......APYTTEENLKLFYG.....Q.WGKV..VDVVVMRD... -SQD_DROME/58-128 LFVGGL......SWETTEKELRDHFG.....K.YGEI..ESINVKTD... -ROG_HUMAN/10-81 LFIGGL......NTETNEKALEAVFG.....K.YGRI..VEVLLMKD... -SFR2_CHICK/16-87 LKVDNL......TYRTSPDTLRRVFE.....K.YGRV..GDVYIPRD... -SFR1_HUMAN/17-85 IYVGNL......PPDIRTKDIEDVFY.....K.YGAI..RDIDLKNR... -SR55_DROME/5-68 VYVGGL......PYGVRERDLERFFK.....G.YGRT..RDILIKN.... -SFR3_HUMAN/12-78 VYVGNL......GNNGNKTELERAFG.....Y.YGPL..RSVWVARN... -TRA2_DROME/99-170 IGVFGL......NTNTSQHKVRELFN.....K.YGPI..ERIQMVID... -RU17_DROME/104-175 LFIARI......NYDTSESKLRREFE.....F.YGPI..KKIVLIHD... -GBP2_YEAST/351-421 IYCSNL......PFSTARSDLFDLFG.....P.IGKI..NNAELKP.... -RNP1_YEAST/37-109 LYVGNL......PKNCRKQDLRDLFE.....PNYGKI..TINMLKKK... -PES4_YEAST/305-374 IFIKNL......PTITTRDDILNFFS.....E.VGPI..KSIYLSN.... -YHH5_YEAST/315-384 ILVKNL......PSDTTQEEVLDYFS.....T.IGPI..KSVFISEK... -YHC4_YEAST/348-415 IFVGQL......DKETTREELNRRFS.....T.HGKI..QDINLIFK... -IF39_YEAST/79-157 IVVNGAPVIPSAKVPVLKKALTSLFS.....K.AGKV..VNMEFPID... -MEI2_SCHPO/197-265 LFVTNL......PRIVPYATLLELFS.....K.LGDV..KGIDTSSL... -NOP4_YEAST/292-363 VFVRNV......PYDATEESLAPHFS.....K.FGSV..KYALPVID... -MODU_DROME/260-326 VVVGLI......GPNITKDDLKTFFE.....K.VAPV..EAVTISSN... 
-ROF_HUMAN/113-183 VRLRGL......PFGCTKEEIVQFFS.....G.LEIV.PNGITLPVD... -MODU_DROME/342-410 LVVENVG....KHESYSSDALEKIFK.....K.FGDV..EEIDVVC.... -NUCL_CHICK/283-352 LFVKNL......TPTKDYEELRTAIK.....EFFGKK...NLQVSEV... -NONA_DROME/378-448 LRVSNL......TPFVSNELLYKSFE.....I.FGPI..ERASITVD... -PSF_HUMAN/373-443 LSVRNL......SPYVSNELLEEAFS.....Q.FGPI..ERAVVIVD... -NOP3_YEAST/202-270 ITMKNL......PEGCSWQDLKDLAR.....E.NSLE..TTFSSVN.... -SFR1_HUMAN/122-186 VVVSGL......PPSGSWQDLKDHMR.....E.AGDV..CYADVYRD... -CPO_DROME/453-526 LFVSGL......PMDAKPRELYLLFR.....A.YEGY..EGSLLKV.... -WHI3_YEAST/540-614 LYVGNL......PSDATEQELRQLFS.....G.QEGF..RRLSFRNK... -RU1A_HUMAN/210-276 LFLTNL......PEETNELMLSMLFN.....Q.FPGF..KEVRLVPG... -RU2B_HUMAN/153-220 LFLNNL......PEETNEMMLSMLFN.....Q.FPGF..KEVRLVPG... -RU1A_YEAST/229-293 LLIQNL......PSGTTEQLLSQILG.....N.EALV...EIRLVSV... -MODU_DROME/177-246 VFVTNL......PNEYLHKDLVALFA.....K.FGRL..SALQRFTN... -PR24_YEAST/43-111 VLVKNL......PKSYNQNKVYKYFK.....H.CGPI..IHVDVAD.... -MODU_DROME/422-484 ILVTNL......TSDATEADLRKVFN.....D.SGEI..ESIIMLG.... -PR24_YEAST/212-284 IMIRNL.....STELLDENLLRESFE.....G.FGSI..EKINIPAG... -SSB1_YEAST/39-114 IFIGNV......AHECTEDDLKQLFV.....EEFGDE..VSVEIPIK... -PTB_HUMAN/61-128 IHIRKL......PIDVTEGEVISLGL.....P.FGKV..TNLLMLKG... -RN12_YEAST/200-267 IVIKFQ......GPALTEEEIYSLFR.....R.YGTI....IDIFP.... -D111_ARATH/281-360 LLLRNMVG.PGQVDDELEDEVGGECA.....K.YGTV..TRVLIFE.... -U2AG_HUMAN/67-142 CAVSDVEM..QEHYDEFFEEVFTEME.....EKYGEV..EEMNVCDN... -IF39_SCHPO/41-124 VVIEGAP....VVEEAKQQDFFRFLSSKVLAK.IGKVKENGFYMPFE... -LA_DROME/151-225 AYAKGF......PLDSQISELLDFTA.....N.YDKV..VNLTMRNS... -LA_HUMAN/113-182 VYIKGF......PTDATLDDIKEWLE.....D.KGQV..LNIQMRR.... -PR24_YEAST/119-190 LWMTNF......PPSYTQRNIRDLLQ.....D.INVV.ALSIRLPSL... 
- -ARP2_PLAFA/364-438 ....HQKFKETVLGRNSGFGFVSYDNVISAQHAIQFMNG.Y...FVNNKY -CABA_MOUSE/77-147 ..........PNTGRSRGFGFILFKDSSSVEKVLD.QKE.H...RLDGRV -GR10_BRANA/8-79 ..........RETGRSRGFGFVTFKDEKSMKDAIDEMNG.K...ELDGRT -NSR1_YEAST/170-241 ..........RGTDRSRGYGYVDFENKSYAEKAIQEMQG.K...EIDGRP -RT19_ARATH/33-104 ..........KVTGRSRGYGFVNFISEDSANSAISAMNG.Q...ELNGFN -RO28_NICSY/99-170 ..........RETDRSRGFGFVTMSTVEEADKAVELYSQ.Y...DLNGRL -RO33_NICSY/116-187 ..........RVTDRSRGFAFVTMGSVEEAKEAIRLFDG.S...QVGGRT -RO33_NICSY/219-290 ..........RSSGRSRGFGFITFSSAEAMNSALDTMNE.V...ELEGRP -GBP2_YEAST/221-291 ...........FNGFSRGFGSVIYPTEDEMIRAIDTFNG.M...EVEGRV -HUD_HUMAN/48-119 ..........KITGQSLGYGFVNYIDPKDAEKAINTLNG.L...RLQTKT -SXLF_DROME/127-198 ..........YKTGYSFGYAFVDFTSEMDSQRAIKVLNG.I...TVRNKR -PABP_DROME/4-75 ..........VITRRSLGYAYVNFQQPADAERALDTMNF.D...LVRNKP -NAM8_YEAST/165-237 ..........QVTGMSKGYGFVKFTNSDEQQLALSEMQG.V...FLNGRA -PUB1_YEAST/163-234 ..........MQTGSSRGYGFVSFTSQDDAQNAMDSMQG.Q...DLNGRP -TIA1_HUMAN/108-179 ..........MATGKSKGYGFVSFFNKWDAENAIQQMGG.Q...WLGGRQ -PES4_YEAST/93-164 ..........SVTKKSLGHGYLNFEDKEEAEKAMEELNY.T...KVNGKE -NOP4_YEAST/28-98 ...........TNKRSRGFGFVSFAVEDDTKEALAKARK.T...KFNGHI -CST2_HUMAN/18-89 ..........RETGKPKGYGFCEYQDQETALSAMRNLNG.R...EFSGRA -RN15_YEAST/20-91 ..........PQTGRSKGYAFIEFRDLESSASAVRNLNG.Y...QLGSRF -YIS1_YEAST/66-136 ..........RNTGTPKGYGYIEFESPAYREKALQ.LNG.G...ELKGKK -IF4B_HUMAN/98-168 ........EPSNPERLKGFGYAEFEDLDSLLSALS.LNE.E...SLGNRR -NSR1_YEAST/269-340 ..........PETEQPKGFGYVQFSNMEDAKKALDALQG.E...YIDNRP -GBP2_YEAST/124-193 ...........SKGHHRGMGTVEFTKNESVQDAISKFDG.A...LFMDRK -NOP3_YEAST/127-190 .................GFAFVEFEEAESAAKAIEEVHG.K...SFANQP -U2AF_HUMAN/261-332 ..........SATGLSKGYAFCEYVDINVTDQAIAGLNG.M...QLGDKK -U2AF_SCHPO/312-383 ..........IADGSSKGFCFCEFKNPSDAEVAISGLDG.K...DTYGNK -ELAV_DROME/250-322 .........AGNDTQTKGVGFIRFDKREEATRAIIALNG.T...TPSSCT -SXLF_DROME/213-285 ..........KLTGRPRGVAFVRYNKREEAQEAISALNNVI...PEGGSQ -ELAV_DROME/404-475 
..........PTTNQCKGYGFVSMTNYDEAAMAIRALNG.Y...TMGNRV -MSSP_HUMAN/31-102 ..........KTTNKCKGYGFVDFDSPAAAQKAVSALKA.S...GVQAQK -NONA_DROME/304-369 ................KNFTFLKVDYHPNAEKAKRALDG.S...MRKGRQ -PABP_DROME/92-162 ...........EKGNSKGYGFVHFETEEAANTSIDKVNG.M...LLNGKK -PABP_DROME/183-254 ..........KEDGKSKGFGFVAFETTEAAEAAVQALNGKD...MGEGKS -PABP_SCHPO/263-333 ...........QNDKPRGFGFVNYANHECAQKAVDELND.K...EYKGKK -PUB1_YEAST/342-407 ................KGCCFIKYDTHEQAAVCIVALAN.F...PFQGRN -PUB1_YEAST/76-146 ...........KNNKNVNYAFVEYHQSHDANIALQTLNG.K...QIENNI -TIA1_HUMAN/9-78 ............TAGNDPYCFVEFHEHRHAAAALAAMNG.R...KIMGKE -TIA1_HUMAN/216-281 ................KGYSFVRFNSHESAAHAIVSVNG.T...TIEGHV -EWS_HUMAN/363-442 .QPMIHIYLDKETGKPKGDATVSYEDPPTAKAAVEWFDG.K...DFQGSK -PTB_HUMAN/186-253 ...........FTKNNQFQALLQYADPVSAQHAKLSLDG.Q...NIYNAC -ROC_HUMAN/18-82 .................GFAFVQYVNERNARAAVAGEDG.R...MIAGQV -YIS5_YEAST/33-104 ..........ENTGESQGFAYLKYEDQRSTILAVDNLNG.F...KIGGRA -RU1A_HUMAN/12-84 ............SLKMRGQAFVIFKEVSSATNALRSMQG.F...PFYDKP -RU2B_HUMAN/9-81 ............TMKMRGQAFVIFKELGSSTNALRQLQG.F...PFYGKP -CABA_MOUSE/161-231 ..........PKLNKRRGFVFITFKEEDPVKKVLE.KKF.H...TVSGSK -ROA1_BOVIN/106-176 ..........RGSGKKRGFAFVTFDDHDSVDKIVI.QKY.H...TVNGHN -SQD_DROME/138-208 ..........KQKSQRKGFCFITFDSEQVVTDLLK.TPK.Q...KIAGKE -RB97_DROME/34-104 ..........AATKRSRGFGFITYTKSLMVDRAQE..NRPH...IIDGKT -SQD_DROME/58-128 ..........PQTGRSRGFAFIVFTNTEAIDKVSA.ADE.H...IINSKK -ROG_HUMAN/10-81 ..........RETNKSRGFAFVTFESPADAKDAARDMNG.K...SLDGKA -SFR2_CHICK/16-87 ..........RYTKESRGFAFVRFHDKRDAEDAMDAMDG.A...VLDGRE -SFR1_HUMAN/17-85 .............RGGPPFAFVEFEDPRDAEDAVYGRDG.Y...DYDGYR -SR55_DROME/5-68 .................GYGFVEFEDYRDADDAVYELNG.K...ELLGER -SFR3_HUMAN/12-78 ...............PPGFAFVEFEDPRDAADAVRELDG.R...TLCGCR -TRA2_DROME/99-170 ..........AQTQRSRGFCFIYFEKLSDARAAKDSCSG.I...EVDGRR -RU17_DROME/104-175 ..........QESGKPKGYAFIEYEHERDMHAAYKHADG.K...KIDSKR -GBP2_YEAST/351-421 
..........QENGQPTGVAVVEYENLVDADFCIQKLNN.Y...NYGGCS -RNP1_YEAST/37-109 ..........PLKKPLKRFAFIEFQEGVNLKKVKEKMNG.K...IFMNEK -PES4_YEAST/305-374 ...........ATKVKYLWAFVTYKNSSDSEKAIKRYNN.F...YFRGKK -YHH5_YEAST/315-384 ............QANTPHKAFVTYKNEEESKKAQKCLNK.T...IFKNHT -YHC4_YEAST/348-415 ..............PTNIFAFIKYETEEAAAAALESENH.A...IFLNKT -IF39_YEAST/79-157 ..........EATGKTKGFLFVECGSMNDAKKIIKSFHGKR...LDLKHR -MEI2_SCHPO/197-265 ..............STDGICIVAFFDIRQAIQAAKSLRSQR...FFNDRL -NOP4_YEAST/292-363 ..........KSTGLAKGTAFVAFKDQYTYNECIKNAPA.A...GSTSLL -MODU_DROME/260-326 ..............RLMPRAFVRLASVDDIPKALK.LHS.T...ELFSRF -ROF_HUMAN/113-183 ...........PEGKITGEAFVQFASQELAEKALG.KHK.E...RIGHRY -MODU_DROME/342-410 ..............SKAVLAFVTFKQSDAATKALAQLDG.K...TVNKFE -NUCL_CHICK/283-352 ...........RIGSSKRFGYVDFLSAEDMDKALQ.LNG.K...KLMGLE -NONA_DROME/378-448 ...........DRGKHMGEGIVEFAKKSSASACLRMCNE.K...CFFLTA -PSF_HUMAN/373-443 ...........DRGRSTGKGIVEFASKPAARKAFERCSE.G...VFLLTT -NOP3_YEAST/202-270 ............TRDFDGTGALEFPSEEILVEALERLNN.I...EFRGSV -SFR1_HUMAN/122-186 .................GTGVVEFVRKEDMTYAVRKLDN.T...KFRSHE -CPO_DROME/453-526 ........TSKNGKTASPVGFVTFHTRAGAEAAKQDLQGVR...FDPDMP -WHI3_YEAST/540-614 .......NTTSNGHSHGPMCFVEFDDVSFATRALAELYG.R...QLPRST -RU1A_HUMAN/210-276 ...............RHDIAFVEFDNEVQAGAARDALQG.F...KITQNN -RU2B_HUMAN/153-220 ...............RHDIAFVEFENDGQAGAARDALQGFK...ITPSHA -RU1A_YEAST/229-293 ................RNLAFVEYETVADATKIKNQLGS.T...YKLQNN -MODU_DROME/177-246 .............LNGNKSVLIAFDTSTGAEAVLQAKPKAL...TLGDNV -PR24_YEAST/43-111 ...........SLKKNFRFARIEFARYDGALAAIT.KTH.K...VVGQNE -MODU_DROME/422-484 .................QKAVVKFKDDEGFCKSFL.ANE.S...IVNNAP -PR24_YEAST/212-284 .........QKEHSFNNCCAFMVFENKDSAERALQ.MNR.S...LLGNRE -SSB1_YEAST/39-114 .......EHTDGHIPASKHALVKFPTKIDFDNIKENYDT.K...VVKDRE -PTB_HUMAN/61-128 ................KNQAFIEMNTEEAANTMVN.YYT.SVTPVLRGQP -RN12_YEAST/200-267 ...........PTAANNNVAKVRYRSFRGAISAKNCVSG.I...EIHNTV -D111_ARATH/281-360 
......ITEPNFPVHEAVRIFVQFSRPEETTKALVDLDG.R...YFGGRT -U2AG_HUMAN/67-142 ...........LGDHLVGNVYVKFRREEDAEKAVIDLNN.R...WFNGQP -IF39_SCHPO/41-124 ......EKNGK..KMSLGLVFADFENVDGADLCVQELDGKQ...ILKNHT -LA_DROME/151-225 ......YDKPTKSYKFKGSIFLTFETKDQAKAFLE.QEK.I...VYKERE -LA_HUMAN/113-182 ..........TLHKAFKGSIFVVFDSIESAKKFVE.TPG.Q...KYKETD -PR24_YEAST/119-190 ...........RFNTSRRFAYIDVTSKEDARYCVEKLNG.L...KIEGYT - -ARP2_PLAFA/364-438 LKV -CABA_MOUSE/77-147 IDP -GR10_BRANA/8-79 ITV -NSR1_YEAST/170-241 INC -RT19_ARATH/33-104 ISV -RO28_NICSY/99-170 LTV -RO33_NICSY/116-187 VKV -RO33_NICSY/219-290 LRL -GBP2_YEAST/221-291 LEV -HUD_HUMAN/48-119 IKV -SXLF_DROME/127-198 LKV -PABP_DROME/4-75 IRI -NAM8_YEAST/165-237 IKV -PUB1_YEAST/163-234 LRI -TIA1_HUMAN/108-179 IRT -PES4_YEAST/93-164 IRI -NOP4_YEAST/28-98 LRV -CST2_HUMAN/18-89 LRV -RN15_YEAST/20-91 LKC -YIS1_YEAST/66-136 IAV -IF4B_HUMAN/98-168 IRV -NSR1_YEAST/269-340 VRL -GBP2_YEAST/124-193 LMV -NOP3_YEAST/127-190 LEV -U2AF_HUMAN/261-332 LLV -U2AF_SCHPO/312-383 LHA -ELAV_DROME/250-322 DPI -SXLF_DROME/213-285 PLS -ELAV_DROME/404-475 LQV -MSSP_HUMAN/31-102 AKQ -NONA_DROME/304-369 LRV -PABP_DROME/92-162 VYV -PABP_DROME/183-254 LYV -PABP_SCHPO/263-333 LYV -PUB1_YEAST/342-407 LRT -PUB1_YEAST/76-146 VKI -TIA1_HUMAN/9-78 VKV -TIA1_HUMAN/216-281 VKC -EWS_HUMAN/363-442 LKV -PTB_HUMAN/186-253 CTL -ROC_HUMAN/18-82 LDI -YIS5_YEAST/33-104 LKI -RU1A_HUMAN/12-84 MRI -RU2B_HUMAN/9-81 MRI -CABA_MOUSE/161-231 CEI -ROA1_BOVIN/106-176 CEV -SQD_DROME/138-208 VDV -RB97_DROME/34-104 VEA -SQD_DROME/58-128 VDP -ROG_HUMAN/10-81 IKV -SFR2_CHICK/16-87 LRV -SFR1_HUMAN/17-85 LRV -SR55_DROME/5-68 VVV -SFR3_HUMAN/12-78 VRV -TRA2_DROME/99-170 IRV -RU17_DROME/104-175 VLV -GBP2_YEAST/351-421 LQI -RNP1_YEAST/37-109 IVI -PES4_YEAST/305-374 LLV -YHH5_YEAST/315-384 IWV -YHC4_YEAST/348-415 MHV -IF39_YEAST/79-157 LFL -MEI2_SCHPO/197-265 LYF -NOP4_YEAST/292-363 IGD -MODU_DROME/260-326 ITV -ROF_HUMAN/113-183 IEV -MODU_DROME/342-410 WKL -NUCL_CHICK/283-352 IKL -NONA_DROME/378-448 
SLR -PSF_HUMAN/373-443 TPR -NOP3_YEAST/202-270 ITV -SFR1_HUMAN/122-186 GET -CPO_DROME/453-526 QTI -WHI3_YEAST/540-614 VSS -RU1A_HUMAN/210-276 AMK -RU2B_HUMAN/153-220 MKI -RU1A_YEAST/229-293 DVT -MODU_DROME/177-246 LSV -PR24_YEAST/43-111 IIV -MODU_DROME/422-484 IFI -PR24_YEAST/212-284 ISV -SSB1_YEAST/39-114 IHI -PTB_HUMAN/61-128 IYI -RN12_YEAST/200-267 LHI -D111_ARATH/281-360 VRA -U2AG_HUMAN/67-142 IHA -IF39_SCHPO/41-124 FVV -LA_DROME/151-225 LLR -LA_HUMAN/113-182 LLI -PR24_YEAST/119-190 LVT -// diff --git a/forester/archive/RIO/others/hmmer/squid/Formats/swissprot b/forester/archive/RIO/others/hmmer/squid/Formats/swissprot deleted file mode 100644 index 1f1220e..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Formats/swissprot +++ /dev/null @@ -1,317 +0,0 @@ -ID 100K_RAT STANDARD; PRT; 889 AA. -AC Q62671; -DT 01-NOV-1997 (Rel. 35, Created) -DT 01-NOV-1997 (Rel. 35, Last sequence update) -DT 15-JUL-1999 (Rel. 38, Last annotation update) -DE 100 KD PROTEIN (EC 6.3.2.-). -OS Rattus norvegicus (Rat). -OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia; -OC Eutheria; Rodentia; Sciurognathi; Muridae; Murinae; Rattus. -RN [1] -RP SEQUENCE FROM N.A. -RC STRAIN=WISTAR; TISSUE=TESTIS; -RX MEDLINE; 92253337. -RA MUELLER D., REHBEIN M., BAUMEISTER H., RICHTER D.; -RT "Molecular characterization of a novel rat protein structurally -RT related to poly(A) binding proteins and the 70K protein of the U1 -RT small nuclear ribonucleoprotein particle (snRNP)."; -RL Nucleic Acids Res. 20:1471-1475(1992). -RN [2] -RP ERRATUM. -RA MUELLER D., REHBEIN M., BAUMEISTER H., RICHTER D.; -RL Nucleic Acids Res. 20:2624-2624(1992). -CC -!- FUNCTION: E3 UBIQUITIN-PROTEIN LIGASE WHICH ACCEPTS UBIQUITIN FROM -CC AN E2 UBIQUITIN-CONJUGATING ENZYME IN THE FORM OF A THIOESTER AND -CC THEN DIRECTLY TRANSFERS THE UBIQUITIN TO TARGETED SUBSTRATES (BY -CC SIMILARITY). THIS PROTEIN MAY BE INVOLVED IN MATURATION AND/OR -CC POST-TRANSCRIPTIONAL REGULATION OF MRNA. 
-CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT -CC IN LIVER, KIDNEY, LUNG AND BRAIN. -CC -!- DEVELOPMENTAL STAGE: IN EARLY POST-NATAL LIFE, EXPRESSION IN -CC THE TESTIS INCREASES TO REACH A MAXIMUM AROUND DAY 28. -CC -!- MISCELLANEOUS: A CYSTEINE RESIDUE IS REQUIRED FOR -CC UBIQUITIN-THIOLESTER FORMATION. -CC -!- SIMILARITY: CONTAINS AN HECT-TYPE E3 UBIQUITIN-PROTEIN LIGASE -CC DOMAIN. -CC -!- SIMILARITY: A CENTRAL REGION (AA 485-514) IS SIMILAR TO THE -CC C-TERMINAL DOMAINS OF MAMMALIAN AND YEAST POLY (A) RNA BINDING -CC PROTEINS (PABP). -CC -!- SIMILARITY: THE C-TERMINAL HALF SHOWS HIGH SIMILARITY TO -CC DROSOPHILA HYPERPLASMIC DISC PROTEIN AND SOME, TO HUMAN E6-AP. -CC -!- SIMILARITY: CONTAINS MIXED-CHARGE DOMAINS SIMILAR TO RNA-BINDING -CC PROTEINS. -CC -------------------------------------------------------------------------- -CC This SWISS-PROT entry is copyright. It is produced through a collaboration -CC between the Swiss Institute of Bioinformatics and the EMBL outstation - -CC the European Bioinformatics Institute. There are no restrictions on its -CC use by non-profit institutions as long as its content is in no way -CC modified and this statement is not removed. Usage by and for commercial -CC entities requires a license agreement (See http://www.isb-sib.ch/announce/ -CC or send an email to license@isb-sib.ch). -CC -------------------------------------------------------------------------- -DR EMBL; X64411; CAA45756.1; -. -DR PFAM; PF00632; HECT; 1. -DR PFAM; PF00658; PABP; 1. -KW Ubiquitin conjugation; Ligase. -FT DOMAIN 77 88 ASP/GLU-RICH (ACIDIC). -FT DOMAIN 127 150 PRO-RICH. -FT DOMAIN 420 439 ARG/GLU-RICH (MIXED CHARGE). -FT DOMAIN 448 457 ARG/ASP-RICH (MIXED CHARGE). -FT DOMAIN 485 514 PABP-LIKE. -FT DOMAIN 579 590 ASP/GLU-RICH (ACIDIC). -FT DOMAIN 786 889 HECT DOMAIN. -FT DOMAIN 827 847 PRO-RICH. -FT BINDING 858 858 UBIQUITIN (BY SIMILARITY). 
-SQ SEQUENCE 889 AA; 100368 MW; DD7E6C7A CRC32; - MMSARGDFLN YALSLMRSHN DEHSDVLPVL DVCSLKHVAY VFQALIYWIK AMNQQTTLDT - PQLERKRTRE LLELGIDNED SEHENDDDTS QSATLNDKDD ESLPAETGQN HPFFRRSDSM - TFLGCIPPNP FEVPLAEAIP LADQPHLLQP NARKEDLFGR PSQGLYSSSA GSGKCLVEVT - MDRNCLEVLP TKMSYAANLK NVMNMQNRQK KAGEDQSMLA EEADSSKPGP SAHDVAAQLK - SSLLAEIGLT ESEGPPLTSF RPQCSFMGMV ISHDMLLGRW RLSLELFGRV FMEDVGAEPG - SILTELGGFE VKESKFRREM EKLRNQQSRD LSLEVDRDRD LLIQQTMRQL NNHFGRRCAT - TPMAVHRVKV TFKDEPGEGS GVARSFYTAI AQAFLSNEKL PNLDCIQNAN KGTHTSLMQR - LRNRGERDRE REREREMRRS SGLRAGSRRD RDRDFRRQLS IDTRPFRPAS EGNPSDDPDP - LPAHRQALGE RLYPRVQAMQ PAFASKITGM LLELSPAQLL LLLASEDSLR ARVEEAMELI - VAHGRENGAD SILDLGLLDS SEKVQENRKR HGSSRSVVDM DLDDTDDGDD NAPLFYQPGK - RGFYTPRPGK NTEARLNCFR NIGRILGLCL LQNELCPITL NRHVIKVLLG RKVNWHDFAF - FDPVMYESLR QLILASQSSD ADAVFSAMDL AFAVDLCKEE GGGQVELIPN GVNIPVTPQN - VYEYVRKYAE HRMLVVAEQP LHAMRKGLLD VLPKNSLEDL TAEDFRLLVN GCGEVNVQML - ISFTSFNDES GENAEKLLQF KRWFWSIVER MSMTERQDLV YFWTSSPSLP ASEEGFQPMP - SITIRPPDDQ HLPTANTCIS RLYVPLYSSK QILKQKLLLA IKTKNFGFV -// -ID 104K_THEPA STANDARD; PRT; 924 AA. -AC P15711; -DT 01-APR-1990 (Rel. 14, Created) -DT 01-APR-1990 (Rel. 14, Last sequence update) -DT 01-AUG-1992 (Rel. 23, Last annotation update) -DE 104 KD MICRONEME-RHOPTRY ANTIGEN. -OS Theileria parva. -OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae; -OC Theileria. -RN [1] -RP SEQUENCE FROM N.A. -RC STRAIN=MUGUGA; -RX MEDLINE; 90158697. -RA IAMS K.P., YOUNG J.R., NENE V., DESAI J., WEBSTER P., -RA OLE-MOIYOI O.K., MUSOKE A.J.; -RT "Characterisation of the gene encoding a 104-kilodalton microneme- -RT rhoptry protein of Theileria parva."; -RL Mol. Biochem. Parasitol. 39:47-60(1990). -CC -!- SUBCELLULAR LOCATION: IN MICRONEME/RHOPTRY COMPLEXES. -CC -!- DEVELOPMENTAL STAGE: SPOROZOITE ANTIGEN. -CC -------------------------------------------------------------------------- -CC This SWISS-PROT entry is copyright. 
It is produced through a collaboration -CC between the Swiss Institute of Bioinformatics and the EMBL outstation - -CC the European Bioinformatics Institute. There are no restrictions on its -CC use by non-profit institutions as long as its content is in no way -CC modified and this statement is not removed. Usage by and for commercial -CC entities requires a license agreement (See http://www.isb-sib.ch/announce/ -CC or send an email to license@isb-sib.ch). -CC -------------------------------------------------------------------------- -DR EMBL; M29954; AAA18217.1; -. -DR PIR; A44945; A44945. -KW Antigen; Sporozoite; Repeat. -FT DOMAIN 1 19 HYDROPHOBIC. -FT DOMAIN 905 924 HYDROPHOBIC. -SQ SEQUENCE 924 AA; 103625 MW; 4563AAA0 CRC32; - MKFLILLFNI LCLFPVLAAD NHGVGPQGAS GVDPITFDIN SNQTGPAFLT AVEMAGVKYL - QVQHGSNVNI HRLVEGNVVI WENASTPLYT GAIVTNNDGP YMAYVEVLGD PNLQFFIKSG - DAWVTLSEHE YLAKLQEIRQ AVHIESVFSL NMAFQLENNK YEVETHAKNG ANMVTFIPRN - GHICKMVYHK NVRIYKATGN DTVTSVVGFF RGLRLLLINV FSIDDNGMMS NRYFQHVDDK - YVPISQKNYE TGIVKLKDYK HAYHPVDLDI KDIDYTMFHL ADATYHEPCF KIIPNTGFCI - TKLFDGDQVL YESFNPLIHC INEVHIYDRN NGSIICLHLN YSPPSYKAYL VLKDTGWEAT - THPLLEEKIE ELQDQRACEL DVNFISDKDL YVAALTNADL NYTMVTPRPH RDVIRVSDGS - EVLWYYEGLD NFLVCAWIYV SDGVASLVHL RIKDRIPANN DIYVLKGDLY WTRITKIQFT - QEIKRLVKKS KKKLAPITEE DSDKHDEPPE GPGASGLPPK APGDKEGSEG HKGPSKGSDS - SKEGKKPGSG KKPGPAREHK PSKIPTLSKK PSGPKDPKHP RDPKEPRKSK SPRTASPTRR - PSPKLPQLSK LPKSTSPRSP PPPTRPSSPE RPEGTKIIKT SKPPSPKPPF DPSFKEKFYD - DYSKAASRSK ETKTTVVLDE SFESILKETL PETPGTPFTT PRPVPPKRPR TPESPFEPPK - DPDSPSTSPS EFFTPPESKR TRFHETPADT PLPDVTAELF KEPDVTAETK SPDEAMKRPR - SPSEYEDTSP GDYPSLPMKR HRLERLRLTT TEMETDPGRM AKDASGKPVK LKRSKSFDDL - TTVELAPEPK ASRIVVDDEG TEADDEETHP PEERQKTEVR RRRPPKKPSK SPRPSKPKKP - KKPDSAYIPS ILAILVVSLI VGIL -// -ID 108_LYCES STANDARD; PRT; 102 AA. -AC Q43495; -DT 15-JUL-1999 (Rel. 38, Created) -DT 15-JUL-1999 (Rel. 38, Last sequence update) -DT 15-JUL-1999 (Rel. 
38, Last annotation update) -DE PROTEIN 108 PRECURSOR. -OS Lycopersicon esculentum (Tomato). -OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; -OC euphyllophytes; Spermatophyta; Magnoliophyta; eudicotyledons; -OC core eudicots; Asteridae; euasterids I; Solanales; Solanaceae; -OC Solanum. -RN [1] -RP SEQUENCE FROM N.A. -RC STRAIN=CV. VF36; TISSUE=ANTHER; -RX MEDLINE; 94143497. -RA CHEN R., SMITH A.G.; -RT "Nucleotide sequence of a stamen- and tapetum-specific gene from -RT Lycopersicon esculentum."; -RL Plant Physiol. 101:1413-1413(1993). -CC -!- TISSUE SPECIFICITY: STAMEN- AND TAPETUM-SPECIFIC. -CC -!- SIMILARITY: BELONGS TO THE A9 / FIL1 FAMILY. -CC -------------------------------------------------------------------------- -CC This SWISS-PROT entry is copyright. It is produced through a collaboration -CC between the Swiss Institute of Bioinformatics and the EMBL outstation - -CC the European Bioinformatics Institute. There are no restrictions on its -CC use by non-profit institutions as long as its content is in no way -CC modified and this statement is not removed. Usage by and for commercial -CC entities requires a license agreement (See http://www.isb-sib.ch/announce/ -CC or send an email to license@isb-sib.ch). -CC -------------------------------------------------------------------------- -DR EMBL; Z14088; CAA78466.1; -. -DR MENDEL; 8853; LYCes;1133;1. -KW Signal. -FT SIGNAL 1 30 POTENTIAL. -FT CHAIN 31 102 PROTEIN 108. -FT DISULFID 41 77 BY SIMILARITY. -FT DISULFID 51 66 BY SIMILARITY. -FT DISULFID 67 92 BY SIMILARITY. -FT DISULFID 79 99 BY SIMILARITY. -SQ SEQUENCE 102 AA; 10576 MW; AFA4875A CRC32; - MASVKSSSSS SSSSFISLLL LILLVIVLQS QVIECQPQQS CTASLTGLNV CAPFLVPGSP - TASTECCNAV QSINHDCMCN TMRIAAQIPA QCNLPPLSCS AN -// -ID 10KD_VIGUN STANDARD; PRT; 75 AA. -AC P18646; -DT 01-NOV-1990 (Rel. 16, Created) -DT 01-NOV-1990 (Rel. 16, Last sequence update) -DT 01-FEB-1995 (Rel. 31, Last annotation update) -DE 10 KD PROTEIN PRECURSOR (CLONE PSAS10). 
-OS Vigna unguiculata (Cowpea). -OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; -OC euphyllophytes; Spermatophyta; Magnoliophyta; eudicotyledons; -OC core eudicots; Rosidae; eurosids I; Fabales; Fabaceae; Papilionoideae; -OC Vigna. -RN [1] -RP SEQUENCE FROM N.A. -RC TISSUE=COTYLEDON; -RX MEDLINE; 91355865. -RA ISHIBASHI N., YAMAUCHI D., MINIAMIKAWA T.; -RT "Stored mRNA in cotyledons of Vigna unguiculata seeds: nucleotide -RT sequence of cloned cDNA for a stored mRNA and induction of its -RT synthesis by precocious germination."; -RL Plant Mol. Biol. 15:59-64(1990). -CC -!- FUNCTION: THIS PROTEIN IS REQUIRED FOR GERMINATION. -CC -!- SIMILARITY: BELONGS TO THE GAMMA-PUROTHIONIN FAMILY. -CC -------------------------------------------------------------------------- -CC This SWISS-PROT entry is copyright. It is produced through a collaboration -CC between the Swiss Institute of Bioinformatics and the EMBL outstation - -CC the European Bioinformatics Institute. There are no restrictions on its -CC use by non-profit institutions as long as its content is in no way -CC modified and this statement is not removed. Usage by and for commercial -CC entities requires a license agreement (See http://www.isb-sib.ch/announce/ -CC or send an email to license@isb-sib.ch). -CC -------------------------------------------------------------------------- -DR EMBL; X16877; CAA34760.1; -. -DR PIR; S11156; S11156. -DR HSSP; P45639; 1CHL. -DR PFAM; PF00304; Gamma-thionin; 1. -DR PROSITE; PS00940; GAMMA_THIONIN; 1. -KW Germination; Signal. -FT SIGNAL 1 ? POTENTIAL. -FT CHAIN ? 75 10 KD PROTEIN. -FT DISULFID 31 75 BY SIMILARITY. -FT DISULFID 42 63 BY SIMILARITY. -FT DISULFID 48 69 BY SIMILARITY. -FT DISULFID 52 71 BY SIMILARITY. -SQ SEQUENCE 75 AA; 8523 MW; AFF911AB CRC32; - MEKKSIAGLC FLFLVLFVAQ EVVVQSEAKT CENLVDTYRG PCFTTGSCDD HCKNKEHLLS - GRCRDDVRCW CTRNC -// -ID 110K_PLAKN STANDARD; PRT; 296 AA. -AC P13813; -DT 01-JAN-1990 (Rel. 13, Created) -DT 01-JAN-1990 (Rel. 
13, Last sequence update) -DT 01-FEB-1994 (Rel. 28, Last annotation update) -DE 110 KD ANTIGEN (PK110) (FRAGMENT). -OS Plasmodium knowlesi. -OC Eukaryota; Alveolata; Apicomplexa; Haemosporida; Plasmodium. -RN [1] -RP SEQUENCE FROM N.A. -RX MEDLINE; 88039002. -RA PERLER F.B., MOON A.M., QIANG B.Q., MEDA M., DALTON M., CARD C., -RA SCHMIDT-ULLRICH R., WALLACH D., LYNCH J., DONELSON J.E.; -RT "Cloning and characterization of an abundant Plasmodium knowlesi -RT antigen which cross reacts with Gambian sera."; -RL Mol. Biochem. Parasitol. 25:185-193(1987). -CC -------------------------------------------------------------------------- -CC This SWISS-PROT entry is copyright. It is produced through a collaboration -CC between the Swiss Institute of Bioinformatics and the EMBL outstation - -CC the European Bioinformatics Institute. There are no restrictions on its -CC use by non-profit institutions as long as its content is in no way -CC modified and this statement is not removed. Usage by and for commercial -CC entities requires a license agreement (See http://www.isb-sib.ch/announce/ -CC or send an email to license@isb-sib.ch). -CC -------------------------------------------------------------------------- -DR EMBL; M19152; AAA29471.1; -. -DR PIR; A54527; A54527. -KW Malaria; Antigen; Repeat. -FT NON_TER 1 1 -FT DOMAIN 131 296 13.5 X 12 AA TANDEM REPEATS OF E-E-T-Q-K- -FT T-V-E-P-E-Q-T. -SQ SEQUENCE 296 AA; 34077 MW; 666F88DF CRC32; - FNSNMLRGSV CEEDVSLMTS IDNMIEEIDF YEKEIYKGSH SGGVIKGMDY DLEDDENDED - EMTEQMVEEV ADHITQDMID EVAHHVLDNI THDMAHMEEI VHGLSGDVTQ IKEIVQKVNV - AVEKVKHIVE TEETQKTVEP EQIEETQNTV EPEQTEETQK TVEPEQTEET QNTVEPEQIE - ETQKTVEPEQ TEEAQKTVEP EQTEETQKTV EPEQTEETQK TVEPEQTEET QKTVEPEQTE - ETQKTVEPEQ TEETQKTVEP EQTEETQKTV EPEQTEETQN TVEPEPTQET QNTVEP -// -ID 11S3_HELAN STANDARD; PRT; 493 AA. -AC P19084; -DT 01-NOV-1990 (Rel. 16, Created) -DT 01-NOV-1990 (Rel. 16, Last sequence update) -DT 01-FEB-1994 (Rel. 
28, Last annotation update) -DE 11S GLOBULIN SEED STORAGE PROTEIN G3 PRECURSOR (HELIANTHININ G3). -GN HAG3. -OS Helianthus annuus (Common sunflower). -OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; -OC euphyllophytes; Spermatophyta; Magnoliophyta; eudicotyledons; -OC core eudicots; Asteridae; euasterids II; Asterales; Asteraceae; -OC Helianthus. -RN [1] -RP SEQUENCE FROM N.A. -RX MEDLINE; 89232734. -RA VONDER HARR R.A., ALLEN R.D., COHEN E.A., NESSLER C.L., THOMAS T.L.; -RT "Organization of the sunflower 11S storage protein gene family."; -RL Gene 74:433-443(1988). -CC -!- FUNCTION: THIS IS A SEED STORAGE PROTEIN. -CC -!- SUBUNIT: HEXAMER; EACH SUBUNIT IS COMPOSED OF AN ACIDIC AND A -CC BASIC CHAIN DERIVED FROM A SINGLE PRECURSOR AND LINKED BY A -CC DISULFIDE BOND. -CC -!- SIMILARITY: BELONGS TO THE 11S SEED STORAGE PROTEINS (GLOBULINS) -CC FAMILY. -CC -------------------------------------------------------------------------- -CC This SWISS-PROT entry is copyright. It is produced through a collaboration -CC between the Swiss Institute of Bioinformatics and the EMBL outstation - -CC the European Bioinformatics Institute. There are no restrictions on its -CC use by non-profit institutions as long as its content is in no way -CC modified and this statement is not removed. Usage by and for commercial -CC entities requires a license agreement (See http://www.isb-sib.ch/announce/ -CC or send an email to license@isb-sib.ch). -CC -------------------------------------------------------------------------- -DR EMBL; M28832; AAA33374.1; -. -DR PIR; JA0089; JA0089. -DR PFAM; PF00190; Seedstore_11s; 1. -DR PROSITE; PS00305; 11S_SEED_STORAGE; 1. -KW Seed storage protein; Multigene family; Signal. -FT SIGNAL 1 20 -FT CHAIN 21 305 ACIDIC CHAIN. -FT CHAIN 306 493 BASIC CHAIN. -FT DISULFID 103 312 INTERCHAIN (ACIDIC-BASIC) (POTENTIAL). -FT DOMAIN 23 35 GLN-RICH. -FT DOMAIN 111 127 GLN/GLY-RICH. -FT DOMAIN 191 297 GLN-RICH. 
-SQ SEQUENCE 493 AA; 55687 MW; E79DEAAE CRC32; - MASKATLLLA FTLLFATCIA RHQQRQQQQN QCQLQNIEAL EPIEVIQAEA GVTEIWDAYD - QQFQCAWSIL FDTGFNLVAF SCLPTSTPLF WPSSREGVIL PGCRRTYEYS QEQQFSGEGG - RRGGGEGTFR TVIRKLENLK EGDVVAIPTG TAHWLHNDGN TELVVVFLDT QNHENQLDEN - QRRFFLAGNP QAQAQSQQQQ QRQPRQQSPQ RQRQRQRQGQ GQNAGNIFNG FTPELIAQSF - NVDQETAQKL QGQNDQRGHI VNVGQDLQIV RPPQDRRSPR QQQEQATSPR QQQEQQQGRR - GGWSNGVEET ICSMKFKVNI DNPSQADFVN PQAGSIANLN SFKFPILEHL RLSVERGELR - PNAIQSPHWT INAHNLLYVT EGALRVQIVD NQGNSVFDNE LREGQVVVIP QNFAVIKRAN - EQGSRWVSFK TNDNAMIANL AGRVSASAAS PLTLWANRYQ LSREEAQQLK FSQRETVLFA - PSFSRGQGIR ASR -// diff --git a/forester/archive/RIO/others/hmmer/squid/INSTALL b/forester/archive/RIO/others/hmmer/squid/INSTALL deleted file mode 100644 index 4f96254..0000000 --- a/forester/archive/RIO/others/hmmer/squid/INSTALL +++ /dev/null @@ -1,31 +0,0 @@ -Brief installation instructions for squid -SRE, Tue Jul 25 08:52:03 2000 -________________________________________________________________ - -For a source distribution (example: squid-1.7.tar.gz), on a UNIX system: - - gunzip squid-1.7.tar.gz Uncompresses the archive. - tar xf squid-1.7.tar Unpacks the archive. - (makes a new directory, squid-1.7) - cd squid-1.7 Moves into the distribution toplevel directory. - ./configure Configures the software for your system. - make Builds the binaries. - make install Installs the software. (You may need to be root.) - make clean Cleans up. - -The default is to install into /usr/local/bin and other /usr/local -subdirectories. If this isn't what you want, edit the top of the -Makefile; instructions are provided there for changing the -installation paths. - -Any failure to install on a UNIX system is a bug. Please report it. - -Man pages are provided for some programs; see Man/ subdirectory. 
- - - - - - - - diff --git a/forester/archive/RIO/others/hmmer/squid/Makefile.in b/forester/archive/RIO/others/hmmer/squid/Makefile.in deleted file mode 100644 index 340aa82..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Makefile.in +++ /dev/null @@ -1,292 +0,0 @@ -############################################################### -# Makefile for SQUID library -# CVS $Id: Makefile.in,v 1.1.1.1 2005/03/22 08:34:26 cmzmasek Exp $ -# -# Note: The autoconf variables in this file must be coordinated -# with HMMER, if you change them, because HMMER will -# create a Makefile from this Makefile.in using its own -# configure script, not SQUID's. -# -########### -# HMMER - Biological sequence analysis with profile HMMs -# Copyright (C) 1992-1999 Washington University School of Medicine -# All Rights Reserved -# -# This source code is distributed under the terms of the -# GNU General Public License. See the files COPYING and LICENSE -# for details. -########### - -### Installation points -### -# For simple installations, just make sure ${prefix} is set correctly: -# default is /usr/local. -# -# For heterogenous computing environments, also -# set ${exec_prefix}, which gives you some flexibility -# for installing architecture dependent files (e.g. the programs). -# -# It's less likely that you'll need to set the individual -# variables BINDIR, MANDIR, etc., but they're there if you need 'em. -# -# The (simple) default configuration installs as follows: -# prefix = /usr/local -# executables in /usr/local/bin -# man pages in /usr/local/man/man1 -# header files in /usr/local/include -# libsquid.a in /usr/local/lib -# scripts in /usr/local/bin -# -# The St. 
Louis configuration, an example of a heterogenous -# computing environment, installs by setting: -# prefix = /usr/seshare/ -# exec_prefix = /usr/seshare/`uname` -# -# on a Linux platform, for instance, this results in: -# executables in /usr/seshare/Linux/bin -# man pages in /usr/seshare/man -# header files in /usr/seshare/include -# libsquid.a in /usr/seshare/Linux/lib -# scripts in /usr/seshare/Linux/bin -# -prefix = @prefix@ -exec_prefix = @exec_prefix@ -BINDIR = @bindir@ -MANDIR = @mandir@ -INCLUDEDIR = @includedir@ -LIBDIR = @libdir@ -SCRIPTDIR = @bindir@ - -## your compiler and compiler flags -# -CC = @CC@ -CFLAGS = @CFLAGS@ - -## other defined flags for machine-specific stuff -# -MDEFS = @MDEFS@ @DEFS@ -LIBS = @LIBS@ -lm - -## Archiver command -# -AR = ar rcv -RANLIB = @RANLIB@ - -## instructions for installing man pages -# -INSTMAN = cp -MANSUFFIX = 1 - -# Configuration for compiling in optional PVM support -# -PVMFLAG = @PVMFLAG@ -PVMLIBDIR = @PVMLIBDIR@ -PVMINCDIR = @PVMINCDIR@ -PVMLIBS = @PVMLIBS@ - -####### -## You should not need to modify below this line -####### -SHELL = /bin/sh -BASENAME = "squid" -PACKAGE = "SQUID" -RELEASE = "1.7" -RELCODE = "rel1_7" -RELEASEDATE = "July 2000" -COPYRIGHT = "Copyright \(C\) 1992-2000 HHMI/Washington University School of Medicine" -LICENSE = "Freely distributed under the GNU General Public License \(GPL\)" -LICENSETAG = gnu -COMPRESS = gzip - -PROGS = afetch\ - alistat\ - compalign\ - compstruct\ - sfetch\ - sreformat\ - revcomp\ - seqsplit\ - seqstat\ - shuffle\ - sindex\ - translate\ - weight - -MANS = alistat\ - seqstat\ - sfetch\ - shuffle\ - sreformat\ - -READMES = 00README INSTALL Makefile.in - -SCRIPTS = - -PRECONFHDRS = \ - squid.h.in\ - squidconf.h.in - -POSTCONFHDRS = \ - squid.h\ - squidconf.h\ - version.h - -HDRS = rk.h\ - sqfuncs.h\ - gki.h\ - gsi.h\ - msa.h\ - ssi.h\ - stopwatch.h - -OBJS = a2m.o\ - aligneval.o\ - alignio.o\ - clustal.o\ - cluster.o\ - dayhoff.o\ - eps.o\ - file.o\ - getopt.o\ - gki.o\ 
- gsi.o\ - hsregex.o\ - iupac.o\ - msa.o\ - msf.o\ - phylip.o\ - revcomp.o\ - rk.o\ - selex.o\ - seqencode.o\ - shuffle.o\ - sqerror.o\ - sqio.o\ - squidcore.o\ - sre_ctype.o\ - sre_math.o\ - sre_string.o\ - ssi.o\ - stack.o\ - stockholm.o\ - stopwatch.o\ - translate.o\ - types.o\ - weight.o - -################################################################ -# Targets that actually build the squid executables -all: version.h $(PROGS) - -$(PROGS): @EXEC_DEPENDENCY@ version.h $(OBJS) - $(CC) $(CFLAGS) $(MDEFS) $(PVMLIBDIR) -o $@ $@_main.o $(OBJS) $(PVMLIBS) $(LIBS) - -.c.o: - $(CC) $(CFLAGS) $(PVMFLAG) $(PVMINCDIR) $(MDEFS) -c $< -################################################################ - - -################################################################ -# Targets expected by packages (e.g. HMMER) that -# include SQUID as a module. -# -module: libsquid.a - -libsquid.a: version.h $(OBJS) - $(AR) libsquid.a $(OBJS) - $(RANLIB) libsquid.a - chmod 644 libsquid.a -################################################################# - - -# version.h: -# create the version.h file that will define stamps used by -# squidcore.c's Banner(), which is called by all executables to -# print a standard package/copyright/license banner; -# then puts copies of version.h in all directories that are -# going to need it. -# -version.h: - @echo "Creating version.h..." - @echo "/* version.h -- automatically generated by a Makefile. DO NOT EDIT. 
*/" > version.h - @echo "#define PACKAGE \"$(PACKAGE)\"" >> version.h - @echo "#define RELEASE \"$(RELEASE)\"" >> version.h - @echo "#define RELEASEDATE \"$(RELEASEDATE)\"" >> version.h - @echo "#define COPYRIGHT \"$(COPYRIGHT)\"" >> version.h - @echo "#define LICENSE \"$(LICENSE)\"" >> version.h - -install: $(PROGS) libsquid.a - test -d $(LIBDIR) || mkdir -p $(LIBDIR) - test -d $(BINDIR) || mkdir -p $(BINDIR) - test -d $(SCRIPTDIR) || mkdir -p $(SCRIPTDIR) - test -d $(INCLUDEDIR)|| mkdir -p $(INCLUDEDIR) - test -d $(MANDIR)/man$(MANSUFFIX) || mkdir -p $(MANDIR)/man$(MANSUFFIX) - cp libsquid.a $(LIBDIR)/ - cp $(HDRS) $(INCLUDEDIR)/ - cp $(PROGS) $(BINDIR)/ - for scriptfile in $(SCRIPTS); do\ - cp Scripts/$$scriptfile $(SCRIPTDIR)/;\ - done - @for manpage in $(MANS); do\ - $(INSTMAN) $$manpage.man $(MANDIR)/man$(MANSUFFIX)/$$manpage.$(MANSUFFIX);\ - done - -distclean: - make clean - -rm -f Makefile libsquid.a version.h config.cache config.log config.status ${POSTCONFHDRS} - -clean: - -rm -f *.o *~ core TAGS llib-lsquid.ln $(PROGS) - -# dist: build a new distribution directory in squid-$RELEASE, and make a tarball. -# Extracts straight from the CVS repository, so you must first do -# a "cvs commit" (it checks to be sure you do, at least for the current -# working directory). -dist: -# Delete old versions of the same release -# - @if test -d ${BASENAME}-$(RELEASE); then rm -rf ${BASENAME}-$(RELEASE); fi - @if test -e ${BASENAME}-$(RELEASE).tar; then rm -f ${BASENAME}-$(RELEASE).tar; fi - @if test -e ${BASENAME}-$(RELEASE).tar.Z; then rm -f ${BASENAME}-$(RELEASE).tar.Z; fi - @if test -e ${BASENAME}-$(RELEASE).tar.gz; then rm -f ${BASENAME}-$(RELEASE).tar.gz; fi -# -# CVS tag and extract. 
-c: make sure we committed; -# -F: allow more than one "make dist" per rel -# prep: must have done "cvs commit", and CVSROOT must be set -# - cvs tag -c -F ${BASENAME}_${RELCODE} - cvs export -r ${BASENAME}_${RELCODE} -d ${BASENAME}-${RELEASE} ${BASENAME} -# -# Make the configure script from configure.in -# - (cd ${BASENAME}-${RELEASE}; autoconf) -# -# Include the appropriate license files -# - cp Licenses/LICENSE.${LICENSETAG} ${BASENAME}-${RELEASE}/LICENSE - cp Licenses/COPYRIGHT.${LICENSETAG} ${BASENAME}-${RELEASE}/COPYRIGHT -# -# Put license tags (short licenses) on files that need 'em (replace LICENSE keyword) -# - for file in $(READMES) *.c ${HDRS} ${PRECONFHDRS}; do\ - licenseadd.pl Licenses/$(LICENSETAG) ${BASENAME}-${RELEASE}/$$file;\ - done; -# -# Remove files/directories that aren't supposed to go out in the distro. -# Do this last, so other steps (license adding, etc.) have simple loops. -# - -rm -rf ${BASENAME}-${RELEASE}/Licenses - -rm -rf ${BASENAME}-${RELEASE}/Docs - -rm ${BASENAME}-${RELEASE}/LOG - -rm ${BASENAME}-${RELEASE}/configure.in - -rm ${BASENAME}-${RELEASE}/test_main.c -# -# pack it up! -# - tar cvf ${BASENAME}-${RELEASE}.tar ${BASENAME}-${RELEASE} - ${COMPRESS} ${BASENAME}-$(RELEASE).tar - -TAGS: - etags -t *.h *.c Makefile.in - - diff --git a/forester/archive/RIO/others/hmmer/squid/Man/afetch.man b/forester/archive/RIO/others/hmmer/squid/Man/afetch.man deleted file mode 100644 index 67074ad..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Man/afetch.man +++ /dev/null @@ -1,98 +0,0 @@ -.TH "afetch" 1 "@RELEASEDATE@" "@PACKAGE@ @RELEASE@" "@PACKAGE@ Manual" - -.SH NAME -.TP -afetch - retrieve an alignment from an alignment database - -.SH SYNOPSIS -.B afetch -.I [options] -.I alignmentdb -.I key - -.PP -.B afetch --index -.I alignmentdb - -.SH DESCRIPTION - -.B afetch -retrieves the alignment named -.I key -from an alignment database in file -.I alignmentdb. 
- -.PP -.I alignmentdb -is a "multiple multiple alignment" file in Stockholm (e.g. native -Pfam) format. - -.PP -.I key -is either the name (ID) of the alignment, or its accession -number (AC). - -.PP -The -.I alignmentdb -file should first be SSI indexed with -.B afetch --index -for efficient retrieval. An SSI index is -not required, but alignment retrieval without one may -be painfully slow. - -.SH OPTIONS - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.SH EXPERT OPTIONS - -.TP -.B --index -Instead of retrieving a -.I key, -the special command -.B afetch --index -.I alignmentdb -produces an SSI index of the names and accessions -of the alignments in -the file -.I alignmentdb. -This should be run once on the -.I alignmentdb -file to prepare it for all future afetch's. - -.SH SEE ALSO - -.PP -Master man page, with full list of and guide to the -individual man pages for SQUID's auxiliary programs: see -.B squid(1). - -.SH AUTHOR - -@PACKAGE@ and its documentation is @COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -See COPYING in the source code distribution for more details, or contact me. - -.nf -Sean Eddy -Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. 
-St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/squid/Man/alistat.man b/forester/archive/RIO/others/hmmer/squid/Man/alistat.man deleted file mode 100644 index 63d3e0d..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Man/alistat.man +++ /dev/null @@ -1,138 +0,0 @@ -.TH "alistat" 1 "@RELEASEDATE@" "@PACKAGE@ @RELEASE@" "@PACKAGE@ Manual" - -.SH NAME -.TP -alistat - show statistics for a multiple alignment file - -.SH SYNOPSIS -.B alistat -.I [options] -.I alignfile - -.SH DESCRIPTION - -.B alistat -reads a multiple sequence alignment from the file -.I alignfile -in any supported format (including SELEX, GCG MSF, and -CLUSTAL), and shows a number of simple statistics about it. -These statistics include the name of the format, -the number of sequences, the total number of residues, -the average and range of the sequence lengths, the -alignment length (e.g. including gap characters). - -.PP -Also shown are some percent identities. A percent -pairwise alignment identity is defined as -.I (idents / MIN(len1, len2)) -where -.I idents -is the number of exact identities -and -.I len1, len2 -are the unaligned lengths of the two -sequences. The "average percent identity", -"most related pair", and "most unrelated pair" -of the alignment are the average, maximum, and -minimum of all -(N)(N-1)/2 pairs, respectively. -The "most distant seq" is calculated by finding -the maximum pairwise identity (best relative) for all N sequences, -then finding the minimum of these N numbers (hence, -the most outlying sequence). - -.SH OPTIONS - -.TP -.B -a -Show additional verbose information: a table with one line per -sequence showing name, length, and its highest and lowest pairwise -identity. These lines are prefixed with a * character to enable -easily -.BR grep' ing -them out and sorting them. 
For example, -.I alistat -a foo.slx | grep "*" | sort -n +3 -gives a ranked list of the most distant sequences -in the alignment. -Incompatible with the -.B -f -option. - -.TP -.B -f -Fast; use a sampling method to estimate the average %id. -When this option is chosen, -.B alistat -doesn't show the other three pairwise identity numbers. -This option is useful for very large alignments, for -which the full (N)(N-1) calculation of all pairs would -be prohibitive (e.g. Pfam's GP120 alignment, with over -10,000 sequences). Incompatible with the -.B -a -option. - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.TP -.B -q -be quiet - suppress the verbose header (program name, release number -and date, the parameters and options in effect). - -.TP -.B -B -(Babelfish). Autodetect and read a sequence file format other than the -default (FASTA). Almost any common sequence file format is recognized -(including Genbank, EMBL, SWISS-PROT, PIR, and GCG unaligned sequence -formats, and Stockholm, GCG MSF, and Clustal alignment formats). See -the printed documentation for a complete list of supported formats. - -.SH EXPERT OPTIONS - -.TP -.BI --informat " " -Specify that the sequence file is in format -.I , -rather than the default FASTA format. -Common examples include Genbank, EMBL, GCG, -PIR, Stockholm, Clustal, MSF, or PHYLIP; -see the printed documentation for a complete list -of accepted format names. -This option overrides the default format (FASTA) -and the -.I -B -Babelfish autodetection option. - - - -.SH SEE ALSO - -.PP -@SEEALSO@ - -.SH AUTHOR - -@PACKAGE@ and its documentation is @COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. 
-See COPYING in the source code distribution for more details, or contact me. - -.nf -Sean Eddy -Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/squid/Man/seqstat.man b/forester/archive/RIO/others/hmmer/squid/Man/seqstat.man deleted file mode 100644 index 5c0644e..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Man/seqstat.man +++ /dev/null @@ -1,98 +0,0 @@ -.TH "seqstat" 1 "@RELEASEDATE@" "@PACKAGE@ @RELEASE@" "@PACKAGE@ Manual" - -.SH NAME -.TP -seqstat - show statistics and format for a sequence file - -.SH SYNOPSIS -.B seqstat -.I [options] -.I seqfile - -.SH DESCRIPTION - -.B seqstat -reads a sequence file -.I seqfile -and shows a number of simple statistics about it. - -.pp -The printed statistics include the name of the format, the residue -type of the first sequence (protein, RNA, or DNA), the number of -sequences, the total number of residues, and the average and range of -the sequence lengths. - -.SH OPTIONS - -.TP -.B -a -Show additional verbose information: a table with one line per -sequence showing name, length, and description line. -These lines are prefixed with a * character to enable -easily -.BR grep' ing -them out and sorting them. - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.TP -.B -B -(Babelfish). Autodetect and read a sequence file format other than the -default (FASTA). Almost any common sequence file format is recognized -(including Genbank, EMBL, SWISS-PROT, PIR, and GCG unaligned sequence -formats, and Stockholm, GCG MSF, and Clustal alignment formats). See -the printed documentation for a complete list of supported formats. - -.SH EXPERT OPTIONS - -.TP -.BI --informat " " -Specify that the sequence file is in format -.I , -rather than the default FASTA format. 
-Common examples include Genbank, EMBL, GCG, -PIR, Stockholm, Clustal, MSF, or PHYLIP; -see the printed documentation for a complete list -of accepted format names. -This option overrides the default expected format (FASTA) -and the -.I -B -Babelfish autodetection option. - -.TP -.B --quiet -Suppress the verbose header (program name, release number -and date, the parameters and options in effect). - -.SH SEE ALSO - -.PP -@SEEALSO@ - -.SH AUTHOR - -@PACKAGE@ and its documentation is @COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -See COPYING in the source code distribution for more details, or contact me. - -.nf -Sean Eddy -Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/squid/Man/sfetch.man b/forester/archive/RIO/others/hmmer/squid/Man/sfetch.man deleted file mode 100644 index 25f745c..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Man/sfetch.man +++ /dev/null @@ -1,226 +0,0 @@ -.TH "sfetch" 1 "@RELEASEDATE@" "@PACKAGE@ @RELEASE@" "@PACKAGE@ Manual" - -.SH NAME -.TP -sfetch - get a sequence from a flatfile database. - -.SH SYNOPSIS -.B sfetch -.I [options] -.I seqname - -.SH DESCRIPTION - -.B sfetch -retrieves the sequence named -.I seqname -from a sequence database. - -.PP -Which database is used is controlled by the -.B -d -and -.B -D -options, or "little databases" and "big -databases". -The directory location of "big databases" can -be specified by environment variables, -such as $SWDIR for Swissprot, and $GBDIR -for Genbank (see -.B -D -for complete list). -A complete file path must be specified -for "little databases". 
-By default, if neither option is specified -and the name looks like a Swissprot identifier -(e.g. it has a _ character), the $SWDIR -environment variable is used to attempt -to retrieve the sequence -.I seqname -from Swissprot. - -.PP -A variety of other options are available which allow -retrieval of subsequences -.RI ( -f,-t ); -retrieval by accession number instead of -by name -.RI ( -a ); -reformatting the extracted sequence into a variety -of other formats -.RI ( -F ); -etc. - -.PP -If the database has been GSI indexed, sequence -retrieval will be extremely efficient; else, -retrieval may be painfully slow (the entire -database may have to be read into memory to -find -.IR seqname ). -GSI indexing -is recommended for all large or permanent -databases. - -.pp -This program was originally named -.B getseq, -and was renamed because it clashed with a GCG -program of the same name. - -.SH OPTIONS - -.TP -.B -a -Interpret -.I seqname -as an accession number, not an identifier. - -.TP -.BI -d " " -Retrieve the sequence from a sequence file named -.I . -If a GSI index -.I .gsi -exists, it is used to speed up the retrieval. - -.TP -.BI -f " " -Extract a subsequence starting from position -.I , -rather than from 1. See -.B -t. -If -.I -is greater than -.I -(as specified by the -.B -t -option), then the sequence is extracted as -its reverse complement (it is assumed to be -nucleic acid sequence). - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.TP -.BI -o " " -Direct the output to a file named -.I . -By default, output would go to stdout. - -.TP -.BI -r " " -Rename the sequence -.I -in the output after extraction. By default, the original -sequence identifier would be retained. Useful, for instance, -if retrieving a sequence fragment; the coordinates of -the fragment might be added to the name (this is what Pfam -does). 
- -.TP -.BI -t " " -Extract a subsequence that ends at position -.I , -rather than at the end of the sequence. See -.B -f. -If -.I -is less than -.I -(as specified by the -.B -f -option), then the sequence is extracted as -its reverse complement (it is assumed to be -nucleic acid sequence) - -.TP -.B -B -(Babelfish). Autodetect and read a sequence file format other than the -default (FASTA). Almost any common sequence file format is recognized -(including Genbank, EMBL, SWISS-PROT, PIR, and GCG unaligned sequence -formats, and Stockholm, GCG MSF, and Clustal alignment formats). See -the printed documentation for a complete list of supported formats. - - -.TP -.BI -D " " -Retrieve the sequence from the main sequence database -coded -.I . For each code, there is an environment -variable that specifies the directory path to that -database. -Recognized codes and their corresponding environment -variables are -.I -Dsw -(Swissprot, $SWDIR); -.I -Dpir -(PIR, $PIRDIR); -.I -Dem -(EMBL, $EMBLDIR); -.I -Dgb -(Genbank, $GBDIR); -.I -Dwp -(Wormpep, $WORMDIR); and -.I -Dowl -(OWL, $OWLDIR). -Each database is read in its native flatfile format. - -.TP -.BI -F " " -Reformat the extracted sequence into a different format. -(By default, the sequence is extracted from the database -in the same format as the database.) Available formats -are -.B embl, fasta, genbank, gcg, strider, zuker, ig, pir, squid, -and -.B raw. - -.SH EXPERT OPTIONS - -.TP -.BI --informat " " -Specify that the sequence file is in format -.I , -rather than the default FASTA format. -Common examples include Genbank, EMBL, GCG, -PIR, Stockholm, Clustal, MSF, or PHYLIP; -see the printed documentation for a complete list -of accepted format names. -This option overrides the default format (FASTA) -and the -.I -B -Babelfish autodetection option. 
- -.SH SEE ALSO - -.PP -@SEEALSO@ - -.SH AUTHOR - -@PACKAGE@ and its documentation is @COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -See COPYING in the source code distribution for more details, or contact me. - -.nf -Sean Eddy -Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/squid/Man/shuffle.man b/forester/archive/RIO/others/hmmer/squid/Man/shuffle.man deleted file mode 100644 index 93bbe53..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Man/shuffle.man +++ /dev/null @@ -1,204 +0,0 @@ -.TH "shuffle" 1 "@RELEASEDATE@" "@PACKAGE@ @RELEASE@" "@PACKAGE@ Manual" - -.SH NAME -.TP -shuffle - randomize the sequences in a sequence file - -.SH SYNOPSIS -.B shuffle -.I [options] -.I seqfile - -.SH DESCRIPTION - -.B shuffle -reads a sequence file -.I seqfile, -randomizes each sequence, and prints the randomized sequences -in FASTA format on standard output. The sequence names -are unchanged; this allows you to track down the source -of each randomized sequence if necessary. - -.pp -The default is to simply shuffle each input sequence, preserving -monosymbol composition exactly. To shuffle -each sequence while preserving both its monosymbol and disymbol -composition exactly, use the -.I -d -option. - -.pp -The -.I -0 -and -.I -1 -options allow you to generate sequences with the same -Markov properties as each input sequence. With -.I -0, -for each input sequence, 0th order Markov statistics -are collected (e.g. symbol composition), and a new -sequence is generated with the same composition. 
-With -.I -1, -the generated sequence has the same 1st order -Markov properties as the input sequence (e.g. -the same disymbol frequencies). - -.pp -Note that the default and -.I -0, -or -.I -d -and -.I -1, -are similar; the shuffling algorithms preserve -composition exactly, while the Markov algorithms -only expect to generate a sequence of similar -composition on average. - -.pp -Other shuffling algorithms are also available, -as documented below in the options. - -.SH OPTIONS - -.TP -.B -0 -Calculate 0th order Markov frequencies of each input sequence -(e.g. residue composition); generate output sequence -using the same 0th order Markov frequencies. - -.TP -.B -1 -Calculate 1st order Markov frequencies for each input -sequence (e.g. diresidue composition); generate output -sequence using the same 1st order Markov frequencies. -The first residue of the output sequence is always -the same as the first residue of the input sequence. - -.TP -.B -d -Shuffle the input sequence while preserving both -monosymbol and disymbol composition exactly. Uses -an algorithm published by S.F. Altschul and B.W. Erickson, -Mol. Biol. Evol. 2:526-538, 1985. - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.TP -.B -l -Look only at the length of each input sequence; generate -an i.i.d. output protein sequence of that length, -using monoresidue frequencies typical of proteins -(taken from Swissprot 35). - -.TP -.BI -n " " -Make -.I -different randomizations of each input sequence in -.I seqfile, -rather than the default of one. - -.TP -.B -r -Generate the output sequence by reversing the -input sequence. (Therefore only one "randomization" -per input sequence is possible, so it's -not worth using -.I -n -if you use reversal.) - -.TP -.BI -t " " -Truncate each input sequence to a fixed length of exactly -.I -residues. 
If the input sequence is shorter than -.I -it is discarded (therefore the output file may contain -fewer sequences than the input file). -If the input sequence is longer than -.I -a contiguous subsequence is randomly chosen. - -.TP -.BI -w " " -Regionally shuffle each input sequence in window sizes of -.I , -preserving local residue composition in each window. -Probably a better shuffling algorithm for biosequences -with nonstationary residue composition (e.g. composition -that is varying along the sequence, such as between -different isochores in human genome sequence). - -.TP -.B -B -(Babelfish). Autodetect and read a sequence file format other than the -default (FASTA). Almost any common sequence file format is recognized -(including Genbank, EMBL, SWISS-PROT, PIR, and GCG unaligned sequence -formats, and Stockholm, GCG MSF, and Clustal alignment formats). See -the printed documentation for a complete list of supported formats. - -.SH EXPERT OPTIONS - -.TP -.BI --informat " " -Specify that the sequence file is in format -.I , -rather than the default FASTA format. -Common examples include Genbank, EMBL, GCG, -PIR, Stockholm, Clustal, MSF, or PHYLIP; -see the printed documentation for a complete list -of accepted format names. -This option overrides the default expected format (FASTA) -and the -.I -B -Babelfish autodetection option. - -.TP -.B --nodesc -Do not output any sequence description in the output file, -only the sequence names. - -.TP -.BI --seed " " -Set the random number seed to -.I . -If you want reproducible results, use the same seed each time. -By default, -.B shuffle -uses a different seed each time, so does not generate -the same output in subsequent runs with the same input. 
- -.SH SEE ALSO - -.PP -@SEEALSO@ - -.SH AUTHOR - -@PACKAGE@ and its documentation is @COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -See COPYING in the source code distribution for more details, or contact me. - -.nf -Sean Eddy -Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. -St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/squid/Man/sreformat.man b/forester/archive/RIO/others/hmmer/squid/Man/sreformat.man deleted file mode 100644 index c502a39..0000000 --- a/forester/archive/RIO/others/hmmer/squid/Man/sreformat.man +++ /dev/null @@ -1,249 +0,0 @@ -.TH "sreformat" 1 "@RELEASEDATE@" "@PACKAGE@ @RELEASE@" "@PACKAGE@ Manual" - -.SH NAME -.TP -sreformat - convert sequence file to different format - -.SH SYNOPSIS -.B sreformat -.I [options] -.I format -.I seqfile - -.SH DESCRIPTION - -.B sreformat -reads the sequence file -.I seqfile -in any supported format, reformats it -into a new format specified by -.I format, -then prints the reformatted text. - -.PP -Supported input formats include (but are not limited to) the unaligned -formats FASTA, Genbank, EMBL, SWISS-PROT, PIR, and GCG, and the -aligned formats Stockholm, Clustal, GCG MSF, and Phylip. - -.PP -Available unaligned output file format codes -include -.I fasta -(FASTA format); -.I embl -(EMBL/SWISSPROT format); -.I genbank -(Genbank format); -.I gcg -(GCG single sequence format); -.I gcgdata -(GCG flatfile database format); -.I strider -(MacStrider format); -.I zuker -(Zuker MFOLD format); -.I ig -(Intelligenetics format); -.I pir -(PIR/CODATA flatfile format); -.I squid -(an undocumented St. 
Louis format); -.I raw -(raw sequence, no other information). - -.pp -The available aligned output file format -codes include -.I stockholm -(PFAM/Stockholm format); -.I msf -(GCG MSF format); -.I a2m -(aligned FASTA format, called A2M by the UC Santa Cruz -HMM group); -.I PHYLIP -(Felsenstein's PHYLIP format); and -.I selex -(old SELEX/HMMER/Pfam annotated alignment format); - -.pp -All thee codes are interpreted case-insensitively -(e.g. MSF, Msf, or msf all work). - -.PP -Unaligned format files cannot be reformatted to -aligned formats. -However, aligned formats can be reformatted -to unaligned formats -- gap characters are -simply stripped out. - -.PP -This program was originally named -.B reformat, -but that name clashes with a GCG program of the same name. - -.SH OPTIONS - -.TP -.B -a -Enable alignment reformatting. By default, sreformat expects -that the input file should be handled as an unaligned input -file (even if it is an alignment), and it will not allow you -to convert an unaligned file to an alignment (for obvious -reasons). -.pp -This may seem silly; surely if sreformat can autodetect and parse -alignment file formats as input, it can figure out when it's got an -alignment! There are two reasons. One is just the historical -structure of the code. The other is that FASTA unaligned format and -A2M aligned format (aligned FASTA) are impossible to tell apart with -100% confidence. - -.TP -.B -d -DNA; convert U's to T's, to make sure a nucleic acid -sequence is shown as DNA not RNA. See -.B -r. - -.TP -.B -h -Print brief help; includes version number and summary of -all options, including expert options. - -.TP -.B -l -Lowercase; convert all sequence residues to lower case. -See -.B -u. - -.TP -.B -r -RNA; convert T's to U's, to make sure a nucleic acid -sequence is shown as RNA not DNA. See -.B -d. - -.TP -.B -u -Uppercase; convert all sequence residues to upper case. -See -.B -l. 
- -.TP -.B -x -For DNA sequences, convert non-IUPAC characters (such as X's) to N's. -This is for compatibility with benighted people who insist on using X -instead of the IUPAC ambiguity character N. (X is for ambiguity -in an amino acid residue). -.pp -Warning: the code doesn't -check that you are actually giving it DNA. It simply -literally just converts non-IUPAC DNA symbols to N. So -if you accidentally give it protein sequence, it will -happily convert most every amino acid residue to an N. - -.TP -.B -B -(Babelfish). Autodetect and read a sequence file format other than the -default (FASTA). Almost any common sequence file format is recognized -(including Genbank, EMBL, SWISS-PROT, PIR, and GCG unaligned sequence -formats, and Stockholm, GCG MSF, and Clustal alignment formats). See -the printed documentation for a complete list of supported formats. - - -.SH EXPERT OPTIONS - -.TP -.BI --informat " " -Specify that the sequence file is in format -.I , -rather than the default FASTA format. -Common examples include Genbank, EMBL, GCG, -PIR, Stockholm, Clustal, MSF, or PHYLIP; -see the printed documentation for a complete list -of accepted format names. -This option overrides the default format (FASTA) -and the -.I -B -Babelfish autodetection option. - -.TP -.B --mingap -If -.I seqfile -is an alignment, remove any columns that contain 100% gap -characters, minimizing the overall length of the alignment. -(Often useful if you've extracted a subset of aligned -sequences from a larger alignment.) - -.TP -.B --pfam -For SELEX alignment output format only, put the entire -alignment in one block (don't wrap into multiple blocks). -This is close to the format used internally by Pfam -in Stockholm and Cambridge. - -.TP -.B --sam -Try to convert gap characters to UC Santa Cruz SAM style, where a . -means a gap in an insert column, and a - means a -deletion in a consensus/match column. 
This only -works for converting aligned file formats, and only -if the alignment already adheres to the SAM convention -of upper case for residues in consensus/match columns, -and lower case for residues in insert columns. This is -true, for instance, of all alignments produced by old -versions of HMMER. (HMMER2 produces alignments -that adhere to SAM's conventions even in gap character choice.) -This option was added to allow Pfam alignments to be -reformatted into something more suitable for profile HMM -construction using the UCSC SAM software. - -.TP -.BI --samfrac " " -Try to convert the alignment gap characters and -residue cases to UC Santa Cruz SAM style, where a . -means a gap in an insert column and a - means a -deletion in a consensus/match column, and -upper case means match/consensus residues and -lower case means inserted resiudes. This will only -work for converting aligned file formats, but unlike the -.B --sam -option, it will work regardless of whether the file adheres -to the upper/lower case residue convention. Instead, any -column containing more than a fraction -.I -of gap characters is interpreted as an insert column, -and all other columns are interpreted as match columns. -This option was added to allow Pfam alignments to be -reformatted into something more suitable for profile HMM -construction using the UCSC SAM software. - -.SH SEE ALSO - -.PP -@SEEALSO@ - -.SH AUTHOR - -@PACKAGE@ and its documentation is @COPYRIGHT@ -HMMER - Biological sequence analysis with profile HMMs -Copyright (C) 1992-1999 Washington University School of Medicine -All Rights Reserved - - This source code is distributed under the terms of the - GNU General Public License. See the files COPYING and LICENSE - for details. -See COPYING in the source code distribution for more details, or contact me. - -.nf -Sean Eddy -Dept. of Genetics -Washington Univ. School of Medicine -4566 Scott Ave. 
-St Louis, MO 63110 USA -Phone: 1-314-362-7666 -FAX : 1-314-362-7855 -Email: eddy@genetics.wustl.edu -.fi - - diff --git a/forester/archive/RIO/others/hmmer/squid/a2m.c b/forester/archive/RIO/others/hmmer/squid/a2m.c deleted file mode 100644 index 5beff81..0000000 --- a/forester/archive/RIO/others/hmmer/squid/a2m.c +++ /dev/null @@ -1,113 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* a2m.c - * - * reading/writing A2M (aligned FASTA) files. - * - * RCS $Id: a2m.c,v 1.1.1.1 2005/03/22 08:34:17 cmzmasek Exp $ - */ - -#include -#include -#include -#include "squid.h" -#include "msa.h" - -/* Function: ReadA2M() - * Date: SRE, Sun Jun 6 17:11:29 1999 [bus from Madison 1999 worm mtg] - * - * Purpose: Parse an alignment read from an open A2M format - * alignment file. A2M is a single alignment format. - * Return the alignment, or NULL if we've already - * read the alignment. - * - * Args: afp - open alignment file - * - * Returns: MSA * - an alignment object. 
- * Caller responsible for an MSAFree() - */ -MSA * -ReadA2M(MSAFILE *afp) -{ - MSA *msa; - char *buf; - char *name; - char *desc; - char *seq; - int idx; - int len1, len2; - - if (feof(afp->f)) return NULL; - - name = NULL; - msa = MSAAlloc(10, 0); - idx = 0; - while ((buf = MSAFileGetLine(afp)) != NULL) - { - if (*buf == '>') - { - buf++; /* skip the '>' */ - if ((name = sre_strtok(&buf, WHITESPACE, &len1)) == NULL) - Die("Blank name in A2M file %s (line %d)\n", afp->fname, afp->linenumber); - desc = sre_strtok(&buf, "\n", &len2); - - idx = GKIStoreKey(msa->index, name); - if (idx >= msa->nseqalloc) MSAExpand(msa); - - msa->sqname[idx] = sre_strdup(name, len1); - if (desc != NULL) MSASetSeqDescription(msa, idx, desc); - msa->nseq++; - } - else if (name != NULL) - { - if ((seq = sre_strtok(&buf, WHITESPACE, &len1)) == NULL) continue; - msa->sqlen[idx] = sre_strcat(&(msa->aseq[idx]), msa->sqlen[idx], seq, len1); - } - } - if (name == NULL) { MSAFree(msa); return NULL; } - - MSAVerifyParse(msa); - return msa; -} - - -/* Function: WriteA2M() - * Date: SRE, Sun Jun 6 17:40:35 1999 [bus from Madison, 1999 worm mtg] - * - * Purpose: Write an "aligned FASTA" (aka a2m, to UCSC) formatted - * alignment. - * - * Args: fp - open FILE to write to. - * msa - alignment to write - * - * Returns: void - */ -void -WriteA2M(FILE *fp, MSA *msa) -{ - int idx; /* sequence index */ - int pos; /* position in sequence */ - char buf[64]; /* buffer for individual lines */ - int cpl = 60; /* char per line; must be < 64 unless buf is bigger */ - - buf[cpl] = '\0'; - for (idx = 0; idx < msa->nseq; idx++) - { - fprintf(fp, ">%s %s\n", - msa->sqname[idx], - (msa->sqdesc != NULL && msa->sqdesc[idx] != NULL) ? 
msa->sqdesc[idx] : ""); - for (pos = 0; pos < msa->alen; pos+=cpl) - { - strncpy(buf, &(msa->aseq[idx][pos]), cpl); - fprintf(fp, "%s\n", buf); - } - } -} diff --git a/forester/archive/RIO/others/hmmer/squid/afetch_main.c b/forester/archive/RIO/others/hmmer/squid/afetch_main.c deleted file mode 100644 index 23119a5..0000000 --- a/forester/archive/RIO/others/hmmer/squid/afetch_main.c +++ /dev/null @@ -1,182 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* afetch_main.c - * SRE, Tue Nov 9 18:47:02 1999 [Saint Louis] - * - * afetch -- a program to extract alignments from the Pfam database - * - * CVS $Id: afetch_main.c,v 1.1.1.1 2005/03/22 08:34:30 cmzmasek Exp $ - */ - -#include -#include -#include "squid.h" -#include "msa.h" -#include "ssi.h" - -static char banner[] = "afetch - retrieve an alignment from Pfam"; - -static char usage[] = "\ -Usage: afetch [-options] \n\ - or: afetch --index \n\ -\n\ - Get an alignment from a database.\n\ - Available options:\n\ - -h : help; print version and usage info\n\ -"; - -static char experts[] = "\ - --index : construct indices for the database\n\ -"; - -struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "--index", FALSE, sqdARG_NONE } -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *afile; /* name of alignment file to read */ - MSAFILE *afp; /* pointer to open index file */ - char *key; /* name/accession of alignment to fetch */ - MSA *msa; /* the fetched alignment */ - int format; /* format of afile */ - int do_index; /* TRUE to index instead of retrieve */ - - 
char *optname; - char *optarg; - int optind; - - /*********************************************** - * Parse the command line - ***********************************************/ - - /* initializations and defaults */ - format = MSAFILE_STOCKHOLM; /* period. It's the only multi-MSA file format. */ - do_index = FALSE; - key = NULL; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "--index") == 0) { do_index = TRUE; } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - - if ((do_index && argc - optind != 1) || (! do_index && argc - optind != 2)) - Die("Incorrect number of command line arguments.\n%s\n", usage); - - afile = argv[optind++]; - if (! do_index) key = argv[optind++]; - - if ((afp = MSAFileOpen(afile, format, NULL)) == NULL) - Die("Alignment file %s could not be opened for reading", afile); - - /*********************************************** - * Section 1. Alignment database indexing - ***********************************************/ - - if (do_index) { - int mode; - char *ssifile; - SSIINDEX *si; - int fh; - int status; - SSIOFFSET offset; - int n = 0; - - /* Not that we're expecting an alignment file so - * large that it would require a 64-bit index, but... 
- */ - if ((mode = SSIRecommendMode(afile)) == -1) - Die("File %s doesn't exist, or is too large for your OS", afile); - - ssifile = sre_strdup(afile, -1); - sre_strcat(&ssifile, -1, ".ssi", -1); - - if ((si = SSICreateIndex(mode)) == NULL) - Die("Couldn't allocate/initialize the new SSI index"); - if (SSIAddFileToIndex(si, afile, afp->format, &fh) != 0) - Die("SSIAddFileToIndex() failed"); - - status = SSIGetFilePosition(afp->f, mode, &offset); - if (status != 0) Die("SSIGetFilePosition() failed"); - - while ((msa = MSAFileRead(afp)) != NULL) - { - if (msa->name == NULL) - Die("SSI index requires that every MSA has a name"); - - status = SSIAddPrimaryKeyToIndex(si, msa->name, fh, &offset, NULL, 0); - if (status != 0) Die("SSIAddPrimaryKeyToIndex() failed"); - - if (msa->acc != NULL) { - status = SSIAddSecondaryKeyToIndex(si, msa->acc, msa->name); - if (status != 0) Die("SSIAddSecondaryKeyToIndex() failed"); - } - - status = SSIGetFilePosition(afp->f, mode, &offset); - if (status != 0) Die("SSIGetFilePosition() failed"); - - n++; - MSAFree(msa); - } - - status = SSIWriteIndex(ssifile, si); - if (status != 0) Die("SSIWriteIndex() failed"); - - printf ("%d alignments indexed in SSI index %s\n", n, ssifile); - free(ssifile); - MSAFileClose(afp); - SSIFreeIndex(si); - SqdClean(); - exit (0); /* exit indexing program here */ - } - - /*********************************************** - * Section 2. Alignment retrieval - ***********************************************/ - - /* Indexed retrieval: - */ - if (afp->ssi != NULL) { - if (! 
MSAFilePositionByKey(afp, key)) - Die("No such alignment %s found in file %s", key, afile); - msa = MSAFileRead(afp); - } - /* Brute force retrieval: - */ - else { - while ((msa = MSAFileRead(afp)) != NULL) - { - if (strcmp(msa->name, key) == 0) break; - if (strcmp(msa->acc, key) == 0) break; - MSAFree(msa); - } - } - - if (msa == NULL) Die("Failed to retrieve %s from file %s", key, afile); - - /* Output the alignment we retrieved - */ - WriteStockholm(stdout, msa); - - MSAFileClose(afp); - MSAFree(msa); - exit (0); -} diff --git a/forester/archive/RIO/others/hmmer/squid/aligneval.c b/forester/archive/RIO/others/hmmer/squid/aligneval.c deleted file mode 100644 index e9c23a2..0000000 --- a/forester/archive/RIO/others/hmmer/squid/aligneval.c +++ /dev/null @@ -1,513 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* aligneval.c - * RCS $Id: aligneval.c,v 1.1.1.1 2005/03/22 08:34:31 cmzmasek Exp $ - * - * Comparison of multiple alignments. Three functions are - * provided, using subtly different scoring schemes: - * CompareMultAlignments() - basic scoring scheme - * CompareRefMultAlignments() - only certain "canonical" columns - * are scored - * - * The similarity measure is a fractional alignment identity averaged - * over all sequence pairs. 
The score for all pairs is: - * (identically aligned symbols) / (total aligned columns in - * known alignment) - * - * A column c is identically aligned for sequences i, j if: - * 1) both i,j have a symbol aligned in column c, and the - * same pair of symbols is aligned somewhere in the test - * alignment - * 2) S[i][c] is aligned to a gap in sequence j, and that symbol - * is aligned to a gap in the test alignment - * 3) converse of 2) - * - * - * The algorithm is as follows: - * 1) For each known/test aligned pair of sequences (k1,k2 and t1,t2) - * construct a list for each sequence, in which for every - * counted symbol we record the raw index of the symbol in - * the other sequence that it aligns to, or -1 if it aligns - * to a gap or uncounted symbol. - * - * 2) Compare the list for k1 to the list for t1 and count an identity - * for each correct alignment. - * - * 3) Repeat 2) for comparing k2 to t2. Note that this means correct sym/sym - * alignments count for 2; correct sym/gap alignments count for 1. - * - * 4) The score is (identities from 2 + identities from 3) / - * (totals from 2 + totals from 3). - * - * Written originally for koala's ss2 pairwise alignment package. - * - * Sean Eddy, Sun Nov 1 12:45:11 1992 - * SRE, Thu Jul 29 16:47:18 1993: major revision: all functions replaced by new algorithm - * CVS $Id: aligneval.c,v 1.1.1.1 2005/03/22 08:34:31 cmzmasek Exp $ - */ - - -#include -#include -#include -#include "squid.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -static int make_alilist(char *s1, char *s2, int **ret_s1_list, int *ret_listlen); -static int make_ref_alilist(int *refcoords, char *k1, char *k2, char *s1, char *s2, - int **ret_s1_list, int *ret_listlen); -static int compare_lists(int *k1, int *k2, int *t1, int *t2, int len1, int len2, float *ret_sc); - - -/* Function: ComparePairAlignments - * - * Purpose: Calculate and return a number representing how well two different alignments - * of a pair of sequences compare. 
The number is, roughly speaking, - * the fraction of columns which are identically aligned. - * - * For all columns c in which either known1[c] or known2[c] - * is a non-gap, count an identity if those same symbols are - * aligned somewhere in calc1/calc2. The score is identities/total - * columns examined. (i.e. fully gapped columns don't count) - * - * more explicitly, identities come from: - * both known and test aligned pairs have the same symbol in the first sequence aligned to - * a gap in the second sequence; - * both known and test aligned pairs have the same symbol in the second sequence - * aligned to a gap in the first sequence; - * the known alignment has symbols aligned at this column, and the test - * alignment aligns the same two symbols. - * - * Args: known1, known2: trusted alignment of two sequences - * calc1, calc2: test alignment of two sequences - * - * Return: Returns -1.0 on internal failure. - */ -float -ComparePairAlignments(char *known1, char *known2, char *calc1, char *calc2) -{ - int *klist1; - int *klist2; - int *tlist1; - int *tlist2; - int len1, len2; - float score; - - if (! make_alilist(calc1, calc2, &tlist1, &len1)) return -1.0; - if (! make_alilist(calc2, calc1, &tlist2, &len2)) return -1.0; - if (! make_alilist(known1, known2, &klist1, &len1)) return -1.0; - if (! make_alilist(known2, known1, &klist2, &len2)) return -1.0; - if (! compare_lists(klist1, klist2, tlist1, tlist2, len1, len2, &score)) return -1.0; - - free(klist1); - free(klist2); - free(tlist1); - free(tlist2); - return score; -} - - - -/* Function: CompareRefPairAlignments() - * - * Same as above, but the only columns that count are the ones - * with indices in *refcoord. 
*refcoord and the known1, known2 - * pair must be in sync with each other (come from the same - * multiple sequence alignment) - * - * Args: ref - 0..alen-1 array of 1 or 0 - * known1,known2 - trusted alignment - * calc1, calc2 - test alignment - * - * Return: the fractional alignment identity on success, -1.0 on failure. - */ -float -CompareRefPairAlignments(int *ref, char *known1, char *known2, char *calc1, char *calc2) -{ - int *klist1; - int *klist2; - int *tlist1; - int *tlist2; - int len1, len2; - float score; - - if (! make_ref_alilist(ref, known1, known2, calc1, calc2, &tlist1, &len1)) return -1.0; - if (! make_ref_alilist(ref, known2, known1, calc2, calc1, &tlist2, &len2)) return -1.0; - if (! make_ref_alilist(ref, known1, known2, known1, known2, &klist1, &len1)) return -1.0; - if (! make_ref_alilist(ref, known2, known1, known2, known1, &klist2, &len2)) return -1.0; - if (! compare_lists(klist1, klist2, tlist1, tlist2, len1, len2, &score)) return -1.0; - - free(klist1); - free(klist2); - free(tlist1); - free(tlist2); - return score; -} - -/* Function: make_alilist() - * - * Purpose: Construct a list (array) mapping the raw symbols of s1 - * onto the indexes of the aligned symbols in s2 (or -1 - * for gaps in s2). The list (s1_list) will be of the - * length of s1's raw sequence. - * - * Args: s1 - sequence to construct the list for - * s2 - sequence s1 is aligned to - * ret_s1_list - RETURN: the constructed list (caller must free) - * ret_listlen - RETURN: length of the list - * - * Returns: 1 on success, 0 on failure - */ -static int -make_alilist(char *s1, char *s2, int **ret_s1_list, int *ret_listlen) -{ - int *s1_list; - int col; /* column position in alignment */ - int r1, r2; /* raw symbol index at current col in s1, s2 */ - - /* Malloc for s1_list. 
It can't be longer than s1 itself; we just malloc - * for that (and waste a wee bit of space) - */ - s1_list = (int *) MallocOrDie (sizeof(int) * strlen(s1)); - r1 = r2 = 0; - for (col = 0; s1[col] != '\0'; col++) - { - /* symbol in s1? Record what it's aligned to, and bump - * the r1 counter. - */ - if (! isgap(s1[col])) - { - s1_list[r1] = isgap(s2[col]) ? -1 : r2; - r1++; - } - - /* symbol in s2? bump the r2 counter - */ - if (! isgap(s2[col])) - r2++; - } - - *ret_listlen = r1; - *ret_s1_list = s1_list; - return 1; -} - - - -/* Function: make_ref_alilist() - * - * Purpose: Construct a list (array) mapping the raw symbols of s1 - * which are under canonical columns of the ref alignment - * onto the indexes of the aligned symbols in s2 (or -1 - * for gaps in s2 or noncanonical symbols in s2). - * - * Args: ref: - array of indices of canonical coords (1 canonical, 0 non) - * k1 - s1's known alignment (w/ respect to refcoords) - * k2 - s2's known alignment (w/ respect to refcoords) - * s1 - sequence to construct the list for - * s2 - sequence s1 is aligned to - * ret_s1_list - RETURN: the constructed list (caller must free) - * ret_listlen - RETURN: length of the list - * - * Returns: 1 on success, 0 on failure - */ -/*ARGSUSED*/ -static int -make_ref_alilist(int *ref, char *k1, char *k2, - char *s1, char *s2, int **ret_s1_list, int *ret_listlen) -{ - int *s1_list; - int col; /* column position in alignment */ - int r1, r2; /* raw symbol index at current col in s1, s2 */ - int *canons1; /* flag array, 1 if position i in s1 raw seq is canonical */ - int lpos; /* position in list */ - - /* Allocations. No arrays can exceed the length of their - * appropriate parent (s1 or s2) - */ - s1_list = (int *) MallocOrDie (sizeof(int) * strlen(s1)); - canons1 = (int *) MallocOrDie (sizeof(int) * strlen(s1)); - - /* First we use refcoords and k1,k2 to construct an array of 1's - * and 0's, telling us whether s1's raw symbol number i is countable. 
- * It's countable simply if it's under a canonical column. - */ - r1 = 0; - for (col = 0; k1[col] != '\0'; col++) - { - if (! isgap(k1[col])) - { - canons1[r1] = ref[col] ? 1 : 0; - r1++; - } - } - - /* Now we can construct the list. We don't count pairs if the sym in s1 - * is non-canonical. - * We have to keep separate track of our position in the list (lpos) - * from our positions in the raw sequences (r1,r2) - */ - r1 = r2 = lpos = 0; - for (col = 0; s1[col] != '\0'; col++) - { - if (! isgap(s1[col]) && canons1[r1]) - { - s1_list[lpos] = isgap(s2[col]) ? -1 : r2; - lpos++; - } - - if (! isgap(s1[col])) - r1++; - if (! isgap(s2[col])) - r2++; - } - - free(canons1); - *ret_listlen = lpos; - *ret_s1_list = s1_list; - return 1; -} - -/* Function: compare_lists() - * - * Purpose: Given four alignment lists (k1,k2, t1,t2), calculate the - * alignment score. - * - * Args: k1 - list of k1's alignment to k2 - * k2 - list of k2's alignment to k1 - * t1 - list of t1's alignment to t2 - * t2 - list of t2's alignment to t2 - * len1 - length of k1, t1 lists (same by definition) - * len2 - length of k2, t2 lists (same by definition) - * ret_sc - RETURN: identity score of alignment - * - * Return: 1 on success, 0 on failure. - */ -static int -compare_lists(int *k1, int *k2, int *t1, int *t2, int len1, int len2, float *ret_sc) -{ - float id; - float tot; - int i; - - id = tot = 0.0; - for (i = 0; i < len1; i++) - { - tot += 1.0; - if (t1[i] == k1[i]) id += 1.0; - } - - for ( i = 0; i < len2; i++) - { - tot += 1.0; - if (k2[i] == t2[i]) id += 1.0; - } - - *ret_sc = id / tot; - return 1; -} - - -/* Function: CompareMultAlignments - * - * Purpose: Invokes pairwise alignment comparison for every possible pair, - * and returns the average score over all N(N-1) of them or -1.0 - * on an internal failure. - * - * Can be slow for large N, since it's quadratic. 
- * - * Args: kseqs - trusted multiple alignment - * tseqs - test multiple alignment - * N - number of sequences - * - * Return: average identity score, or -1.0 on failure. - */ -float -CompareMultAlignments(char **kseqs, char **tseqs, int N) -{ - int i, j; /* counters for sequences */ - float score; - float tot_score = 0.0; - /* do all pairwise comparisons */ - for (i = 0; i < N; i++) - for (j = i+1; j < N; j++) - { - score = ComparePairAlignments(kseqs[i], kseqs[j], tseqs[i], tseqs[j]); - if (score < 0.0) return -1.0; - tot_score += score; - } - return ((tot_score * 2.0) / ((float) N * ((float) N - 1.0))); -} - - - -/* Function: CompareRefMultAlignments() - * - * Purpose: Same as above, except an array of reference coords for - * the canonical positions of the known alignment is also - * provided. - * - * Args: ref : 0..alen-1 array of 1/0 flags, 1 if canon - * kseqs : trusted alignment - * tseqs : test alignment - * N : number of sequences - * - * Return: average identity score, or -1.0 on failure - */ -float -CompareRefMultAlignments(int *ref, char **kseqs, char **tseqs, int N) -{ - int i, j; /* counters for sequences */ - float score; - float tot_score = 0.0; - - /* do all pairwise comparisons */ - for (i = 0; i < N; i++) - for (j = i+1; j < N; j++) - { - score = CompareRefPairAlignments(ref, kseqs[i], kseqs[j], tseqs[i], tseqs[j]); - if (score < 0.0) return -1.0; - tot_score += score; - } - return ((tot_score * 2.0)/ ((float) N * ((float) N - 1.0))); -} - -/* Function: PairwiseIdentity() - * - * Purpose: Calculate the pairwise fractional identity between - * two aligned sequences s1 and s2. This is simply - * (idents / MIN(len1, len2)). - * - * Note how many ways there are to calculate pairwise identity, - * because of the variety of choices for the denominator: - * idents/(idents+mismat) has the disadvantage that artifactual - * gappy alignments would have high "identities". 
- * idents/(AVG|MAX)(len1,len2) both have the disadvantage that - * alignments of fragments to longer sequences would have - * artifactually low "identities". - * - * Case sensitive; also, watch out in nucleic acid alignments; - * U/T RNA/DNA alignments will be counted as mismatches! - */ -float -PairwiseIdentity(char *s1, char *s2) -{ - int idents; /* total identical positions */ - int len1, len2; /* lengths of seqs */ - int x; /* position in aligned seqs */ - - idents = len1 = len2 = 0; - for (x = 0; s1[x] != '\0' && s2[x] != '\0'; x++) - { - if (!isgap(s1[x])) { - len1++; - if (s1[x] == s2[x]) idents++; - } - if (!isgap(s2[x])) len2++; - } - if (len2 < len1) len1 = len2; - return (len1 == 0 ? 0.0 : (float) idents / (float) len1); -} - - - -/* Function: AlignmentIdentityBySampling() - * Date: SRE, Mon Oct 19 14:29:01 1998 [St. Louis] - * - * Purpose: Estimate and return the average pairwise - * fractional identity of an alignment, - * using sampling. - * - * For use when there's so many sequences that - * an all vs. all rigorous calculation will - * take too long. - * - * Case sensitive! - * - * Args: aseq - aligned sequences - * L - length of alignment - * N - number of seqs in alignment - * nsample - number of samples - * - * Returns: average fractional identity, 0..1. - */ -float -AlignmentIdentityBySampling(char **aseq, int L, int N, int nsample) -{ - int x, i, j; /* counters */ - float sum; - - if (N < 2) return 1.0; - - sum = 0.; - for (x = 0; x < nsample; x++) - { - i = CHOOSE(N); - do { j = CHOOSE(N); } while (j == i); /* make sure j != i */ - sum += PairwiseIdentity(aseq[i], aseq[j]); - } - return sum / (float) nsample; -} - -/* Function: MajorityRuleConsensus() - * Date: SRE, Tue Mar 7 15:30:30 2000 [St. Louis] - * - * Purpose: Given a set of aligned sequences, produce a - * majority rule consensus sequence. If >50% nonalphabetic - * (usually meaning gaps) in the column, ignore the column. 
- * - * Args: aseq - aligned sequences, [0..nseq-1][0..alen-1] - * nseq - number of sequences - * alen - length of alignment - * - * Returns: ptr to allocated consensus sequence. - * Caller is responsible for free'ing this. - */ -char * -MajorityRuleConsensus(char **aseq, int nseq, int alen) -{ - char *cs; /* RETURN: consensus sequence */ - int count[27]; /* counts for a..z and gaps in a column */ - int idx,apos; /* counters for seq, column */ - int spos; /* position in cs */ - int x; /* counter for characters */ - int sym; - int max, bestx; - - cs = MallocOrDie(sizeof(char) * (alen+1)); - - for (spos=0,apos=0; apos < alen; apos++) - { - for (x = 0; x < 27; x++) count[x] = 0; - - for (idx = 0; idx < nseq; idx++) - { - if (isalpha(aseq[idx][apos])) { - sym = toupper(aseq[idx][apos]); - count[sym-'A']++; - } else { - count[26]++; - } - } - - if ((float) count[26] / (float) nseq <= 0.5) { - max = bestx = -1; - for (x = 0; x < 26; x++) - if (count[x] > max) { max = count[x]; bestx = x; } - cs[spos++] = (char) ('A' + bestx); - } - } - cs[spos] = '\0'; - return cs; -} diff --git a/forester/archive/RIO/others/hmmer/squid/alignio.c b/forester/archive/RIO/others/hmmer/squid/alignio.c deleted file mode 100644 index f9070a8..0000000 --- a/forester/archive/RIO/others/hmmer/squid/alignio.c +++ /dev/null @@ -1,643 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* alignio.c - * SRE, Mon Jul 12 11:57:37 1993 - * RCS $Id: alignio.c,v 1.1.1.1 2005/03/22 08:34:27 cmzmasek Exp $ - * - * Input/output of sequence alignments. 
- */ - -#include -#include -#include -#include -#include "squid.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -/* Function: AllocAlignment() - * - * Purpose: Allocate space for an alignment, given the number - * of sequences and the alignment length in columns. - * - * Args: nseq - number of sequences - * alen - width of alignment - * ret_aseq - RETURN: alignment itself - * ainfo - RETURN: other info associated with alignment - * - * Return: (void) - * aseq, ainfo free'd by caller: FreeAlignment(aseq, &ainfo). - * note that ainfo itself is alloc'ed in caller, usually - * just by a "AINFO ainfo" definition. - */ -void -AllocAlignment(int nseq, int alen, char ***ret_aseq, AINFO *ainfo) -{ - char **aseq; - int idx; - - InitAinfo(ainfo); - - aseq = (char **) MallocOrDie (sizeof(char *) * nseq); - for (idx = 0; idx < nseq; idx++) - aseq[idx] = (char *) MallocOrDie (sizeof(char) * (alen+1)); - - ainfo->alen = alen; - ainfo->nseq = nseq; - - ainfo->wgt = (float *) MallocOrDie (sizeof(float) * nseq); - FSet(ainfo->wgt, nseq, 1.0); - - ainfo->sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO) * nseq); - for (idx = 0; idx < nseq; idx++) - ainfo->sqinfo[idx].flags = 0; - - *ret_aseq = aseq; -} - - -/* Function: InitAinfo() - * Date: SRE, Tue Jan 19 10:16:02 1999 [St. Louis] - * - * Purpose: Initialize the fields in ainfo structure to - * default (null) values. Does nothing with - * fields that are dependent on nseq or alen. - * - * Args: ainfo - optional info structure for an alignment - * - * Returns: (void). ainfo is modified. - */ -void -InitAinfo(AINFO *ainfo) -{ - ainfo->name = NULL; - ainfo->desc = NULL; - ainfo->cs = NULL; - ainfo->rf = NULL; - ainfo->acc = NULL; - ainfo->au = NULL; - ainfo->flags = 0; - - ainfo->tc1 = ainfo->tc2 = 0.0; - ainfo->nc1 = ainfo->nc2 = 0.0; - ainfo->ga1 = ainfo->ga2 = 0.0; -} - - -/* Function: FreeAlignment() - * - * Purpose: Free the space allocated to alignment, names, and optional - * information. 
- * - * Args: aseqs - sequence alignment - * ainfo - associated alignment data. - */ -void -FreeAlignment(char **aseqs, AINFO *ainfo) -{ - int i; - - for (i = 0; i < ainfo->nseq; i++) - { - if (ainfo->sqinfo[i].flags & SQINFO_SS) free(ainfo->sqinfo[i].ss); - if (ainfo->sqinfo[i].flags & SQINFO_SA) free(ainfo->sqinfo[i].sa); - } - if (ainfo->cs != NULL) free(ainfo->cs); - if (ainfo->rf != NULL) free(ainfo->rf); - if (ainfo->name != NULL) free(ainfo->name); - if (ainfo->desc != NULL) free(ainfo->desc); - if (ainfo->acc != NULL) free(ainfo->acc); - if (ainfo->au != NULL) free(ainfo->au); - - free(ainfo->sqinfo); - free(ainfo->wgt); - Free2DArray((void **) aseqs, ainfo->nseq); -} - - - -/* Function: SAMizeAlignment() - * Date: SRE, Tue Jun 30 09:49:40 1998 [St. Louis] - * - * Purpose: Make a "best effort" attempt to convert an alignment - * to SAM gap format: - in delete col, . in insert col. - * Only works if alignment adheres to SAM's upper/lower - * case convention, which is true for instance of old - * HMMER alignments. - * - * Args: aseq - alignment to convert - * nseq - number of seqs in alignment - * alen - length of alignment - * - * Returns: (void) - */ -void -SAMizeAlignment(char **aseq, int nseq, int alen) -{ - int col; /* counter for aligned columns */ - int i; /* counter for seqs */ - int sawlower, sawupper, sawgap; - char gapchar; - - for (col = 0; col < alen; col++) - { - sawlower = sawupper = sawgap = 0; - /* pass 1: do we see only upper or lower? */ - for (i = 0; i < nseq; i++) - { - if (isgap(aseq[i][col])) { sawgap = 1; continue; } - if (isupper((int) aseq[i][col])) { sawupper = 1; continue; } - if (islower((int) aseq[i][col])) sawlower = 1; - } - /* select gap character for column */ - gapchar = '-'; /* default */ - if (sawlower && ! 
sawupper) gapchar = '.'; - - /* pass 2: set gap char */ - for (i = 0; i < nseq; i++) - if (isgap(aseq[i][col])) aseq[i][col] = gapchar; - } -} - - -/* Function: SAMizeAlignmentByGapFrac() - * Date: SRE, Tue Jun 30 10:58:38 1998 [St. Louis] - * - * Purpose: Convert an alignment to SAM's gap and case - * conventions, using gap fraction in a column - * to choose match versus insert columns. In match columns, - * residues are upper case and gaps are '-'. - * In insert columns, residues are lower case and - * gaps are '.' - * - * Args: aseq - aligned sequences - * nseq - number of sequences - * alen - length of alignment - * maxgap - if more gaps than this fraction, column is insert. - * - * Returns: (void) Characters in aseq may be altered. - */ -void -SAMizeAlignmentByGapFrac(char **aseq, int nseq, int alen, float maxgap) -{ - int apos; /* counter over columns */ - int idx; /* counter over sequences */ - int ngap; /* number of gaps seen */ - - for (apos = 0; apos < alen; apos++) - { - /* count gaps */ - ngap = 0; - for (idx = 0; idx < nseq; idx++) - if (isgap(aseq[idx][apos])) ngap++; - - /* convert to SAM conventions */ - if ((float) ngap / (float) nseq > maxgap) - { /* insert column */ - for (idx = 0; idx < nseq; idx++) - if (isgap(aseq[idx][apos])) aseq[idx][apos] = '.'; - else aseq[idx][apos] = (char) tolower((int) aseq[idx][apos]); - } - else - { /* match column */ - for (idx = 0; idx < nseq; idx++) - if (isgap(aseq[idx][apos])) aseq[idx][apos] = '-'; - else aseq[idx][apos] = (char) toupper((int) aseq[idx][apos]); - } - } -} - - - - -/* Function: MakeAlignedString() - * - * Purpose: Given a raw string of some type (secondary structure, say), - * align it to a given aseq by putting gaps wherever the - * aseq has gaps. - * - * Args: aseq: template for alignment - * alen: length of aseq - * ss: raw string to align to aseq - * ret_s: RETURN: aligned ss - * - * Return: 1 on success, 0 on failure (and squid_errno is set.) 
- * ret_ss is malloc'ed here and must be free'd by caller. - */ -int -MakeAlignedString(char *aseq, int alen, char *ss, char **ret_s) -{ - char *new; - int apos, rpos; - - new = (char *) MallocOrDie ((alen+1) * sizeof(char)); - for (apos = rpos = 0; apos < alen; apos++) - if (! isgap(aseq[apos])) - { - new[apos] = ss[rpos]; - rpos++; - } - else - new[apos] = '.'; - new[apos] = '\0'; - - if (rpos != strlen(ss)) - { squid_errno = SQERR_PARAMETER; free(new); return 0; } - *ret_s = new; - return 1; -} - - -/* Function: MakeDealignedString() - * - * Purpose: Given an aligned string of some type (either sequence or - * secondary structure, for instance), dealign it relative - * to a given aseq. Return a ptr to the new string. - * - * Args: aseq : template alignment - * alen : length of aseq - * ss: : string to make dealigned copy of; same length as aseq - * ret_s : RETURN: dealigned copy of ss - * - * Return: 1 on success, 0 on failure (and squid_errno is set) - * ret_s is alloc'ed here and must be freed by caller - */ -int -MakeDealignedString(char *aseq, int alen, char *ss, char **ret_s) -{ - char *new; - int apos, rpos; - - new = (char *) MallocOrDie ((alen+1) * sizeof(char)); - for (apos = rpos = 0; apos < alen; apos++) - if (! isgap(aseq[apos])) - { - new[rpos] = ss[apos]; - rpos++; - } - new[rpos] = '\0'; - if (alen != strlen(ss)) - { squid_errno = SQERR_PARAMETER; free(new); return 0; } - *ret_s = new; - return 1; -} - - -/* Function: DealignedLength() - * - * Purpose: Count the number of non-gap symbols in seq. - * (i.e. find the length of the unaligned sequence) - * - * Args: aseq - aligned sequence to count symbols in, \0 terminated - * - * Return: raw length of seq. - */ -int -DealignedLength(char *aseq) -{ - int rlen; - for (rlen = 0; *aseq; aseq++) - if (! 
isgap(*aseq)) rlen++; - return rlen; -} - - -/* Function: WritePairwiseAlignment() - * - * Purpose: Write a nice formatted pairwise alignment out, - * with a BLAST-style middle line showing identities - * as themselves (single letter) and conservative - * changes as '+'. - * - * Args: ofp - open fp to write to (stdout, perhaps) - * aseq1, aseq2 - alignments to write (not necessarily - * flushed right with gaps) - * name1, name2 - names of sequences - * spos1, spos2 - starting position in each (raw) sequence - * pam - PAM matrix; positive values define - * conservative changes - * indent - how many extra spaces to print on left - * - * Return: 1 on success, 0 on failure - */ -int -WritePairwiseAlignment(FILE *ofp, - char *aseq1, char *name1, int spos1, - char *aseq2, char *name2, int spos2, - int **pam, int indent) -{ - char sname1[11]; /* shortened name */ - char sname2[11]; - int still_going; /* True if writing another block */ - char buf1[61]; /* buffer for writing seq1; CPL+1*/ - char bufmid[61]; /* buffer for writing consensus */ - char buf2[61]; - char *s1, *s2; /* ptrs into each sequence */ - int count1, count2; /* number of symbols we're writing */ - int rpos1, rpos2; /* position in raw seqs */ - int rawcount1, rawcount2; /* number of nongap symbols written */ - int apos; - - strncpy(sname1, name1, 10); - sname1[10] = '\0'; - strtok(sname1, WHITESPACE); - - strncpy(sname2, name2, 10); - sname2[10] = '\0'; - strtok(sname2, WHITESPACE); - - s1 = aseq1; - s2 = aseq2; - rpos1 = spos1; - rpos2 = spos2; - - still_going = TRUE; - while (still_going) - { - still_going = FALSE; - - /* get next line's worth from both */ - strncpy(buf1, s1, 60); buf1[60] = '\0'; - strncpy(buf2, s2, 60); buf2[60] = '\0'; - count1 = strlen(buf1); - count2 = strlen(buf2); - - /* is there still more to go? 
*/ - if ((count1 == 60 && s1[60] != '\0') || - (count2 == 60 && s2[60] != '\0')) - still_going = TRUE; - - /* shift seq ptrs by a line */ - s1 += count1; - s2 += count2; - - /* assemble the consensus line */ - for (apos = 0; apos < count1 && apos < count2; apos++) - { - if (!isgap(buf1[apos]) && !isgap(buf2[apos])) - { - if (buf1[apos] == buf2[apos]) - bufmid[apos] = buf1[apos]; - else if (pam[buf1[apos] - 'A'][buf2[apos] - 'A'] > 0) - bufmid[apos] = '+'; - else - bufmid[apos] = ' '; - } - else - bufmid[apos] = ' '; - } - bufmid[apos] = '\0'; - - rawcount1 = 0; - for (apos = 0; apos < count1; apos++) - if (!isgap(buf1[apos])) rawcount1++; - - rawcount2 = 0; - for (apos = 0; apos < count2; apos++) - if (!isgap(buf2[apos])) rawcount2++; - - (void) fprintf(ofp, "%*s%-10.10s %5d %s %5d\n", indent, "", - sname1, rpos1, buf1, rpos1 + rawcount1 -1); - (void) fprintf(ofp, "%*s %s\n", indent, "", - bufmid); - (void) fprintf(ofp, "%*s%-10.10s %5d %s %5d\n", indent, "", - sname2, rpos2, buf2, rpos2 + rawcount2 -1); - (void) fprintf(ofp, "\n"); - - rpos1 += rawcount1; - rpos2 += rawcount2; - } - - return 1; -} - - -/* Function: MingapAlignment() - * - * Purpose: Remove all-gap columns from a multiple sequence alignment - * and its associated data. The alignment is assumed to be - * flushed (all aseqs the same length). - */ -int -MingapAlignment(char **aseqs, AINFO *ainfo) -{ - int apos; /* position in original alignment */ - int mpos; /* position in new alignment */ - int idx; - - /* We overwrite aseqs, using its allocated memory. - */ - for (apos = 0, mpos = 0; aseqs[0][apos] != '\0'; apos++) - { - /* check for all-gap in column */ - for (idx = 0; idx < ainfo->nseq; idx++) - if (! 
isgap(aseqs[idx][apos])) - break; - if (idx == ainfo->nseq) continue; - - /* shift alignment and ainfo */ - if (mpos != apos) - { - for (idx = 0; idx < ainfo->nseq; idx++) - aseqs[idx][mpos] = aseqs[idx][apos]; - - if (ainfo->cs != NULL) ainfo->cs[mpos] = ainfo->cs[apos]; - if (ainfo->rf != NULL) ainfo->rf[mpos] = ainfo->rf[apos]; - } - mpos++; - } - /* null terminate everything */ - for (idx = 0; idx < ainfo->nseq; idx++) - aseqs[idx][mpos] = '\0'; - ainfo->alen = mpos; /* set new length */ - if (ainfo->cs != NULL) ainfo->cs[mpos] = '\0'; - if (ainfo->rf != NULL) ainfo->rf[mpos] = '\0'; - return 1; -} - - - -/* Function: RandomAlignment() - * - * Purpose: Create a random alignment from raw sequences. - * - * Ideally, we would like to sample an alignment from the - * space of possible alignments according to its probability, - * given a prior probability distribution for alignments. - * I don't see how to describe such a distribution, let alone - * sample it. - * - * This is a rough approximation that tries to capture some - * desired properties. We assume the alignment is generated - * by a simple HMM composed of match and insert states. - * Given parameters (pop, pex) for the probability of opening - * and extending an insertion, we can find the expected number - * of match states, M, in the underlying model for each sequence. - * We use an average M taken over all the sequences (this is - * an approximation. The expectation of M given all the sequence - * lengths is a nasty-looking summation.) - * - * M = len / ( 1 + pop ( 1 + 1/ (1-pex) ) ) - * - * Then, we assign positions in each raw sequence onto the M match - * states and M+1 insert states of this "HMM", by rolling random - * numbers and inserting the (rlen-M) inserted positions randomly - * into the insert slots, taking into account the relative probability - * of open vs. extend. 
- * - * The resulting alignment has two desired properties: insertions - * tend to follow the HMM-like exponential distribution, and - * the "sparseness" of the alignment is controllable through - * pop and pex. - * - * Args: rseqs - raw sequences to "align", 0..nseq-1 - * sqinfo - array of 0..nseq-1 info structures for the sequences - * nseq - number of sequences - * pop - probability to open insertion (0 minlen) M = minlen; - - /* make arrays that count insertions in M+1 possible insert states - */ - ins = (int **) MallocOrDie (sizeof(int *) * nseq); - master_ins = (int *) MallocOrDie (sizeof(int) * (M+1)); - for (idx = 0; idx < nseq; idx++) - { - ins[idx] = (int *) MallocOrDie (sizeof(int) * (M+1)); - for (rpos = 0; rpos <= M; rpos++) - ins[idx][rpos] = 0; - } - /* normalize */ - pop = pop / (pop+pex); - pex = 1.0 - pop; - /* make insertions for individual sequences */ - for (idx = 0; idx < nseq; idx++) - { - apos = -1; - for (rpos = 0; rpos < rlen[idx]-M; rpos++) - { - if (sre_random() < pop || apos == -1) /* open insertion */ - apos = CHOOSE(M+1); /* choose 0..M */ - ins[idx][apos]++; - } - } - /* calculate master_ins, max inserts */ - alen = M; - for (apos = 0; apos <= M; apos++) - { - master_ins[apos] = 0; - for (idx = 0; idx < nseq; idx++) - if (ins[idx][apos] > master_ins[apos]) - master_ins[apos] = ins[idx][apos]; - alen += master_ins[apos]; - } - - - /* Now, construct alignment - */ - aseqs = (char **) MallocOrDie (sizeof (char *) * nseq); - for (idx = 0; idx < nseq; idx++) - aseqs[idx] = (char *) MallocOrDie (sizeof(char) * (alen+1)); - for (idx = 0; idx < nseq; idx++) - { - apos = rpos = 0; - - for (statepos = 0; statepos <= M; statepos++) - { - for (count = 0; count < ins[idx][statepos]; count++) - aseqs[idx][apos++] = rseqs[idx][rpos++]; - for (; count < master_ins[statepos]; count++) - aseqs[idx][apos++] = ' '; - - if (statepos != M) - aseqs[idx][apos++] = rseqs[idx][rpos++]; - } - aseqs[idx][alen] = '\0'; - } - ainfo->flags = 0; - ainfo->alen = 
alen; - ainfo->nseq = nseq; - ainfo->sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO) * nseq); - for (idx = 0; idx < nseq; idx++) - SeqinfoCopy(&(ainfo->sqinfo[idx]), &(sqinfo[idx])); - - free(rlen); - free(master_ins); - Free2DArray((void **) ins, nseq); - *ret_aseqs = aseqs; - return 1; -} - -/* Function: AlignmentHomogenousGapsym() - * Date: SRE, Sun Mar 19 19:37:12 2000 [wren, St. Louis] - * - * Purpose: Sometimes we've got to convert alignments to - * a lowest common denominator, and we need - * a single specific gap character -- for example, - * PSI-BLAST blastpgp -B takes a very simplistic - * alignment input format which appears to only - * allow '-' as a gap symbol. - * - * Anything matching the isgap() macro is - * converted. - * - * Args: aseq - aligned character strings, [0..nseq-1][0..alen-1] - * nseq - number of aligned strings - * alen - length of alignment - * gapsym - character to use for gaps. - * - * Returns: void ("never fails") - */ -void -AlignmentHomogenousGapsym(char **aseq, int nseq, int alen, char gapsym) -{ - int i, apos; - - for (i = 0; i < nseq; i++) - for (apos = 0; apos < alen; apos++) - if (isgap(aseq[i][apos])) aseq[i][apos] = gapsym; -} diff --git a/forester/archive/RIO/others/hmmer/squid/alistat_main.c b/forester/archive/RIO/others/hmmer/squid/alistat_main.c deleted file mode 100644 index b7c2c2f..0000000 --- a/forester/archive/RIO/others/hmmer/squid/alistat_main.c +++ /dev/null @@ -1,273 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- *****************************************************************/ - -/* alistat_main.c - * Fri Jan 27 10:41:41 1995 - * CVS $Id: alistat_main.c,v 1.1.1.1 2005/03/22 08:34:31 cmzmasek Exp $ - * - * Look at an alignment file, determine some simple statistics. - */ - -#include -#include -#include -#include "squid.h" -#include "msa.h" - -static char banner[] = "alistat - show some simple statistics on an alignment file"; - -static char usage[] = "\ -Usage: alistat [-options] \n\ - Available options:\n\ - -a : report per-sequence info, not just a summary\n\ - -f : fast: estimate average %id by sampling (not compatible with -a)\n\ - -h : help: display usage and version\n\ - -q : quiet: suppress verbose header\n\ -"; - -static char experts[] = "\ - Expert options:\n\ - --consensus : write majority rule consensus sequence(s) in FASTA\n\ - format to file \n\ - --identmx : save a report on all NxN pairwise identities to file \n\ - --informat : specify alignment file format \n\ - allowed formats: SELEX, MSF, Clustal, a2m, PHYLIP\n\ -"; - -struct opt_s OPTIONS[] = { - { "-a", TRUE, sqdARG_NONE }, - { "-f", TRUE, sqdARG_NONE }, - { "-h", TRUE, sqdARG_NONE }, - { "-q", TRUE, sqdARG_NONE }, - { "--consensus", FALSE, sqdARG_STRING }, - { "--identmx", FALSE, sqdARG_STRING }, - { "--informat", FALSE, sqdARG_STRING }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *afile; /* name of aligned sequence file */ - MSAFILE *afp; /* pointer to open alignment file*/ - MSA *msa; /* multiple sequence alignment */ - int fmt; /* format of afile */ - int rlen; /* raw sequence length */ - int nres; /* number of residues */ - float **imx; /* identity matrix */ - int i,j; - int small, large; - int bestj, worstj; - float sum, best, worst; - float worst_worst, worst_best, best_best; - float avgid; - int nsample; - - int allreport; - int do_fast; - int be_quiet; - char *consfile; - FILE *consfp = NULL; - char *identmx_report; /* file to 
save identity matrix info to */ - FILE *identmx_fp = NULL; - - char *optname; - char *optarg; - int optind; - - /* These inits are solely to silence gcc warnings about - * uninitialized variables - */ - worst_worst = worst_best = best_best = 0.0; - bestj = worstj = -1; - - /*********************************************** - * Parse command line - ***********************************************/ - - fmt = MSAFILE_UNKNOWN; /* by default, we autodetect file format */ - allreport = FALSE; - do_fast = FALSE; - be_quiet = FALSE; - consfile = NULL; - identmx_report = NULL; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "-a") == 0) { allreport = TRUE; } - else if (strcmp(optname, "-f") == 0) { do_fast = TRUE; } - else if (strcmp(optname, "-q") == 0) { be_quiet = TRUE; } - else if (strcmp(optname, "--consensus") == 0) { consfile = optarg; } - else if (strcmp(optname, "--identmx") == 0) { identmx_report = optarg; } - else if (strcmp(optname, "--informat") == 0) { - fmt = String2SeqfileFormat(optarg); - if (fmt == MSAFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - if (! IsAlignmentFormat(fmt)) - Die("%s is an unaligned format, can't read as an alignment", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - - if (argc - optind != 1) Die("Incorrect number of arguments.\n%s\n", usage); - afile = argv[optind]; - - if (do_fast && allreport) - Die("Verbose reports (-a, --identmx) are incompatible with fast sampling (-f)"); - if (do_fast && identmx_report != NULL) - Die("Verbose reports (-a, --identmx) are incompatible with fast sampling (-f)"); - - if (! be_quiet) - Banner(stdout, banner); - - /*********************************************** - * Loop over every alignment in the file. 
- ***********************************************/ - - if ((afp = MSAFileOpen(afile, fmt, NULL)) == NULL) - Die("Alignment file %s could not be opened for reading", afile); - - if (consfile != NULL && (consfp = fopen(consfile, "w")) == NULL) - Die("Failed to open consensus sequence file %s for writing", consfile); - - if (identmx_report != NULL && (identmx_fp = fopen(identmx_report, "w")) == NULL) - Die("Failed to open identity matrix report file %s for writing", identmx_report); - - while ((msa = MSAFileRead(afp)) != NULL) - { - for (i = 0; i < msa->nseq; i++) s2upper(msa->aseq[i]); - - /* Statistics we always collect: - * unaligned sequence lengths; mean and range - */ - nres = 0; - small = large = -1; - for (i = 0; i < msa->nseq; i++) - { - rlen = DealignedLength(msa->aseq[i]); - nres += rlen; - if (small == -1 || rlen < small) small = rlen; - if (large == -1 || rlen > large) large = rlen; - } - - /* Statistics we have to be careful about - * collecting, because of time constraints on NxN operations - */ - if (do_fast) - { - nsample = 1000; - avgid = AlignmentIdentityBySampling(msa->aseq, msa->alen, msa->nseq, - nsample); - } - else - { - /* In a full report, for each sequence, find the best relative, - * and the worst relative. For overall statistics, save the - * worst best (most distant single seq) and the best best - * (most closely related pair) and the worst worst (most - * distantly related pair) and yes, I know it's confusing. - */ - - MakeIdentityMx(msa->aseq, msa->nseq, &imx); - if (allreport) { - printf(" %-15s %5s %7s %-15s %7s %-15s\n", - "NAME", "LEN", "HIGH ID", "(TO)", "LOW ID", "(TO)"); - printf(" --------------- ----- ------- --------------- ------- ---------------\n"); - } - - /* Print the identity matrix report: one line per pair of sequences. 
- */ - if (identmx_report != NULL) - { - for (i = 0; i < msa->nseq; i++) - for (j = i+1; j < msa->nseq; j++) - fprintf(identmx_fp, "%-4d %-4d %-15s %-15s %.3f\n", - i, j, msa->sqname[i], msa->sqname[j], imx[i][j]); - } - - sum = 0.0; - worst_best = 1.0; - best_best = 0.0; - worst_worst = 1.0; - for (i = 0; i < msa->nseq; i++) - { - worst = 1.0; - best = 0.0; - for (j = 0; j < msa->nseq; j++) - { /* closest seq to this one = best */ - if (i != j && imx[i][j] > best) - { best = imx[i][j]; bestj = j; } - if (imx[i][j] < worst) - { worst = imx[i][j]; worstj = j; } - } - - if (allreport) - printf("* %-15s %5d %7.1f %-15s %7.1f %-15s\n", - msa->sqname[i], DealignedLength(msa->aseq[i]), - best * 100., msa->sqname[bestj], - worst * 100., msa->sqname[worstj]); - - if (best > best_best) best_best = best; - if (best < worst_best) worst_best = best; - if (worst < worst_worst) worst_worst = worst; - for (j = 0; j < i; j++) - sum += imx[i][j]; - - } - avgid = sum / (float) (msa->nseq * (msa->nseq-1)/2.0); - if (allreport) puts(""); - FMX2Free(imx); - } - - /* Print output. - * Some fields aren't available if -f (fast) was chosen. - */ - if (msa->name != NULL) - printf("Alignment name: %s\n", msa->name); - printf("Format: %s\n", SeqfileFormat2String(afp->format)); - printf("Number of sequences: %d\n", msa->nseq); - printf("Total # residues: %d\n", nres); - printf("Smallest: %d\n", small); - printf("Largest: %d\n", large); - printf("Average length: %.1f\n", (float) nres / (float) msa->nseq); - printf("Alignment length: %d\n", msa->alen); - printf("Average identity: %.0f%%\n", 100.*avgid); - if (! 
do_fast) { - printf("Most related pair: %.0f%%\n", 100.*best_best); - printf("Most unrelated pair: %.0f%%\n", 100.*worst_worst); - printf("Most distant seq: %.0f%%\n", 100.*worst_best); - } - - /* Save majority rule consensus sequence if we were asked - */ - if (consfile != NULL) { - char *cs; - cs = MajorityRuleConsensus(msa->aseq, msa->nseq, msa->alen); - WriteSimpleFASTA(consfp, cs, - msa->name != NULL? msa->name : "consensus", - msa->desc); - free(cs); - printf("Consensus: written to %s\n", consfile); - } - - puts("//"); - MSAFree(msa); - } - - MSAFileClose(afp); - if (consfile != NULL) fclose(consfp); - return 0; -} diff --git a/forester/archive/RIO/others/hmmer/squid/clustal.c b/forester/archive/RIO/others/hmmer/squid/clustal.c deleted file mode 100644 index 5fbafb0..0000000 --- a/forester/archive/RIO/others/hmmer/squid/clustal.c +++ /dev/null @@ -1,179 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* clustal.c - * SRE, Sun Jun 6 17:50:45 1999 [bus from Madison, 1999 worm mtg] - * - * Import/export of ClustalV/W multiple sequence alignment - * formatted files. Derivative of msf.c; MSF is a pretty - * generic interleaved format. 
- * - * RCS $Id: clustal.c,v 1.1.1.1 2005/03/22 08:34:27 cmzmasek Exp $ - */ - -#include -#include -#include -#include -#include "squid.h" -#include "msa.h" - -#ifdef TESTDRIVE_CLUSTAL -/***************************************************************** - * msf.c test driver: - * cc -DTESTDRIVE_CLUSTAL -g -O2 -Wall -o test clustal.c msa.c gki.c sqerror.c sre_string.c file.c hsregex.c sre_math.c sre_ctype.c -lm - * - */ -int -main(int argc, char **argv) -{ - MSAFILE *afp; - MSA *msa; - char *file; - - file = argv[1]; - - if ((afp = MSAFileOpen(file, MSAFILE_CLUSTAL, NULL)) == NULL) - Die("Couldn't open %s\n", file); - - while ((msa = ReadClustal(afp)) != NULL) - { - WriteClustal(stdout, msa); - MSAFree(msa); - } - - MSAFileClose(afp); - exit(0); -} -/******************************************************************/ -#endif /* testdrive_clustal */ - - -/* Function: ReadClustal() - * Date: SRE, Sun Jun 6 17:53:49 1999 [bus from Madison, 1999 worm mtg] - * - * Purpose: Parse an alignment read from an open Clustal format - * alignment file. Clustal is a single-alignment format. - * Return the alignment, or NULL if we have no data. - * - * Args: afp - open alignment file - * - * Returns: MSA * - an alignment object - * caller responsible for an MSAFree() - * NULL if no more alignments - * - * Diagnostics: - * Will Die() here with a (potentially) useful message - * if a parsing error occurs. - */ -MSA * -ReadClustal(MSAFILE *afp) -{ - MSA *msa; - char *s; - int slen; - int sqidx; - char *name; - char *seq; - char *s2; - - if (feof(afp->f)) return NULL; - - /* Skip until we see the CLUSTAL header - */ - while ((s = MSAFileGetLine(afp)) != NULL) - { - if (strncmp(s, "CLUSTAL", 7) == 0 && - strstr(s, "multiple sequence alignment") != NULL) - break; - } - if (s == NULL) return NULL; - - msa = MSAAlloc(10, 0); - - /* Now we're in the sequence section. - * As discussed above, if we haven't seen a sequence name, then we - * don't include the sequence in the alignment. 
- * Watch out for conservation markup lines that contain *.: chars - */ - while ((s = MSAFileGetLine(afp)) != NULL) - { - if ((name = sre_strtok(&s, WHITESPACE, NULL)) == NULL) continue; - if ((seq = sre_strtok(&s, WHITESPACE, &slen)) == NULL) continue; - s2 = sre_strtok(&s, "\n", NULL); - - /* The test for a conservation markup line - */ - if (strpbrk(name, ".*:") != NULL && strpbrk(seq, ".*:") != NULL) - continue; - if (s2 != NULL) - Die("Parse failed at line %d, file %s: possibly using spaces as gaps", - afp->linenumber, afp->fname); - - /* It's not blank, and it's not a coord line: must be sequence - */ - sqidx = MSAGetSeqidx(msa, name, msa->lastidx+1); - msa->lastidx = sqidx; - msa->sqlen[sqidx] = sre_strcat(&(msa->aseq[sqidx]), msa->sqlen[sqidx], seq, slen); - } - - MSAVerifyParse(msa); /* verifies, and also sets alen and wgt. */ - return msa; -} - - -/* Function: WriteClustal() - * Date: SRE, Sun Jun 6 18:12:47 1999 [bus from Madison, worm mtg 1999] - * - * Purpose: Write an alignment in Clustal format to an open file. - * - * Args: fp - file that's open for writing. - * msa - alignment to write. 
- * - * Returns: (void) - */ -void -WriteClustal(FILE *fp, MSA *msa) -{ - int idx; /* counter for sequences */ - int len; /* tmp variable for name lengths */ - int namelen; /* maximum name length used */ - int pos; /* position counter */ - char buf[64]; /* buffer for writing seq */ - int cpl = 50; /* char per line (< 64) */ - - /* calculate max namelen used */ - namelen = 0; - for (idx = 0; idx < msa->nseq; idx++) - if ((len = strlen(msa->sqname[idx])) > namelen) - namelen = len; - - fprintf(fp, "CLUSTAL W(1.5) multiple sequence alignment\n"); - - /***************************************************** - * Write the sequences - *****************************************************/ - - for (pos = 0; pos < msa->alen; pos += cpl) - { - fprintf(fp, "\n"); /* Blank line between sequence blocks */ - for (idx = 0; idx < msa->nseq; idx++) - { - strncpy(buf, msa->aseq[idx] + pos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "%*s %s\n", namelen, msa->sqname[idx], buf); - } - } - - return; -} - - - diff --git a/forester/archive/RIO/others/hmmer/squid/cluster.c b/forester/archive/RIO/others/hmmer/squid/cluster.c deleted file mode 100644 index 538ae76..0000000 --- a/forester/archive/RIO/others/hmmer/squid/cluster.c +++ /dev/null @@ -1,544 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- *****************************************************************/ - -/* cluster.c - * SRE, Sun Jul 18 09:49:47 1993 - * moved to squid Thu Mar 3 08:42:57 1994 - * RCS $Id: cluster.c,v 1.1.1.1 2005/03/22 08:34:27 cmzmasek Exp $ - * - * almost identical to bord.c, from fd - * also now contains routines for constructing difference matrices - * from alignments - * - * "branch ordering": Input a symmetric or upper-right-diagonal - * NxN difference matrix (usually constructed by pairwise alignment - * and similarity calculations for N sequences). Use the simple - * cluster analysis part of the Fitch/Margoliash tree-building algorithm - * (as described by Fitch and Margoliash 1967 as well as Feng - * and Doolittle 1987) to calculate the topology of an "evolutionary - * tree" consistent with the difference matrix. Returns an array - * which represents the tree. - * - * The input difference matrix is just an NxN matrix of floats. - * A good match is a small difference score (the algorithm is going - * to search for minima among the difference scores). The original difference - * matrix remains unchanged by the calculations. - * - * The output requires some explanation. A phylogenetic - * tree is a binary tree, with N "leaves" and N-1 "nodes". The - * topology of the tree may be completely described by N-1 structures - * containing two pointers; each pointer points to either a leaf - * or another node. Here, this is implemented with integer indices - * rather than pointers. An array of N-1 pairs of ints is returned. - * If the index is in the range (0..N-1), it is a "leaf" -- the - * number of one of the sequences. If the index is in the range - * (N..2N-2), it is another "node" -- (index-N) is the index - * of the node in the returned array. - * - * If both indices of a member of the returned array point to - * nodes, the tree is "compound": composed of more than one - * cluster of related sequences. 
- * - * The higher-numbered elements of the returned array were the - * first constructed, and hence represent the distal tips - * of the tree -- the most similar sequences. The root - * is node 0. - ****************************************************************** - * - * Algorithm - * - * INITIALIZATIONS: - * - copy the difference matrix (otherwise the caller's copy would - * get destroyed by the operations of this algorithm). If - * it's asymmetric, make it symmetric. - * - make a (0..N-1) array of ints to keep track of the indices in - * the difference matrix as they get swapped around. Initialize - * this matrix to 0..N-1. - * - make a (0..N-2) array of int[2] to store the results (the tree - * topology). Doesn't need to be initialized. - * - keep track of a "N'", the current size of the difference - * matrix being operated on. - * - * PROCESSING THE DIFFERENCE MATRIX: - * - for N' = N down to N' = 2 (N-1 steps): - * - in the half-diagonal N'xN' matrix, find the indices i,j at which - * there's the minimum difference score - * - * Store the results: - * - at position N'-2 of the result array, store coords[i] and - * coords[j]. - * - * Move i,j rows, cols to the outside edges of the matrix: - * - swap row i and row N'-2 - * - swap row j and row N'-1 - * - swap column i and column N'-2 - * - swap column j and column N'-1 - * - swap indices i, N'-2 in the index array - * - swap indices j, N'-1 in the index array - * - * Build a average difference score for differences to i,j: - * - for all columns, find avg difference between rows i and j and store in row i: - * row[i][col] = (row[i][col] + row[j][col]) / 2.0 - * - copy the contents of row i to column i (it's a symmetric - * matrix, no need to recalculate) - * - store an index N'+N-2 at position N'-2 of the index array: means - * that this row/column is now a node rather than a leaf, and - * contains minimum values - * - * Continue: - * - go to the next N' - * - * GARBAGE COLLECTION & RETURN. 
- * - ********************************************************************** - * - * References: - * - * Feng D-F and R.F. Doolittle. "Progressive sequence alignment as a - * prerequisite to correct phylogenetic trees." J. Mol. Evol. - * 25:351-360, 1987. - * - * Fitch W.M. and Margoliash E. "Construction of phylogenetic trees." - * Science 155:279-284, 1967. - * - ********************************************************************** - * - * SRE, 18 March 1992 (bord.c) - * SRE, Sun Jul 18 09:52:14 1993 (cluster.c) - * added to squid Thu Mar 3 09:13:56 1994 - ********************************************************************** - * Mon May 4 09:47:02 1992: keep track of difference scores at each node - */ - - -#include -#include -#include - -#include "squid.h" -#include "sqfuncs.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -/* Function: Cluster() - * - * Purpose: Cluster analysis on a distance matrix. Constructs a - * phylogenetic tree which contains the topology - * and info for each node: branch lengths, how many - * sequences are included under the node, and which - * sequences are included under the node. - * - * Args: dmx - the NxN distance matrix ( >= 0.0, larger means more diverged) - * N - size of mx (number of sequences) - * mode - CLUSTER_MEAN, CLUSTER_MAX, or CLUSTER_MIN - * ret_tree- RETURN: the tree - * - * Return: 1 on success, 0 on failure. - * The caller is responsible for freeing the tree's memory, - * by calling FreePhylo(tree, N). 
- */ -int -Cluster(float **dmx, int N, enum clust_strategy mode, struct phylo_s **ret_tree) -{ - struct phylo_s *tree; /* (0..N-2) phylogenetic tree */ - float **mx; /* copy of difference matrix */ - int *coord; /* (0..N-1), indices for matrix coords */ - int i, j; /* coords of minimum difference */ - int idx; /* counter over seqs */ - int Np; /* N', a working copy of N */ - int row, col; /* loop variables */ - float min; /* best minimum score found */ - float *trow; /* tmp pointer for swapping rows */ - float tcol; /* tmp storage for swapping cols */ - float *diff; /* (0..N-2) difference scores at nodes */ - int swapfoo; /* for SWAP() macro */ - - /************************** - * Initializations. - **************************/ - /* We destroy the matrix we work on, so make a copy of dmx. - */ - mx = MallocOrDie (sizeof(float *) * N); - for (i = 0; i < N; i++) - { - mx[i] = MallocOrDie (sizeof(float) * N); - for (j = 0; j < N; j++) - mx[i][j] = dmx[i][j]; - } - /* coord array alloc, (0..N-1) */ - coord = MallocOrDie (N * sizeof(int)); - diff = MallocOrDie ((N-1) * sizeof(float)); - /* init the coord array to 0..N-1 */ - for (col = 0; col < N; col++) coord[col] = col; - for (i = 0; i < N-1; i++) diff[i] = 0.0; - - /* tree array alloc, (0..N-2) */ - if ((tree = AllocPhylo(N)) == NULL) Die("AllocPhylo() failed"); - - /********************************* - * Process the difference matrix - *********************************/ - - /* N-prime, for an NxN down to a 2x2 diffmx */ - j= 0; /* just to silence gcc uninit warnings */ - for (Np = N; Np >= 2; Np--) - { - /* find a minimum on the N'xN' matrix*/ - min = 999999.; - for (row = 0; row < Np; row++) - for (col = row+1; col < Np; col++) - if (mx[row][col] < min) - { - min = mx[row][col]; - i = row; - j = col; - } - - /* We're clustering row i with col j. 
write necessary - * data into a node on the tree - */ - /* topology info */ - tree[Np-2].left = coord[i]; - tree[Np-2].right = coord[j]; - if (coord[i] >= N) tree[coord[i]-N].parent = N + Np - 2; - if (coord[j] >= N) tree[coord[j]-N].parent = N + Np - 2; - - /* keep score info */ - diff[Np-2] = tree[Np-2].diff = min; - - /* way-simple branch length estimation */ - tree[Np-2].lblen = tree[Np-2].rblen = min; - if (coord[i] >= N) tree[Np-2].lblen -= diff[coord[i]-N]; - if (coord[j] >= N) tree[Np-2].rblen -= diff[coord[j]-N]; - - /* number seqs included at node */ - if (coord[i] < N) - { - tree[Np-2].incnum ++; - tree[Np-2].is_in[coord[i]] = 1; - } - else - { - tree[Np-2].incnum += tree[coord[i]-N].incnum; - for (idx = 0; idx < N; idx++) - tree[Np-2].is_in[idx] |= tree[coord[i]-N].is_in[idx]; - } - - if (coord[j] < N) - { - tree[Np-2].incnum ++; - tree[Np-2].is_in[coord[j]] = 1; - } - else - { - tree[Np-2].incnum += tree[coord[j]-N].incnum; - for (idx = 0; idx < N; idx++) - tree[Np-2].is_in[idx] |= tree[coord[j]-N].is_in[idx]; - } - - - /* Now build a new matrix, by merging row i with row j and - * column i with column j; see Fitch and Margoliash - */ - /* Row and column swapping. 
*/ - /* watch out for swapping i, j away: */ - if (i == Np-1 || j == Np-2) - SWAP(i,j); - - if (i != Np-2) - { - /* swap row i, row N'-2 */ - trow = mx[Np-2]; mx[Np-2] = mx[i]; mx[i] = trow; - /* swap col i, col N'-2 */ - for (row = 0; row < Np; row++) - { - tcol = mx[row][Np-2]; - mx[row][Np-2] = mx[row][i]; - mx[row][i] = tcol; - } - /* swap coord i, coord N'-2 */ - SWAP(coord[i], coord[Np-2]); - } - - if (j != Np-1) - { - /* swap row j, row N'-1 */ - trow = mx[Np-1]; mx[Np-1] = mx[j]; mx[j] = trow; - /* swap col j, col N'-1 */ - for (row = 0; row < Np; row++) - { - tcol = mx[row][Np-1]; - mx[row][Np-1] = mx[row][j]; - mx[row][j] = tcol; - } - /* swap coord j, coord N'-1 */ - SWAP(coord[j], coord[Np-1]); - } - - /* average i and j together; they're now - at Np-2 and Np-1 though */ - i = Np-2; - j = Np-1; - /* merge by saving avg of cols of row i and row j */ - for (col = 0; col < Np; col++) - { - switch (mode) { - case CLUSTER_MEAN: mx[i][col] =(mx[i][col]+ mx[j][col]) / 2.0; break; - case CLUSTER_MIN: mx[i][col] = MIN(mx[i][col], mx[j][col]); break; - case CLUSTER_MAX: mx[i][col] = MAX(mx[i][col], mx[j][col]); break; - default: mx[i][col] =(mx[i][col]+ mx[j][col]) / 2.0; break; - } - } - /* copy those rows to columns */ - for (col = 0; col < Np; col++) - mx[col][i] = mx[i][col]; - /* store the node index in coords */ - coord[Np-2] = Np+N-2; - } - - /************************** - * Garbage collection and return - **************************/ - Free2DArray((void **) mx, N); - free(coord); - free(diff); - *ret_tree = tree; - return 1; -} - -/* Function: AllocPhylo() - * - * Purpose: Allocate space for a phylo_s array. N-1 structures - * are allocated, one for each node; in each node, a 0..N - * is_in flag array is also allocated and initialized to - * all zeros. 
- * - * Args: N - size; number of sequences being clustered - * - * Return: pointer to the allocated array - * - */ -struct phylo_s * -AllocPhylo(int N) -{ - struct phylo_s *tree; - int i; - - if ((tree = (struct phylo_s *) malloc ((N-1) * sizeof(struct phylo_s))) == NULL) - return NULL; - - for (i = 0; i < N-1; i++) - { - tree[i].diff = 0.0; - tree[i].lblen = tree[i].rblen = 0.0; - tree[i].left = tree[i].right = tree[i].parent = -1; - tree[i].incnum = 0; - if ((tree[i].is_in = (char *) calloc (N, sizeof(char))) == NULL) - return NULL; - } - return tree; -} - - -/* Function: FreePhylo() - * - * Purpose: Free a clustree array that was built to cluster N sequences. - * - * Args: tree - phylogenetic tree to free - * N - size of clustree; number of sequences it clustered - * - * Return: (void) - */ -void -FreePhylo(struct phylo_s *tree, int N) -{ - int idx; - - for (idx = 0; idx < N-1; idx++) - free(tree[idx].is_in); - free(tree); -} - - -/* Function: MakeDiffMx() - * - * Purpose: Given a set of aligned sequences, construct - * an NxN fractional difference matrix. (i.e. 1.0 is - * completely different, 0.0 is exactly identical). - * - * Args: aseqs - flushed, aligned sequences - * num - number of aseqs - * ret_dmx - RETURN: difference matrix - * - * Return: 1 on success, 0 on failure. - * Caller must free diff matrix with FMX2Free(dmx) - */ -void -MakeDiffMx(char **aseqs, int num, float ***ret_dmx) -{ - float **dmx; /* RETURN: distance matrix */ - int i,j; /* counters over sequences */ - - /* Allocate 2D float matrix - */ - dmx = FMX2Alloc(num, num); - - /* Calculate distances; symmetric matrix - * record difference, not identity (1 - identity) - */ - for (i = 0; i < num; i++) - for (j = i; j < num; j++) - dmx[i][j] = dmx[j][i] = 1.0 - PairwiseIdentity(aseqs[i], aseqs[j]); - - *ret_dmx = dmx; - return; -} - -/* Function: MakeIdentityMx() - * - * Purpose: Given a set of aligned sequences, construct - * an NxN fractional identity matrix. (i.e. 
1.0 is - * completely identical, 0.0 is completely different). - * Virtually identical to MakeDiffMx(). It's - * less confusing to have two distinct functions, I find. - * - * Args: aseqs - flushed, aligned sequences - * num - number of aseqs - * ret_imx - RETURN: identity matrix (caller must free) - * - * Return: 1 on success, 0 on failure. - * Caller must free imx using FMX2Free(imx) - */ -void -MakeIdentityMx(char **aseqs, int num, float ***ret_imx) -{ - float **imx; /* RETURN: identity matrix */ - int i,j; /* counters over sequences */ - - /* Allocate 2D float matrix - */ - imx = FMX2Alloc(num, num); - - /* Calculate distances, symmetric matrix - */ - for (i = 0; i < num; i++) - for (j = i; j < num; j++) - imx[i][j] = imx[j][i] = PairwiseIdentity(aseqs[i], aseqs[j]); - - *ret_imx = imx; - return; -} - - - -/* Function: PrintNewHampshireTree() - * - * Purpose: Print out a tree in the "New Hampshire" standard - * format. See PHYLIP's draw.doc for a definition of - * the New Hampshire format. - * - * Like a CFG, we generate the format string left to - * right by a preorder tree traversal. - * - * Args: fp - file to print to - * ainfo- alignment info, including sequence names - * tree - tree to print - * N - number of leaves - * - */ -void -PrintNewHampshireTree(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N) -{ - struct intstack_s *stack; - int code; - float *blen; - int docomma; - - blen = (float *) MallocOrDie (sizeof(float) * (2*N-1)); - stack = InitIntStack(); - PushIntStack(stack, N); /* push root on stack */ - docomma = FALSE; - - /* node index code: - * 0..N-1 = leaves; indexes of sequences. - * N..2N-2 = interior nodes; node-N = index of node in tree structure. - * code N is the root. - * 2N..3N-2 = special flags for closing interior nodes; node-2N = index in tree - */ - while (PopIntStack(stack, &code)) - { - if (code < N) /* we're a leaf. 
*/ - { - /* 1) print name:branchlength */ - if (docomma) fputs(",", fp); - fprintf(fp, "%s:%.5f", ainfo->sqinfo[code].name, blen[code]); - docomma = TRUE; - } - - else if (code < 2*N) /* we're an interior node */ - { - /* 1) print a '(' */ - if (docomma) fputs(",\n", fp); - fputs("(", fp); - /* 2) push on stack: ), rchild, lchild */ - PushIntStack(stack, code+N); - PushIntStack(stack, tree[code-N].right); - PushIntStack(stack, tree[code-N].left); - /* 3) record branch lengths */ - blen[tree[code-N].right] = tree[code-N].rblen; - blen[tree[code-N].left] = tree[code-N].lblen; - docomma = FALSE; - } - - else /* we're closing an interior node */ - { - /* print a ):branchlength */ - if (code == 2*N) fprintf(fp, ");\n"); - else fprintf(fp, "):%.5f", blen[code-N]); - docomma = TRUE; - } - } - - FreeIntStack(stack); - free(blen); - return; -} - - -/* Function: PrintPhylo() - * - * Purpose: Debugging output of a phylogenetic tree structure. - */ -void -PrintPhylo(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N) -{ - int idx; - - for (idx = 0; idx < N-1; idx++) - { - fprintf(fp, "Interior node %d (code %d)\n", idx, idx+N); - fprintf(fp, "\tParent: %d (code %d)\n", tree[idx].parent-N, tree[idx].parent); - fprintf(fp, "\tLeft: %d (%s) %f\n", - tree[idx].left < N ? tree[idx].left-N : tree[idx].left, - tree[idx].left < N ? ainfo->sqinfo[tree[idx].left].name : "interior", - tree[idx].lblen); - fprintf(fp, "\tRight: %d (%s) %f\n", - tree[idx].right < N ? tree[idx].right-N : tree[idx].right, - tree[idx].right < N ? 
ainfo->sqinfo[tree[idx].right].name : "interior", - tree[idx].rblen); - fprintf(fp, "\tHeight: %f\n", tree[idx].diff); - fprintf(fp, "\tIncludes:%d seqs\n", tree[idx].incnum); - } -} - - - diff --git a/forester/archive/RIO/others/hmmer/squid/compalign_main.c b/forester/archive/RIO/others/hmmer/squid/compalign_main.c deleted file mode 100644 index 0ac499d..0000000 --- a/forester/archive/RIO/others/hmmer/squid/compalign_main.c +++ /dev/null @@ -1,221 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* main for compalign - * - * Compalign -- a program to compare two sequence alignments - * SRE, Tue Nov 3 07:38:03 1992 - * RCS $Id: compalign_main.c,v 1.1.1.1 2005/03/22 08:34:31 cmzmasek Exp $ - * - * incorporated into SQUID, Thu Jan 26 16:52:41 1995 - * - * Usage: compalign - * - * Calculate the fractional "identity" between the trusted alignment - * and the test alignment. The two files must contain exactly the same - * sequences, in exactly the same order. - * - * The identity of the multiple sequence alignments is defined as - * the averaged identity over all N(N-1)/2 pairwise alignments. 
- * - * The fractional identity of two sets of pairwise alignments - * is in turn defined as follows (for aligned known sequences k1 and k2, - * and aligned test sequences t1 and t2): - * - * matched columns / total columns, - * - * where total columns = the total number of columns in - * which there is a valid (nongap) symbol in k1 or k2; - * - * matched columns = the number of columns in which one of the - * following is true: - * - * k1 and k2 both have valid symbols at a given column; t1 and t2 - * have the same symbols aligned in a column of the t1/t2 - * alignment; - * - * k1 has a symbol aligned to a gap in k2; that symbol in t1 - * is also aligned to a gap; - * - * k2 has a symbol aligned to a gap in k1; that symbol in t2 - * is also aligned to a gap. - * - * Because scores for all possible pairs are calculated, the - * algorithm is of order (N^2)L for N sequences of length L; - * large sequence sets will take a while. - * - * Sean Eddy, Tue Nov 3 07:46:59 1992 - * - */ - -#include -#include -#include "squid.h" -#include "msa.h" - -static char banner[] = "compalign - compare two multiple alignments"; - -static char usage[] = "\ -Usage: compalign [-options] \n\ - Available options:\n\ - -c : only compare under marked #=CS consensus structure\n\ - -h : print short help and usage info\n\ -"; - -static char experts[] = "\ - --informat : specify that both alignments are in format (MSF, for instance)\n\ - --quiet : suppress verbose header (used in regression testing)\n\ -"; - -struct opt_s OPTIONS[] = { - { "-c", TRUE, sqdARG_NONE }, - { "-h", TRUE, sqdARG_NONE }, - { "--informat", FALSE, sqdARG_STRING }, - { "--quiet", FALSE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - - -int -main(int argc, char **argv) -{ - char *kfile; /* name of file of trusted (known) alignment */ - char *tfile; /* name of file of test alignment */ - MSAFILE *kfp; /* open ptr into trusted (known) alignfile */ - MSAFILE *tfp; /* open ptr into test 
alignment file */ - int format; /* expected format of alignment files */ - MSA *kmsa; /* a trusted (known) alignment */ - MSA *tmsa; /* a test alignment */ - char **kraw; /* dealigned trusted seqs */ - char **traw; /* dealigned test sequences */ - int idx; /* counter for sequences */ - int apos; /* position in alignment */ - float score; /* RESULT: score for the comparison */ - - int cs_only; /* TRUE to compare under #=CS annotation only */ - int *ref = NULL; /* init only to silence gcc warning */ - int be_quiet; /* TRUE to suppress verbose header */ - - char *optname; - char *optarg; - int optind; - - /*********************************************** - * Parse command line - ***********************************************/ - - format = MSAFILE_UNKNOWN; - cs_only = FALSE; - be_quiet = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "-c") == 0) cs_only = TRUE; - else if (strcmp(optname, "--quiet") == 0) be_quiet = TRUE; - else if (strcmp(optname, "--informat") == 0) { - format = String2SeqfileFormat(optarg); - if (format == MSAFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - if (! IsAlignmentFormat(format)) - Die("%s is an unaligned format, can't read as an alignment", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - - if (argc - optind != 2) - Die("Incorrect number of command line arguments.\n%s\n", usage); - - kfile = argv[optind++]; - tfile = argv[optind]; - - if (! 
be_quiet) Banner(stdout, banner); - - /*********************************************** - * Read in the alignments - * Capable of handling full Stockholm: >1 alignment/file - ***********************************************/ - - if ((kfp = MSAFileOpen(kfile, format, NULL)) == NULL) - Die("Trusted alignment file %s could not be opened for reading", kfile); - if ((tfp = MSAFileOpen(tfile, format, NULL)) == NULL) - Die("Test alignment file %s could not be opened for reading", tfile); - - while ((kmsa = MSAFileRead(kfp)) != NULL) - { - if ((tmsa = MSAFileRead(tfp)) == NULL) - Die("Failed to get a test alignment to match with the trusted alignment"); - - /* test that they're the same! */ - if (kmsa->nseq != tmsa->nseq) - Die("files %s and %s do not contain same number of seqs!\n", kfile, tfile); - - for (idx = 0; idx < kmsa->nseq; idx++) - { - s2upper(kmsa->aseq[idx]); - s2upper(tmsa->aseq[idx]); - } - /* another sanity check */ - for (idx = 0; idx < kmsa->nseq; idx++) - if (strcmp(kmsa->sqname[idx], tmsa->sqname[idx]) != 0) - Die("seqs in %s and %s don't seem to be in the same order\n (%s != %s)", - kfile, tfile, kmsa->sqname[idx], tmsa->sqname[idx]); - - /* and *another* sanity check */ - DealignAseqs(kmsa->aseq, kmsa->nseq, &kraw); - DealignAseqs(tmsa->aseq, tmsa->nseq, &traw); - for (idx = 0; idx < kmsa->nseq; idx++) - if (strcmp(kraw[idx], traw[idx]) != 0) - Die("raw seqs in %s and %s are not the same (died at %s, number %d)\n", - kfile, tfile, kmsa->sqname[idx], idx); - Free2DArray((void **) kraw, kmsa->nseq); - Free2DArray((void **) traw, tmsa->nseq); - - if (cs_only) - { - if (kmsa->ss_cons == NULL) - Die("Trusted alignment %s has no consensus structure annotation\n -- can't use -c!\n", - kfile); - ref = (int *) MallocOrDie (sizeof(int) * kmsa->alen); - for (apos = 0; apos < kmsa->alen; apos++) - ref[apos] = (isgap(kmsa->ss_cons[apos])) ? 
FALSE : TRUE; - } - - /*********************************************** - * Compare the alignments, print results - ***********************************************/ - - if (cs_only) - score = CompareRefMultAlignments(ref, kmsa->aseq, tmsa->aseq, kmsa->nseq); - else - score = CompareMultAlignments(kmsa->aseq, tmsa->aseq, kmsa->nseq); - - printf("Trusted alignment: %s\n", kmsa->name != NULL ? kmsa->name : kfile); - printf("Test alignment: %s\n", tmsa->name != NULL ? tmsa->name : tfile); - printf("Total sequences: %d\n", kmsa->nseq); - printf("Alignment identity: %.4f\n", score); - puts("//"); - - if (cs_only) free(ref); - MSAFree(kmsa); - MSAFree(tmsa); - } - - MSAFileClose(kfp); - MSAFileClose(tfp); - return 0; -} - - diff --git a/forester/archive/RIO/others/hmmer/squid/compstruct_main.c b/forester/archive/RIO/others/hmmer/squid/compstruct_main.c deleted file mode 100644 index 9701a00..0000000 --- a/forester/archive/RIO/others/hmmer/squid/compstruct_main.c +++ /dev/null @@ -1,321 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* compstruct_main.c - * SRE, Tue Aug 30 10:35:31 1994 - * - * Compare RNA secondary structures. - * RCS $Id: compstruct_main.c,v 1.1.1.1 2005/03/22 08:34:22 cmzmasek Exp $ - */ - -#include -#include -#include -#include "squid.h" -#include "msa.h" - -static char banner[] = "compalign - compare test RNA secondary structure predictions to trusted set"; - -char usage[] = "\ -Usage: compstruct [-options] \n\ - Both files must contain secondary structure markup (e.g. 
Stockholm, SQUID,\n\ - SELEX formats), and sequences must occur in the same order in the two files.\n\ -\n\ - Available options are:\n\ - -h : print short help and usage info\n\ -"; - -static char experts[] = "\ - --informat : specify that both alignments are in format (SELEX, for instance)\n\ - --quiet : suppress verbose header (used in regression testing)\n\ -"; - -struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "--informat", FALSE, sqdARG_STRING }, - { "--quiet", FALSE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - - -static int KHS2ct(char *ss, int **ret_ct); -/* static void WriteCT(FILE *fp, char *seq, int *ct, int len); */ - -int -main(int argc, char **argv) -{ - char *kfile, *tfile; /* known, test structure file */ - int format; /* expected format of kfile, tfile */ - SQFILE *kfp, *tfp; /* open kfile, tfile */ - char *kseq, *tseq; /* known, test sequence */ - SQINFO kinfo, tinfo; /* known, test info */ - int *kct, *tct; /* known, test CT rep of structure */ - int pos; - int nseq; - - int correct; /* count of correct base pair predictions */ - int missedpair; /* count of false negatives */ - int falsepair; /* count of false positives */ - int tot_trusted; /* total base pairs in trusted structure */ - int tot_predicted; /* total base pairs in predicted structure*/ - int tot_correct; /* cumulative total correct pairs */ - - int dscorrect; /* count of correct 2-state paired prediction */ - int sscorrect; /* count of correct 2-state unpaired prediction */ - int tot_dscorrect; - int tot_sscorrect; - int tot_positions; - - int quiet; /* TRUE to silence verbose banner */ - - char *optname; - char *optarg; - int optind; - - /*********************************************** - * Parse command line - ***********************************************/ - - format = MSAFILE_UNKNOWN; - quiet = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "--quiet") == 0) 
quiet = TRUE; - else if (strcmp(optname, "--informat") == 0) { - format = String2SeqfileFormat(optarg); - if (format == MSAFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - if (! IsAlignmentFormat(format)) - Die("%s is an unaligned format, can't read as an alignment", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - - if (argc - optind != 2) - Die("Incorrect number of command line arguments.\n%s\n", usage); - - kfile = argv[optind++]; - tfile = argv[optind]; - - if (! quiet) Banner(stdout, banner); - - /*********************************************** - * Open the files - ***********************************************/ - - if ((kfp = SeqfileOpen(kfile, format, NULL)) == NULL) - Die("Failed to open trusted structure file %s for reading", kfile); - if ((tfp = SeqfileOpen(tfile, format, NULL)) == NULL) - Die("Failed to open test structure file %s for reading", tfile); - - /*********************************************** - * Do structure comparisons, one seq at a time - ***********************************************/ - - tot_trusted = tot_predicted = tot_correct = 0; - tot_dscorrect = tot_sscorrect = tot_positions = 0; - nseq = 0; - while (ReadSeq(kfp, kfp->format, &kseq, &kinfo) && ReadSeq(tfp, tfp->format, &tseq, &tinfo)) - { - if (!quiet && strcmp(tinfo.name, kinfo.name) != 0) - Warn("Trusted sequence %s, test sequence %s -- names not identical\n", - kinfo.name, tinfo.name); - if (!quiet && strcmp(kseq, tseq) != 0) - Warn("Trusted sequence %s, test sequence %s -- sequences not identical\n", - kinfo.name, tinfo.name); - - printf("%s %s\n", kinfo.name, (kinfo.flags & SQINFO_DESC) ? kinfo.desc : ""); - - if (! (tinfo.flags & SQINFO_SS) && ! (kinfo.flags & SQINFO_SS)) - printf("[no test or trusted structure]\n\n"); - else if (! (tinfo.flags & SQINFO_SS)) - printf("[no test structure]\n\n"); - else if (! 
(kinfo.flags & SQINFO_SS)) - printf("[no trusted structure]\n\n"); - else - { - if (! KHS2ct(kinfo.ss, &kct)) - { printf("[bad trusted structure]\n"); goto CLEANUP;} - if (! KHS2ct(tinfo.ss, &tct)) - { printf("[bad test structure]\n"); free(kct); goto CLEANUP; } - -/* WriteCT(stdout, tseq, tct, tinfo.len); */ -/* WriteCT(stdout, tseq, kct, tinfo.len); */ - - correct = falsepair = missedpair = 0; - dscorrect = sscorrect = 0; - for (pos = 0; pos < kinfo.len; pos++) - { - /* check if actual base pair is predicted */ - if (kct[pos] >= 0 && kct[pos] == tct[pos]) - correct++; - else if (kct[pos] >= 0) - missedpair++; - - if (tct[pos] >= 0 && kct[pos] != tct[pos]) - falsepair++; - - /* 2 state prediction */ - if (kct[pos] >= 0 && tct[pos] >= 0) - dscorrect++; - else if (kct[pos] < 0 && tct[pos] < 0) - sscorrect++; - } - nseq++; - tot_trusted += correct + missedpair; - tot_predicted += correct + falsepair; - tot_correct += correct; - - tot_dscorrect += dscorrect; - tot_sscorrect += sscorrect; - tot_positions += kinfo.len; - - /* print out per sequence info */ - printf(" %d/%d trusted pairs predicted (%.2f%% sensitivity)\n", - correct, correct+missedpair, - 100. * (float) correct/ (float) (correct + missedpair)); - printf(" %d/%d predicted pairs correct (%.2f%% specificity)\n", - correct, correct + falsepair, - 100. * (float) correct/ (float) (correct + falsepair)); - - printf(" Two state: %d/%d positions correctly predicted (%.2f%% accuracy)\n", - dscorrect + sscorrect, - kinfo.len, - 100. * (float) (dscorrect + sscorrect) / (float) kinfo.len); - puts(""); - - - free(kct); - free(tct); - } - - CLEANUP: - FreeSequence(kseq, &kinfo); - FreeSequence(tseq, &tinfo); - } - - /* And the final summary: - */ - puts(""); - printf("Overall structure prediction accuracy (%d sequences, %d positions)\n", - nseq, tot_positions); - printf(" %d/%d trusted pairs predicted (%.2f%% sensitivity)\n", - tot_correct, tot_trusted, - 100. 
* (float) tot_correct/ (float) tot_trusted); - printf(" %d/%d predicted pairs correct (%.2f%% specificity)\n", - tot_correct, tot_predicted, - 100. * (float) tot_correct/ (float) tot_predicted); - printf(" Two state: %d/%d positions correctly predicted (%.2f%% accuracy)\n", - tot_dscorrect + tot_sscorrect, tot_positions, - 100. * (float) (tot_dscorrect + tot_sscorrect) / (float) tot_positions); - puts(""); - - SeqfileClose(tfp); - SeqfileClose(kfp); - return 0; -} - - -/* Function: KHS2ct() - * - * Purpose: Convert a secondary structure string to an array of integers - * representing what position each position is base-paired - * to (0..len-1), or -1 if none. This is off-by-one from a - * Zuker .ct file representation. - * - * The .ct representation can accomodate pseudoknots but the - * secondary structure string cannot easily; the string contains - * "Aa", "Bb", etc. pairs as a limited representation of - * pseudoknots. The string contains "><" for base pairs. - * Other symbols are ignored. - * - * Return: ret_ct is allocated here and must be free'd by caller. - * Returns 1 on success, 0 if ss is somehow inconsistent. - */ -static int -KHS2ct(char *ss, int **ret_ct) -{ - struct intstack_s *dolist[27]; - int *ct; - int i; - int pos, pair; - int status = 1; /* success or failure return status */ - int len; - - for (i = 0; i < 27; i++) - dolist[i] = InitIntStack(); - len = strlen(ss); - - if ((ct = (int *) malloc (len * sizeof(int))) == NULL) - Die("malloc failed"); - for (pos = 0; pos < len; pos++) - ct[pos] = -1; - - for (pos = 0; ss[pos] != '\0'; pos++) - { - if (ss[pos] == '>') /* left side of a pair: push onto stack 0 */ - PushIntStack(dolist[0], pos); - else if (ss[pos] == '<') /* right side of a pair; resolve pair */ - { - if (! 
PopIntStack(dolist[0], &pair)) - { status = 0; } - else - { - ct[pos] = pair; - ct[pair] = pos; - } - } - /* same stuff for pseudoknots */ - else if (isupper((int) ss[pos])) - PushIntStack(dolist[ss[pos] - 'A' + 1], pos); - else if (islower((int) ss[pos])) - { - if (! PopIntStack(dolist[ss[pos] - 'a' + 1], &pair)) - { status = 0; } - else - { - ct[pos] = pair; - ct[pair] = pos; - } - } - else if (!isgap(ss[pos])) status = 0; /* bad character */ - } - - for (i = 0; i < 27; i++) - if ( FreeIntStack(dolist[i]) > 0) - status = 0; - - *ret_ct = ct; - return status; -} - - -#ifdef SRE_REMOVED -/* Function: WriteCT() - * - * Purpose: Write a CT representation of a structure. - * Written in 1..len sense, with 0 for unpaired - * positions. - */ -static void -WriteCT(FILE *fp, char *seq, int *ct, int len) -{ - int pos; - for (pos = 0; pos < len; pos++) - fprintf(fp, "%d %c %d\n", pos+1, seq[pos], ct[pos]+1); -} -#endif diff --git a/forester/archive/RIO/others/hmmer/squid/configure b/forester/archive/RIO/others/hmmer/squid/configure deleted file mode 100755 index 3bfb5cb..0000000 --- a/forester/archive/RIO/others/hmmer/squid/configure +++ /dev/null @@ -1,2241 +0,0 @@ -#! /bin/sh - -# Guess values for system-dependent variables and create Makefiles. -# Generated automatically using autoconf version 2.13 -# Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. -# -# This configure script is free software; the Free Software Foundation -# gives unlimited permission to copy, distribute and modify it. - -# Defaults: -ac_help= -ac_default_prefix=/usr/local -# Any additions from configure.in: -ac_help="$ac_help - --with-pvm enable PVM, Parallel Virtual Machine" - -# Initialize some variables set by options. -# The variables have the same names as the options, with -# dashes changed to underlines. 
-build=NONE -cache_file=./config.cache -exec_prefix=NONE -host=NONE -no_create= -nonopt=NONE -no_recursion= -prefix=NONE -program_prefix=NONE -program_suffix=NONE -program_transform_name=s,x,x, -silent= -site= -srcdir= -target=NONE -verbose= -x_includes=NONE -x_libraries=NONE -bindir='${exec_prefix}/bin' -sbindir='${exec_prefix}/sbin' -libexecdir='${exec_prefix}/libexec' -datadir='${prefix}/share' -sysconfdir='${prefix}/etc' -sharedstatedir='${prefix}/com' -localstatedir='${prefix}/var' -libdir='${exec_prefix}/lib' -includedir='${prefix}/include' -oldincludedir='/usr/include' -infodir='${prefix}/info' -mandir='${prefix}/man' - -# Initialize some other variables. -subdirs= -MFLAGS= MAKEFLAGS= -SHELL=${CONFIG_SHELL-/bin/sh} -# Maximum number of lines to put in a shell here document. -ac_max_here_lines=12 - -ac_prev= -for ac_option -do - - # If the previous option needs an argument, assign it. - if test -n "$ac_prev"; then - eval "$ac_prev=\$ac_option" - ac_prev= - continue - fi - - case "$ac_option" in - -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) ac_optarg= ;; - esac - - # Accept the important Cygnus configure options, so we can diagnose typos. 
- - case "$ac_option" in - - -bindir | --bindir | --bindi | --bind | --bin | --bi) - ac_prev=bindir ;; - -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) - bindir="$ac_optarg" ;; - - -build | --build | --buil | --bui | --bu) - ac_prev=build ;; - -build=* | --build=* | --buil=* | --bui=* | --bu=*) - build="$ac_optarg" ;; - - -cache-file | --cache-file | --cache-fil | --cache-fi \ - | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) - ac_prev=cache_file ;; - -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ - | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) - cache_file="$ac_optarg" ;; - - -datadir | --datadir | --datadi | --datad | --data | --dat | --da) - ac_prev=datadir ;; - -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ - | --da=*) - datadir="$ac_optarg" ;; - - -disable-* | --disable-*) - ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - eval "enable_${ac_feature}=no" ;; - - -enable-* | --enable-*) - ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. 
- if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "enable_${ac_feature}='$ac_optarg'" ;; - - -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ - | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ - | --exec | --exe | --ex) - ac_prev=exec_prefix ;; - -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ - | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ - | --exec=* | --exe=* | --ex=*) - exec_prefix="$ac_optarg" ;; - - -gas | --gas | --ga | --g) - # Obsolete; use --with-gas. - with_gas=yes ;; - - -help | --help | --hel | --he) - # Omit some internal or obsolete options to make the list less imposing. - # This message is too long to be a string in the A/UX 3.1 sh. - cat << EOF -Usage: configure [options] [host] -Options: [defaults in brackets after descriptions] -Configuration: - --cache-file=FILE cache test results in FILE - --help print this message - --no-create do not create output files - --quiet, --silent do not print \`checking...' 
messages - --version print the version of autoconf that created configure -Directory and file names: - --prefix=PREFIX install architecture-independent files in PREFIX - [$ac_default_prefix] - --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX - [same as prefix] - --bindir=DIR user executables in DIR [EPREFIX/bin] - --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] - --libexecdir=DIR program executables in DIR [EPREFIX/libexec] - --datadir=DIR read-only architecture-independent data in DIR - [PREFIX/share] - --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] - --sharedstatedir=DIR modifiable architecture-independent data in DIR - [PREFIX/com] - --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] - --libdir=DIR object code libraries in DIR [EPREFIX/lib] - --includedir=DIR C header files in DIR [PREFIX/include] - --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] - --infodir=DIR info documentation in DIR [PREFIX/info] - --mandir=DIR man documentation in DIR [PREFIX/man] - --srcdir=DIR find the sources in DIR [configure dir or ..] 
- --program-prefix=PREFIX prepend PREFIX to installed program names - --program-suffix=SUFFIX append SUFFIX to installed program names - --program-transform-name=PROGRAM - run sed PROGRAM on installed program names -EOF - cat << EOF -Host type: - --build=BUILD configure for building on BUILD [BUILD=HOST] - --host=HOST configure for HOST [guessed] - --target=TARGET configure for TARGET [TARGET=HOST] -Features and packages: - --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) - --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] - --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) - --x-includes=DIR X include files are in DIR - --x-libraries=DIR X library files are in DIR -EOF - if test -n "$ac_help"; then - echo "--enable and --with options recognized:$ac_help" - fi - exit 0 ;; - - -host | --host | --hos | --ho) - ac_prev=host ;; - -host=* | --host=* | --hos=* | --ho=*) - host="$ac_optarg" ;; - - -includedir | --includedir | --includedi | --included | --include \ - | --includ | --inclu | --incl | --inc) - ac_prev=includedir ;; - -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ - | --includ=* | --inclu=* | --incl=* | --inc=*) - includedir="$ac_optarg" ;; - - -infodir | --infodir | --infodi | --infod | --info | --inf) - ac_prev=infodir ;; - -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) - infodir="$ac_optarg" ;; - - -libdir | --libdir | --libdi | --libd) - ac_prev=libdir ;; - -libdir=* | --libdir=* | --libdi=* | --libd=*) - libdir="$ac_optarg" ;; - - -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ - | --libexe | --libex | --libe) - ac_prev=libexecdir ;; - -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ - | --libexe=* | --libex=* | --libe=*) - libexecdir="$ac_optarg" ;; - - -localstatedir | --localstatedir | --localstatedi | --localstated \ - | --localstate | --localstat | --localsta | 
--localst \ - | --locals | --local | --loca | --loc | --lo) - ac_prev=localstatedir ;; - -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ - | --localstate=* | --localstat=* | --localsta=* | --localst=* \ - | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) - localstatedir="$ac_optarg" ;; - - -mandir | --mandir | --mandi | --mand | --man | --ma | --m) - ac_prev=mandir ;; - -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) - mandir="$ac_optarg" ;; - - -nfp | --nfp | --nf) - # Obsolete; use --without-fp. - with_fp=no ;; - - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) - no_create=yes ;; - - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) - no_recursion=yes ;; - - -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ - | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ - | --oldin | --oldi | --old | --ol | --o) - ac_prev=oldincludedir ;; - -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ - | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ - | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) - oldincludedir="$ac_optarg" ;; - - -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) - ac_prev=prefix ;; - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) - prefix="$ac_optarg" ;; - - -program-prefix | --program-prefix | --program-prefi | --program-pref \ - | --program-pre | --program-pr | --program-p) - ac_prev=program_prefix ;; - -program-prefix=* | --program-prefix=* | --program-prefi=* \ - | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) - program_prefix="$ac_optarg" ;; - - -program-suffix | --program-suffix | --program-suffi | --program-suff \ - | --program-suf | --program-su | --program-s) - ac_prev=program_suffix ;; - -program-suffix=* | 
--program-suffix=* | --program-suffi=* \ - | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) - program_suffix="$ac_optarg" ;; - - -program-transform-name | --program-transform-name \ - | --program-transform-nam | --program-transform-na \ - | --program-transform-n | --program-transform- \ - | --program-transform | --program-transfor \ - | --program-transfo | --program-transf \ - | --program-trans | --program-tran \ - | --progr-tra | --program-tr | --program-t) - ac_prev=program_transform_name ;; - -program-transform-name=* | --program-transform-name=* \ - | --program-transform-nam=* | --program-transform-na=* \ - | --program-transform-n=* | --program-transform-=* \ - | --program-transform=* | --program-transfor=* \ - | --program-transfo=* | --program-transf=* \ - | --program-trans=* | --program-tran=* \ - | --progr-tra=* | --program-tr=* | --program-t=*) - program_transform_name="$ac_optarg" ;; - - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - silent=yes ;; - - -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) - ac_prev=sbindir ;; - -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ - | --sbi=* | --sb=*) - sbindir="$ac_optarg" ;; - - -sharedstatedir | --sharedstatedir | --sharedstatedi \ - | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ - | --sharedst | --shareds | --shared | --share | --shar \ - | --sha | --sh) - ac_prev=sharedstatedir ;; - -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ - | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ - | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ - | --sha=* | --sh=*) - sharedstatedir="$ac_optarg" ;; - - -site | --site | --sit) - ac_prev=site ;; - -site=* | --site=* | --sit=*) - site="$ac_optarg" ;; - - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) - ac_prev=srcdir ;; - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) - 
srcdir="$ac_optarg" ;; - - -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ - | --syscon | --sysco | --sysc | --sys | --sy) - ac_prev=sysconfdir ;; - -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ - | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) - sysconfdir="$ac_optarg" ;; - - -target | --target | --targe | --targ | --tar | --ta | --t) - ac_prev=target ;; - -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) - target="$ac_optarg" ;; - - -v | -verbose | --verbose | --verbos | --verbo | --verb) - verbose=yes ;; - - -version | --version | --versio | --versi | --vers) - echo "configure generated by autoconf version 2.13" - exit 0 ;; - - -with-* | --with-*) - ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "with_${ac_package}='$ac_optarg'" ;; - - -without-* | --without-*) - ac_package=`echo $ac_option|sed -e 's/-*without-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - eval "with_${ac_package}=no" ;; - - --x) - # Obsolete; use --with-x. 
- with_x=yes ;; - - -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ - | --x-incl | --x-inc | --x-in | --x-i) - ac_prev=x_includes ;; - -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ - | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) - x_includes="$ac_optarg" ;; - - -x-libraries | --x-libraries | --x-librarie | --x-librari \ - | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) - ac_prev=x_libraries ;; - -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ - | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) - x_libraries="$ac_optarg" ;; - - -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } - ;; - - *) - if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then - echo "configure: warning: $ac_option: invalid host type" 1>&2 - fi - if test "x$nonopt" != xNONE; then - { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } - fi - nonopt="$ac_option" - ;; - - esac -done - -if test -n "$ac_prev"; then - { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } -fi - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -# File descriptor usage: -# 0 standard input -# 1 file creation -# 2 errors and warnings -# 3 some systems may open it to /dev/tty -# 4 used on the Kubota Titan -# 6 checking for... messages and results -# 5 compiler messages saved in config.log -if test "$silent" = yes; then - exec 6>/dev/null -else - exec 6>&1 -fi -exec 5>./config.log - -echo "\ -This file contains any messages produced by compilers while -running configure, to aid debugging if configure makes a mistake. -" 1>&5 - -# Strip out --no-create and --no-recursion so they do not pile up. -# Also quote any args containing shell metacharacters. 
-ac_configure_args= -for ac_arg -do - case "$ac_arg" in - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) ;; - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; - *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) - ac_configure_args="$ac_configure_args '$ac_arg'" ;; - *) ac_configure_args="$ac_configure_args $ac_arg" ;; - esac -done - -# NLS nuisances. -# Only set these to C if already set. These must not be set unconditionally -# because not all systems understand e.g. LANG=C (notably SCO). -# Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! -# Non-C LC_CTYPE values break the ctype check. -if test "${LANG+set}" = set; then LANG=C; export LANG; fi -if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi -if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi -if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi - -# confdefs.h avoids OS command line length limits that DEFS can exceed. -rm -rf conftest* confdefs.h -# AIX cpp loses on an empty file, so make sure it contains at least a newline. -echo > confdefs.h - -# A filename unique to this package, relative to the directory that -# configure is in, which we can look for to find out if srcdir is correct. -ac_unique_file=squidcore.c - -# Find the source files, if location was not specified. -if test -z "$srcdir"; then - ac_srcdir_defaulted=yes - # Try the directory containing this script, then its parent. - ac_prog=$0 - ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` - test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. - srcdir=$ac_confdir - if test ! -r $srcdir/$ac_unique_file; then - srcdir=.. - fi -else - ac_srcdir_defaulted=no -fi -if test ! -r $srcdir/$ac_unique_file; then - if test "$ac_srcdir_defaulted" = yes; then - { echo "configure: error: can not find sources in $ac_confdir or .." 
1>&2; exit 1; } - else - { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } - fi -fi -srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` - -# Prefer explicitly selected file to automatically selected ones. -if test -z "$CONFIG_SITE"; then - if test "x$prefix" != xNONE; then - CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" - else - CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" - fi -fi -for ac_site_file in $CONFIG_SITE; do - if test -r "$ac_site_file"; then - echo "loading site script $ac_site_file" - . "$ac_site_file" - fi -done - -if test -r "$cache_file"; then - echo "loading cache $cache_file" - . $cache_file -else - echo "creating cache $cache_file" - > $cache_file -fi - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -ac_exeext= -ac_objext=o -if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then - # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. - if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then - ac_n= ac_c=' -' ac_t=' ' - else - ac_n=-n ac_c= ac_t= - fi -else - ac_n= ac_c='\c' ac_t= -fi - - - - -echo " Welcome to SQUID... configuring for your system." - - - - - - - -# Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:540: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. 
-else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="gcc" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:570: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_prog_rejected=no - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - break - fi - done - IFS="$ac_save_ifs" -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# -gt 0; then - # We chose a different compiler from the bogus one. - # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. - shift - set dummy "$ac_dir/$ac_word" "$@" - shift - ac_cv_prog_CC="$@" - fi -fi -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - if test -z "$CC"; then - case "`uname -s`" in - *win32* | *WIN32*) - # Extract the first word of "cl", so it can be a program name with args. -set dummy cl; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:621: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="cl" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - ;; - esac - fi - test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; } -fi - -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 -echo "configure:653: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -cat > conftest.$ac_ext << EOF - -#line 664 "configure" -#include "confdefs.h" - -main(){return(0);} -EOF -if { (eval echo configure:669: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - ac_cv_prog_cc_works=yes - # If we can't run a trivial program, we are probably using a cross compiler. - if (./conftest; exit) 2>/dev/null; then - ac_cv_prog_cc_cross=no - else - ac_cv_prog_cc_cross=yes - fi -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - ac_cv_prog_cc_works=no -fi -rm -fr conftest* -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. 
-ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -echo "$ac_t""$ac_cv_prog_cc_works" 1>&6 -if test $ac_cv_prog_cc_works = no; then - { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } -fi -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 -echo "configure:695: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 -echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 -cross_compiling=$ac_cv_prog_cc_cross - -echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 -echo "configure:700: checking whether we are using GNU C" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then - ac_cv_prog_gcc=yes -else - ac_cv_prog_gcc=no -fi -fi - -echo "$ac_t""$ac_cv_prog_gcc" 1>&6 - -if test $ac_cv_prog_gcc = yes; then - GCC=yes -else - GCC= -fi - -ac_test_CFLAGS="${CFLAGS+set}" -ac_save_CFLAGS="$CFLAGS" -CFLAGS= -echo $ac_n "checking whether ${CC-cc} accepts -g""... 
$ac_c" 1>&6 -echo "configure:728: checking whether ${CC-cc} accepts -g" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - echo 'void f(){}' > conftest.c -if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then - ac_cv_prog_cc_g=yes -else - ac_cv_prog_cc_g=no -fi -rm -f conftest* - -fi - -echo "$ac_t""$ac_cv_prog_cc_g" 1>&6 -if test "$ac_test_CFLAGS" = set; then - CFLAGS="$ac_save_CFLAGS" -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi - -echo $ac_n "checking whether ln -s works""... $ac_c" 1>&6 -echo "configure:760: checking whether ln -s works" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_LN_S'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - rm -f conftestdata -if ln -s X conftestdata 2>/dev/null -then - rm -f conftestdata - ac_cv_prog_LN_S="ln -s" -else - ac_cv_prog_LN_S=ln -fi -fi -LN_S="$ac_cv_prog_LN_S" -if test "$ac_cv_prog_LN_S" = "ln -s"; then - echo "$ac_t""yes" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -# Extract the first word of "ranlib", so it can be a program name with args. -set dummy ranlib; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:783: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$RANLIB"; then - ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. 
- if test -f $ac_dir/$ac_word; then - ac_cv_prog_RANLIB="ranlib" - break - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":" -fi -fi -RANLIB="$ac_cv_prog_RANLIB" -if test -n "$RANLIB"; then - echo "$ac_t""$RANLIB" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - - - - echo $ac_n "checking whether your make is GNU make""... $ac_c" 1>&6 -echo "configure:814: checking whether your make is GNU make" >&5 - foundGNUmake='nope, assuming sysv make.' ; - EXEC_DEPENDENCY=\$\$\@_main.o ; - if ( make --version nothing 2> /dev/null | grep GNU > /dev/null ) ; then - foundGNUmake='yes, it is.' ; - EXEC_DEPENDENCY='%: %_main.o' ; - fi - echo "$ac_t""$foundGNUmake" 1>&6 - - - -# Check whether --with-pvm or --without-pvm was given. -if test "${with_pvm+set}" = set; then - withval="$with_pvm" - case $with_pvm in - yes) echo 'Configuring for PVM' - PVMLIBDIR="-L${PVM_ROOT}/lib/${PVM_ARCH}" - PVMINCDIR="-I${PVM_ROOT}/include" - PVMFLAG="-DSRE_ENABLE_PVM" - PVMLIBS="-lpvm3" - ;; - no) ;; - *) echo "Ignoring unknown argument to --with-pvm: $with_pvm" - ;; -esac -fi - - -echo $ac_n "checking whether byte ordering is bigendian""... $ac_c" 1>&6 -echo "configure:843: checking whether byte ordering is bigendian" >&5 -if eval "test \"`echo '$''{'ac_cv_c_bigendian'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_cv_c_bigendian=unknown -# See if sys/param.h defines the BYTE_ORDER macro. -cat > conftest.$ac_ext < -#include -int main() { - -#if !BYTE_ORDER || !BIG_ENDIAN || !LITTLE_ENDIAN - bogus endian macros -#endif -; return 0; } -EOF -if { (eval echo configure:861: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - # It does; now see whether it defined to BIG_ENDIAN or not. 
-cat > conftest.$ac_ext < -#include -int main() { - -#if BYTE_ORDER != BIG_ENDIAN - not big endian -#endif -; return 0; } -EOF -if { (eval echo configure:876: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_c_bigendian=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_c_bigendian=no -fi -rm -f conftest* -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* -if test $ac_cv_c_bigendian = unknown; then -if test "$cross_compiling" = yes; then - { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } -else - cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_c_bigendian=no -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_c_bigendian=yes -fi -rm -fr conftest* -fi - -fi -fi - -echo "$ac_t""$ac_cv_c_bigendian" 1>&6 -if test $ac_cv_c_bigendian = yes; then - cat >> confdefs.h <<\EOF -#define WORDS_BIGENDIAN 1 -EOF - -fi - -for ac_func in ntohs -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:935: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:963: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -echo $ac_n "checking for ntohs in -lsocket""... $ac_c" 1>&6 -echo "configure:985: checking for ntohs in -lsocket" >&5 -ac_lib_var=`echo socket'_'ntohs | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lsocket $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo socket | sed -e 's/^a-zA-Z0-9_/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - -fi -done - -for ac_func in ntohl -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1037: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. 
*/ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1065: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -echo $ac_n "checking for ntohl in -lsocket""... 
$ac_c" 1>&6 -echo "configure:1087: checking for ntohl in -lsocket" >&5 -ac_lib_var=`echo socket'_'ntohl | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lsocket $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo socket | sed -e 's/^a-zA-Z0-9_/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - -fi -done - -for ac_func in htons -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1139: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1167: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -echo $ac_n "checking for htons in -lsocket""... $ac_c" 1>&6 -echo "configure:1189: checking for htons in -lsocket" >&5 -ac_lib_var=`echo socket'_'htons | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lsocket $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo socket | sed -e 's/^a-zA-Z0-9_/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - -fi -done - -for ac_func in htonl -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1241: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. 
*/ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1269: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -echo $ac_n "checking for htonl in -lsocket""... $ac_c" 1>&6 -echo "configure:1291: checking for htonl in -lsocket" >&5 -ac_lib_var=`echo socket'_'htonl | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lsocket $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo socket | sed -e 's/^a-zA-Z0-9_/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - -fi -done - -echo $ac_n "checking size of unsigned short""... 
$ac_c" 1>&6 -echo "configure:1341: checking size of unsigned short" >&5 -if eval "test \"`echo '$''{'ac_cv_sizeof_unsigned_short'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - ac_cv_sizeof_unsigned_short=2 -else - cat > conftest.$ac_ext < -main() -{ - FILE *f=fopen("conftestval", "w"); - if (!f) exit(1); - fprintf(f, "%d\n", sizeof(unsigned short)); - exit(0); -} -EOF -if { (eval echo configure:1360: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_sizeof_unsigned_short=`cat conftestval` -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_sizeof_unsigned_short=0 -fi -rm -fr conftest* -fi - -fi -echo "$ac_t""$ac_cv_sizeof_unsigned_short" 1>&6 -cat >> confdefs.h <&6 -echo "configure:1380: checking size of unsigned int" >&5 -if eval "test \"`echo '$''{'ac_cv_sizeof_unsigned_int'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - ac_cv_sizeof_unsigned_int=4 -else - cat > conftest.$ac_ext < -main() -{ - FILE *f=fopen("conftestval", "w"); - if (!f) exit(1); - fprintf(f, "%d\n", sizeof(unsigned int)); - exit(0); -} -EOF -if { (eval echo configure:1399: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_sizeof_unsigned_int=`cat conftestval` -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_sizeof_unsigned_int=0 -fi -rm -fr conftest* -fi - -fi -echo "$ac_t""$ac_cv_sizeof_unsigned_int" 1>&6 -cat >> confdefs.h <&6 -echo "configure:1419: checking size of unsigned long" >&5 -if eval "test \"`echo '$''{'ac_cv_sizeof_unsigned_long'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - ac_cv_sizeof_unsigned_long=4 -else - cat > conftest.$ac_ext < -main() -{ - FILE 
*f=fopen("conftestval", "w"); - if (!f) exit(1); - fprintf(f, "%d\n", sizeof(unsigned long)); - exit(0); -} -EOF -if { (eval echo configure:1438: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_sizeof_unsigned_long=`cat conftestval` -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_sizeof_unsigned_long=0 -fi -rm -fr conftest* -fi - -fi -echo "$ac_t""$ac_cv_sizeof_unsigned_long" 1>&6 -cat >> confdefs.h <&6 -echo "configure:1458: checking size of unsigned long long" >&5 -if eval "test \"`echo '$''{'ac_cv_sizeof_unsigned_long_long'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - ac_cv_sizeof_unsigned_long_long=8 -else - cat > conftest.$ac_ext < -main() -{ - FILE *f=fopen("conftestval", "w"); - if (!f) exit(1); - fprintf(f, "%d\n", sizeof(unsigned long long)); - exit(0); -} -EOF -if { (eval echo configure:1477: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_sizeof_unsigned_long_long=`cat conftestval` -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_sizeof_unsigned_long_long=0 -fi -rm -fr conftest* -fi - -fi -echo "$ac_t""$ac_cv_sizeof_unsigned_long_long" 1>&6 -cat >> confdefs.h <&2 "No 16-bit int? Manually edit config file to typedef sqd_uint16." -fi -if test "$ac_cv_sizeof_unsigned_int" = "4"; then - SQD_UINT32="unsigned int " -elif test "$ac_cv_sizeof_unsigned_long" = "4"; then - SQD_UINT32="unsigned long " -else - SQD_UINT32="FIXME" - echo "configure: warning: " 1>&2 "No 32-bit int? Manually edit config file to typedef sqd_uint32." 
-fi -if test "$ac_cv_sizeof_unsigned_long" = "8"; then - SQD_UINT64="unsigned long " -elif test "$ac_cv_sizeof_unsigned_long_long" = "8"; then - SQD_UINT64="unsigned long long" -else - SQD_UINT64="FIXME" - echo "configure: warning: " 1>&2 "No 64-bit int? Manually edit config file to typedef sqd_uint64." -fi - - - - - - - - echo $ac_n "checking whether fpos_t is an arithmetic datatype""... $ac_c" 1>&6 -echo "configure:1526: checking whether fpos_t is an arithmetic datatype" >&5 - fpos_arithmetic="no." - cat > conftest.$ac_ext < -int main() { -int main(void) { fpos_t f1, f2; if (f1 == f2) f1 = 0;} -; return 0; } -EOF -if { (eval echo configure:1536: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - cat >> confdefs.h <<\EOF -#define ARITHMETIC_FPOS_T 1 -EOF - - fpos_arithmetic="yes." -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* - echo "$ac_t""$fpos_arithmetic" 1>&6 - - -for ac_func in ftello fseeko -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1554: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1582: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - -for ac_func in ftello64 fseeko64 -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1609: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1637: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - -for ac_func in ftell64 fseek64 -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1664: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1692: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - -echo $ac_n "checking for stat64""... $ac_c" 1>&6 -echo "configure:1717: checking for stat64" >&5 -if eval "test \"`echo '$''{'ac_cv_func_stat64'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char stat64(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_stat64) || defined (__stub___stat64) -choke me -#else -stat64(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1745: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_stat64=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_stat64=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'stat64`\" = yes"; then - echo "$ac_t""yes" 1>&6 - : -else - echo "$ac_t""no" 1>&6 -fi - -echo $ac_n "checking size of off_t""... 
$ac_c" 1>&6 -echo "configure:1765: checking size of off_t" >&5 -if eval "test \"`echo '$''{'ac_cv_sizeof_off_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } -else - cat > conftest.$ac_ext < -main() -{ - FILE *f=fopen("conftestval", "w"); - if (!f) exit(1); - fprintf(f, "%d\n", sizeof(off_t)); - exit(0); -} -EOF -if { (eval echo configure:1784: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_sizeof_off_t=`cat conftestval` -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_sizeof_off_t=0 -fi -rm -fr conftest* -fi - -fi -echo "$ac_t""$ac_cv_sizeof_off_t" 1>&6 -cat >> confdefs.h <&6 -echo "configure:1804: checking size of off64_t" >&5 -if eval "test \"`echo '$''{'ac_cv_sizeof_off64_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } -else - cat > conftest.$ac_ext < -main() -{ - FILE *f=fopen("conftestval", "w"); - if (!f) exit(1); - fprintf(f, "%d\n", sizeof(off64_t)); - exit(0); -} -EOF -if { (eval echo configure:1823: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_sizeof_off64_t=`cat conftestval` -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_sizeof_off64_t=0 -fi -rm -fr conftest* -fi - -fi -echo "$ac_t""$ac_cv_sizeof_off64_t" 1>&6 -cat >> confdefs.h <&6 -echo "configure:1843: checking size of fpos_t" >&5 -if eval "test \"`echo '$''{'ac_cv_sizeof_fpos_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - { echo "configure: error: can not run test program while 
cross compiling" 1>&2; exit 1; } -else - cat > conftest.$ac_ext < -main() -{ - FILE *f=fopen("conftestval", "w"); - if (!f) exit(1); - fprintf(f, "%d\n", sizeof(fpos_t)); - exit(0); -} -EOF -if { (eval echo configure:1862: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_sizeof_fpos_t=`cat conftestval` -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_sizeof_fpos_t=0 -fi -rm -fr conftest* -fi - -fi -echo "$ac_t""$ac_cv_sizeof_fpos_t" 1>&6 -cat >> confdefs.h < confcache <<\EOF -# This file is a shell script that caches the results of configure -# tests run on this system so they can be shared between configure -# scripts and configure runs. It is not useful on other systems. -# If it contains results you don't want to keep, you may remove or edit it. -# -# By default, configure uses ./config.cache as the cache file, -# creating it if it does not exist already. You can give configure -# the --cache-file=FILE option to use a different cache file; that is -# what configure does when it calls configure scripts in -# subdirectories, so they share the cache. -# Giving --cache-file=/dev/null disables caching, for debugging configure. -# config.status only pays attention to the cache file if you give it the -# --recheck option to rerun configure. -# -EOF -# The following way of writing the cache mishandles newlines in values, -# but we know of no workaround that is simple, portable, and efficient. -# So, don't put newlines in cache variables' values. -# Ultrix sh set writes to stderr and can't be redirected directly, -# and sets the high bit in the cache file unless we assign to the vars. -(set) 2>&1 | - case `(ac_space=' '; set | grep ac_space) 2>&1` in - *ac_space=\ *) - # `set' does not quote correctly, so add quotes (double-quote substitution - # turns \\\\ into \\, and sed turns \\ into \). 
- sed -n \ - -e "s/'/'\\\\''/g" \ - -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" - ;; - *) - # `set' quotes correctly as required by POSIX, so do not add quotes. - sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' - ;; - esac >> confcache -if cmp -s $cache_file confcache; then - : -else - if test -w $cache_file; then - echo "updating cache $cache_file" - cat confcache > $cache_file - else - echo "not updating unwritable cache $cache_file" - fi -fi -rm -f confcache - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -test "x$prefix" = xNONE && prefix=$ac_default_prefix -# Let make expand exec_prefix. -test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' - -# Any assignment to VPATH causes Sun make to only execute -# the first set of double-colon rules, so remove it if not needed. -# If there is a colon in the path, we need to keep it. -if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' -fi - -trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 - -DEFS=-DHAVE_CONFIG_H - -# Without the "./", some shells look in PATH for config.status. -: ${CONFIG_STATUS=./config.status} - -echo creating $CONFIG_STATUS -rm -f $CONFIG_STATUS -cat > $CONFIG_STATUS </dev/null | sed 1q`: -# -# $0 $ac_configure_args -# -# Compiler output produced by configure, useful for debugging -# configure, is in ./config.log if it exists. 
- -ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" -for ac_option -do - case "\$ac_option" in - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" - exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; - -version | --version | --versio | --versi | --vers | --ver | --ve | --v) - echo "$CONFIG_STATUS generated by autoconf version 2.13" - exit 0 ;; - -help | --help | --hel | --he | --h) - echo "\$ac_cs_usage"; exit 0 ;; - *) echo "\$ac_cs_usage"; exit 1 ;; - esac -done - -ac_given_srcdir=$srcdir - -trap 'rm -fr `echo "Makefile squid.h squidconf.h" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 -EOF -cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF -$ac_vpsub -$extrasub -s%@SHELL@%$SHELL%g -s%@CFLAGS@%$CFLAGS%g -s%@CPPFLAGS@%$CPPFLAGS%g -s%@CXXFLAGS@%$CXXFLAGS%g -s%@FFLAGS@%$FFLAGS%g -s%@DEFS@%$DEFS%g -s%@LDFLAGS@%$LDFLAGS%g -s%@LIBS@%$LIBS%g -s%@exec_prefix@%$exec_prefix%g -s%@prefix@%$prefix%g -s%@program_transform_name@%$program_transform_name%g -s%@bindir@%$bindir%g -s%@sbindir@%$sbindir%g -s%@libexecdir@%$libexecdir%g -s%@datadir@%$datadir%g -s%@sysconfdir@%$sysconfdir%g -s%@sharedstatedir@%$sharedstatedir%g -s%@localstatedir@%$localstatedir%g -s%@libdir@%$libdir%g -s%@includedir@%$includedir%g -s%@oldincludedir@%$oldincludedir%g -s%@infodir@%$infodir%g -s%@mandir@%$mandir%g -s%@MDEFS@%$MDEFS%g -s%@PVMLIBDIR@%$PVMLIBDIR%g -s%@PVMINCDIR@%$PVMINCDIR%g -s%@PVMFLAG@%$PVMFLAG%g -s%@PVMLIBS@%$PVMLIBS%g -s%@CC@%$CC%g -s%@LN_S@%$LN_S%g -s%@RANLIB@%$RANLIB%g -s%@EXEC_DEPENDENCY@%$EXEC_DEPENDENCY%g -s%@SQD_UINT16@%$SQD_UINT16%g -s%@SQD_UINT32@%$SQD_UINT32%g -s%@SQD_UINT64@%$SQD_UINT64%g - -CEOF -EOF - -cat >> $CONFIG_STATUS <<\EOF - -# Split the substitutions into bite-sized pieces for seds with -# small command number limits, like on Digital OSF/1 and HP-UX. 
-ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. -ac_file=1 # Number of current file. -ac_beg=1 # First line for current file. -ac_end=$ac_max_sed_cmds # Line after last line for current file. -ac_more_lines=: -ac_sed_cmds="" -while $ac_more_lines; do - if test $ac_beg -gt 1; then - sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file - else - sed "${ac_end}q" conftest.subs > conftest.s$ac_file - fi - if test ! -s conftest.s$ac_file; then - ac_more_lines=false - rm -f conftest.s$ac_file - else - if test -z "$ac_sed_cmds"; then - ac_sed_cmds="sed -f conftest.s$ac_file" - else - ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" - fi - ac_file=`expr $ac_file + 1` - ac_beg=$ac_end - ac_end=`expr $ac_end + $ac_max_sed_cmds` - fi -done -if test -z "$ac_sed_cmds"; then - ac_sed_cmds=cat -fi -EOF - -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF -for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then - # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". - case "$ac_file" in - *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` - ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; - *) ac_file_in="${ac_file}.in" ;; - esac - - # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. - - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` - if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then - # The file is in a subdirectory. - test ! -d "$ac_dir" && mkdir "$ac_dir" - ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" - # A "../" for each directory in $ac_dir_suffix. - ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` - else - ac_dir_suffix= ac_dots= - fi - - case "$ac_given_srcdir" in - .) srcdir=. - if test -z "$ac_dots"; then top_srcdir=. - else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; - /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; - *) # Relative path. 
- srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" - top_srcdir="$ac_dots$ac_given_srcdir" ;; - esac - - - echo creating "$ac_file" - rm -f "$ac_file" - configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." - case "$ac_file" in - *Makefile*) ac_comsub="1i\\ -# $configure_input" ;; - *) ac_comsub= ;; - esac - - ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` - sed -e "$ac_comsub -s%@configure_input@%$configure_input%g -s%@srcdir@%$srcdir%g -s%@top_srcdir@%$top_srcdir%g -" $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file -fi; done -rm -f conftest.s* - -# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where -# NAME is the cpp macro being defined and VALUE is the value it is being given. -# -# ac_d sets the value in "#define NAME VALUE" lines. -ac_dA='s%^\([ ]*\)#\([ ]*define[ ][ ]*\)' -ac_dB='\([ ][ ]*\)[^ ]*%\1#\2' -ac_dC='\3' -ac_dD='%g' -# ac_u turns "#undef NAME" with trailing blanks into "#define NAME VALUE". -ac_uA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' -ac_uB='\([ ]\)%\1#\2define\3' -ac_uC=' ' -ac_uD='\4%g' -# ac_e turns "#undef NAME" without trailing blanks into "#define NAME VALUE". -ac_eA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' -ac_eB='$%\1#\2define\3' -ac_eC=' ' -ac_eD='%g' - -if test "${CONFIG_HEADERS+set}" != set; then -EOF -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF -fi -for ac_file in .. $CONFIG_HEADERS; do if test "x$ac_file" != x..; then - # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". 
- case "$ac_file" in - *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` - ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; - *) ac_file_in="${ac_file}.in" ;; - esac - - echo creating $ac_file - - rm -f conftest.frag conftest.in conftest.out - ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` - cat $ac_file_inputs > conftest.in - -EOF - -# Transform confdefs.h into a sed script conftest.vals that substitutes -# the proper values into config.h.in to produce config.h. And first: -# Protect against being on the right side of a sed subst in config.status. -# Protect against being in an unquoted here document in config.status. -rm -f conftest.vals -cat > conftest.hdr <<\EOF -s/[\\&%]/\\&/g -s%[\\$`]%\\&%g -s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD}%gp -s%ac_d%ac_u%gp -s%ac_u%ac_e%gp -EOF -sed -n -f conftest.hdr confdefs.h > conftest.vals -rm -f conftest.hdr - -# This sed command replaces #undef with comments. This is necessary, for -# example, in the case of _POSIX_SOURCE, which is predefined and required -# on some systems where configure will not decide to define it. -cat >> conftest.vals <<\EOF -s%^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */% -EOF - -# Break up conftest.vals because some shells have a limit on -# the size of here documents, and old seds have small limits too. - -rm -f conftest.tail -while : -do - ac_lines=`grep -c . conftest.vals` - # grep -c gives empty output for an empty file on some AIX systems. - if test -z "$ac_lines" || test "$ac_lines" -eq 0; then break; fi - # Write a limited-size here document to conftest.frag. 
- echo ' cat > conftest.frag <> $CONFIG_STATUS - sed ${ac_max_here_lines}q conftest.vals >> $CONFIG_STATUS - echo 'CEOF - sed -f conftest.frag conftest.in > conftest.out - rm -f conftest.in - mv conftest.out conftest.in -' >> $CONFIG_STATUS - sed 1,${ac_max_here_lines}d conftest.vals > conftest.tail - rm -f conftest.vals - mv conftest.tail conftest.vals -done -rm -f conftest.vals - -cat >> $CONFIG_STATUS <<\EOF - rm -f conftest.frag conftest.h - echo "/* $ac_file. Generated automatically by configure. */" > conftest.h - cat conftest.in >> conftest.h - rm -f conftest.in - if cmp -s $ac_file conftest.h 2>/dev/null; then - echo "$ac_file is unchanged" - rm -f conftest.h - else - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` - if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then - # The file is in a subdirectory. - test ! -d "$ac_dir" && mkdir "$ac_dir" - fi - rm -f $ac_file - mv conftest.h $ac_file - fi -fi; done - -EOF -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF - -exit 0 -EOF -chmod +x $CONFIG_STATUS -rm -fr confdefs* $ac_clean_files -test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 - - - diff --git a/forester/archive/RIO/others/hmmer/squid/dayhoff.c b/forester/archive/RIO/others/hmmer/squid/dayhoff.c deleted file mode 100644 index 906fb76..0000000 --- a/forester/archive/RIO/others/hmmer/squid/dayhoff.c +++ /dev/null @@ -1,171 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* dayhoff.c - * - * Routines for dealing with PAM matrices. 
- * - * Includes: - * ParsePAMFile() -- read a PAM matrix from disk. - * - * - * SRE - Fri Apr 2 11:23:45 1993 - * RCS $Id: dayhoff.c,v 1.1.1.1 2005/03/22 08:34:17 cmzmasek Exp $ - */ - - -#include -#include -#include -#include -#include -#include "squid.h" - -/* Function: ParsePAMFile() - * - * Purpose: Given a pointer to an open file containing a PAM matrix, - * parse the file and allocate and fill a 2D array of - * floats containing the matrix. The PAM file is - * assumed to be in the format that NCBI distributes - * with BLAST. BLOSUM matrices also work fine, as - * produced by Henikoff's program "MATBLAS". - * - * Parses both old format and new format BLAST matrices. - * Old format just had rows of integers. - * New format includes a leading character on each row. - * - * The PAM matrix is a 27x27 matrix, 0=A..25=Z,26=*. - * Note that it's not a 20x20 matrix as you might expect; - * this is for speed of indexing as well as the ability - * to deal with ambiguous characters. - * - * Args: fp - open PAM file - * ret_pam - RETURN: pam matrix, integers - * ret_scale - RETURN: scale factor for converting - * to real Sij. For instance, PAM120 is - * given in units of ln(2)/2. This may - * be passed as NULL if the caller - * doesn't care. - * - * Returns: 1 on success; 0 on failure and sets squid_errno to - * indicate the cause. ret_pam is allocated here and - * must be freed by the caller (use FreePAM). - */ -int -ParsePAMFile(FILE *fp, int ***ret_pam, float *ret_scale) -{ - int **pam; - char buffer[512]; /* input buffer from fp */ - int order[27]; /* order of fields, obtained from header */ - int nsymbols; /* total number of symbols in matrix */ - char *sptr; - int idx; - int row, col; - float scale; - int gotscale = FALSE; - - if (fp == NULL) { squid_errno = SQERR_NODATA; return 0; } - - /* Look at the first non-blank, non-comment line in the file. - * It gives single-letter codes in the order the PAM matrix - * is arrayed in the file. 
- */ - do { - if (fgets(buffer, 512, fp) == NULL) - { squid_errno = SQERR_NODATA; return 0; } - - /* Get the scale factor from the header. - * For BLOSUM files, we assume the line looks like: - * BLOSUM Clustered Scoring Matrix in 1/2 Bit Units - * and we assume that the fraction is always 1/x; - * - * For PAM files, we assume the line looks like: - * PAM 120 substitution matrix, scale = ln(2)/2 = 0.346574 - * and we assume that the number following the final '=' is our scale - */ - scale = 0.0; /* just to silence gcc uninit warnings */ - if (strstr(buffer, "BLOSUM Clustered Scoring Matrix") != NULL && - (sptr = strchr(buffer, '/')) != NULL) - { - sptr++; - if (! isdigit((int) (*sptr))) { squid_errno = SQERR_FORMAT; return 0; } - scale = (float) (log(2.0) / atof(sptr)); - gotscale = TRUE; - } - else if (strstr(buffer, "substitution matrix,") != NULL) - { - while ((sptr = strrchr(buffer, '=')) != NULL) { - sptr += 2; - if (IsReal(sptr)) { - scale = atof(sptr); - gotscale = TRUE; - break; - } - } - } - } while ((sptr = strtok(buffer, " \t\n")) == NULL || *sptr == '#'); - - idx = 0; - do { - order[idx] = (int) *sptr - (int) 'A'; - if (order[idx] < 0 || order[idx] > 25) order[idx] = 26; - idx++; - } while ((sptr = strtok(NULL, " \t\n")) != NULL); - nsymbols = idx; - - /* Allocate a pam matrix. For speed of indexing, we use - * a 27x27 matrix so we can do lookups using the ASCII codes - * of amino acid single-letter representations, plus one - * extra field to deal with the "*" (terminators). - */ - if ((pam = (int **) calloc (27, sizeof(int *))) == NULL) - Die("calloc failed"); - for (idx = 0; idx < 27; idx++) - if ((pam[idx] = (int *) calloc (27, sizeof(int))) == NULL) - Die("calloc failed"); - - /* Parse the rest of the file. 
- */ - for (row = 0; row < nsymbols; row++) - { - if (fgets(buffer, 512, fp) == NULL) - { squid_errno = SQERR_NODATA; return 0; } - - if ((sptr = strtok(buffer, " \t\n")) == NULL) - { squid_errno = SQERR_NODATA; return 0; } - for (col = 0; col < nsymbols; col++) - { - if (sptr == NULL) { squid_errno = SQERR_NODATA; return 0; } - - /* Watch out for new BLAST format, with leading characters - */ - if (*sptr == '*' || isalpha((int) *sptr)) - col--; /* hack hack */ - else - pam [order[row]] [order[col]] = atoi(sptr); - - sptr = strtok(NULL, " \t\n"); - } - } - - /* Return - */ - if (ret_scale != NULL) - { - if (gotscale) *ret_scale = scale; - else - { - Warn("Failed to parse PAM matrix scale factor. Defaulting to ln(2)/2!"); - *ret_scale = log(2.0) / 2.0; - } - } - *ret_pam = pam; - return 1; -} diff --git a/forester/archive/RIO/others/hmmer/squid/eps.c b/forester/archive/RIO/others/hmmer/squid/eps.c deleted file mode 100644 index 849c8f1..0000000 --- a/forester/archive/RIO/others/hmmer/squid/eps.c +++ /dev/null @@ -1,115 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* eps.c - * SRE, Thu Jun 21 18:02:31 2001 [St. Louis] - * - * Some crude support for Encapsulated PostScript (EPS) output, - * DSC compliant. - * - * CVS $Id: eps.c,v 1.1.1.1 2005/03/22 08:34:17 cmzmasek Exp $ - */ - -#include -#include - -#include "squid.h" -#include "msa.h" - -/* Function: EPSWriteSmallMSA() - * Date: SRE, Thu Jun 21 18:15:21 2001 [St. Louis] - * - * Purpose: Write an alignment in singleblock, Stockholm/SELEX like - * format to an open file. Very crude. 
- * Currently fails if the alignment is >50 columns long, because - * it doesn't think it will fit on a single page. - * - * Args: fp - open file for writing - * msa - alignment to write - * - * Returns: (void) - */ -void -EPSWriteSmallMSA(FILE *fp, MSA *msa) -{ - int namewidth; /* namewidth in PostScript units */ - int fontwidth; /* width of a character in this font */ - int hspace; /* horizontal space between aligned chars */ - int vspace; /* vertical space between sequences */ - char *font; /* font name, e.g. "Courier" */ - int fontsize; /* font size in pts */ - int i,j; /* counter over sequences, columns */ - int len; /* tmp var holding length of something */ - int width, height; /* width and height of bounding box */ - int xpos, ypos; /* x,y position */ - - /* Set some font characteristics; done here, so it'll - * be easy to change. Magic numbers for Courier 12 determined - * by trial and error. - */ - fontwidth = 8; - hspace = 9; - vspace = 15; - font = sre_strdup("Courier", -1); - fontsize = 12; - - /* Find the width of the longest sequence name in characters. - */ - namewidth = 0; - for (i = 0; i < msa->nseq; i++) - if ((len = (int) strlen(msa->sqname[i])) > namewidth) - namewidth = len; - namewidth += 1; /* add a space to separate name & aligned seq */ - namewidth *= fontwidth; - - /* Determine bounding box - */ - if (msa->alen > 50) Die("No EPS fmt if alignment is >50 columns"); - width = namewidth + hspace*msa->alen; - if (width > 612) Die("Alignment too wide to write in EPS"); - height = vspace*msa->nseq; - if (height > 792) Die("Too many seqs to write in EPS"); - - /* Magic EPS header, bare-bones DSC-compliant. 
- */ - fprintf(fp, "%%!PS-Adobe-3.0 EPSF-3.0\n"); - fprintf(fp, "%%%%BoundingBox: %d %d %d %d\n", 0, 0, width, height); - fprintf(fp, "%%%%Pages: 1\n"); - fprintf(fp, "%%%%EndComments\n"); - - /* More postscript magic before we start the alignment - */ - fprintf(fp, "/%s findfont\n", font); - fprintf(fp, "%d scalefont\n", fontsize); - fprintf(fp, "setfont\n"); - fprintf(fp, "newpath\n"); - - /* Write the alignment in PostScript in a single block - */ - for (i = 0; i < msa->nseq; i++) - { - ypos = (msa->nseq-i-1)*vspace; - /* name first */ - fprintf(fp, "%d %d moveto\n", 0, ypos); - fprintf(fp, "(%s) show\n", msa->sqname[i]); - /* now seq */ - xpos = namewidth; - for (j = 0; j < msa->alen; j++) - { - fprintf(fp, "%d %d moveto\n", xpos, ypos); - fprintf(fp, "(%c) show\n", msa->aseq[i][j]); - xpos+= hspace; - } - } - - free(font); -} - - diff --git a/forester/archive/RIO/others/hmmer/squid/file.c b/forester/archive/RIO/others/hmmer/squid/file.c deleted file mode 100644 index ec3647a..0000000 --- a/forester/archive/RIO/others/hmmer/squid/file.c +++ /dev/null @@ -1,231 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - - -/* file.c - * SRE, Wed Jun 19 11:19:22 1996 - * - * File operation utilities, dealing with pathnames, directories, - * and environment variables. - * - * The goal is to have these be platform-independent but they - * currently are UNIX-specific: i.e. this file is currently POSIX compliant - * but it is NOT ANSI C compliant. (The sole offender is getenv().) 
- * - * RCS $Id: file.c,v 1.1.1.1 2005/03/22 08:34:26 cmzmasek Exp $ - */ - -#include -#include -#include - -#include "squid.h" -#include "sqfuncs.h" - -/* - * VMS: #define DIRSLASH ']' - * MacOS: #define DIRSLASH ':' - * DOS: #define DIRSLASH '\\' - * - * The code assumes that '.' is used for file name extensions, - * such as "foo.bar". - */ -#define DIRSLASH '/' /* UNIX directory paths have /foo/bar */ - - - -/* Function: FileDirname() - * - * Purpose: Returns the path from a filename: - * "/foo/bar/baz" -> "/foo/bar" - * "foo/bar" -> "foo" - * "foo" -> "." - * "/" -> "/" - * i.e. the string will be non-NULL; it will - * contain the string up to but not including the - * last '/' character; returns "." if - * there are no '/' characters, and returns "/" - * if the last slash is the first character. - * Modeled on Tcl's "file dirname" command. - * - * Args: file - name of file "/foo/bar/baz". - * - * Return: ptr to malloc'ed string "/foo/bar". - */ -char * -FileDirname(char *file) -{ - char *dirname; - char *lastslash; - int len; - - lastslash = strrchr(file, DIRSLASH); - len = (lastslash == NULL) ? 0 : (int) (lastslash - file); - dirname = (char *) MallocOrDie (sizeof(char) * (len+2)); - if (len > 0) strncpy(dirname, file, len); - else if (*file != DIRSLASH) { *dirname = '.'; len = 1; } - else { *dirname = DIRSLASH; len = 1; } - dirname[len] = '\0'; - return dirname; -} - - -/* Function: FileTail() - * - * Purpose: Return everything after the DIRSLASH: - * "/foo/bar/baz.1" -> "baz.1" - * "foo/bar" -> "bar" - * "foo" -> "foo" - * "/" -> "" - * If noextension is TRUE, removes a trailing ".foo" extension - * too. 
- * - * Args: file - name of file "/foo/bar/baz.1" - * noextension - TRUE to also remove extensions - * - * Return: ptr to malloc'ed string "baz.1" - */ -char * -FileTail(char *file, int noextension) -{ - char *tail; - char *lastslash; - char *lastdot; - /* remove directory prefix */ - lastslash = strrchr(file, DIRSLASH); - tail = (char *) MallocOrDie (sizeof(char) * (strlen(file)+1)); - if (lastslash == NULL) strcpy(tail, file); - else strcpy(tail, lastslash+1); - /* remove trailing suffix */ - if (noextension) { - if ((lastdot = strrchr(tail, '.')) != NULL) - *lastdot = '\0'; - } - - return tail; -} - - -/* Function: FileConcat() - * - * Purpose: Concatenate a directory path and a file name, - * returning a pointer to a malloc'ed string with the - * full filename. - */ -char * -FileConcat(char *dir, char *file) -{ - char *full; - - full = (char *) MallocOrDie (sizeof(char) * (strlen(dir)+strlen(file)+2)); - if (*file == DIRSLASH) strcpy(full, file); /* file = "/foo", ignore directory. */ - else sprintf(full, "%s%c%s", dir, DIRSLASH, file); - return full; -} - - -/* Function: FileAddSuffix() - * Date: SRE, Wed Aug 1 11:19:33 2001 [Pasadena] - * - * Purpose: Add a suffix to a filename, return a malloc'ed - * string containing the new filename.sfx name. - * Example: - * FileAddSuffix("genbank", "ssi") - * returns "genbank.ssi". - */ -char * -FileAddSuffix(char *filename, char *sfx) -{ - char *new; - new = MallocOrDie(strlen(filename) + strlen(sfx) + 2); - sprintf(new, "%s.%s", filename, sfx); - return new; -} - -/* Function: EnvFileOpen() - * Date: Sun Feb 12 10:55:29 1995 - * - * Purpose: Open a file, given a file name and an environment - * variable that contains a directory path. Files - * are opened read-only. Does not look at current directory - * unless "." is explicitly in the path specified by env. 
- * - * For instance: - * fp = EnvFileOpen("BLOSUM45", "BLASTMAT", NULL); - * or: - * fp = EnvFileOpen("swiss", "BLASTDB", NULL); - * - * Environment variables may contain a colon-delimited - * list of more than one path; e.g. - * setenv BLASTDB /nfs/databases/foo:/nfs/databases/bar - * - * Sometimes a group of files may be found in - * one directory; for instance, an index file with a - * database. The caller can EnvFileOpen() the main - * file, and ask to get the name of the - * directory back in ret_dir, so it can construct - * the other auxiliary file names and fopen() them. (If it called - * EnvFileOpen(), it might get confused by - * file name clashes and open files in different - * directories. - * - * Args: fname - name of file to open - * env - name of environment variable containing path - * ret_dir - if non-NULL, RETURN: name of dir that was used. - * - * Return: FILE * to open file, or NULL on failure -- same as fopen() - * Caller must free ret_dir if it passed a non-NULL address. - */ -FILE * -EnvFileOpen(char *fname, char *env, char **ret_dir) -{ - FILE *fp; - char *path; - char *s; /* ptr to indiv element in env list */ - char full[1024]; /* constructed file name */ - - if (env == NULL) return NULL; - if ((path = Strdup(getenv(env))) == NULL) return NULL; - - fp = NULL; - s = strtok(path, ":"); - while (s != NULL) - { - if (((int) strlen(fname) + (int) strlen(s) + 2) > 1024) - { free(path); return NULL; } - sprintf(full, "%s%c%s", s, DIRSLASH, fname); - if ((fp = fopen(full, "r")) != NULL) break; - s = strtok(NULL, ":"); - } - - /* Return the path we used, if caller wants it - */ - if (ret_dir != NULL) *ret_dir = Strdup(s); - free(path); - - return fp; -} - - -/* Function: FileExists() - * - * Purpose: Return TRUE if filename exists. - * Testing fopen() is the only possible platform-independent test - * I'm aware of. 
- */ -int -FileExists(char *filename) -{ - FILE *fp; - if ((fp = fopen(filename, "r"))) { fclose(fp); return TRUE; } - return FALSE; -} - - diff --git a/forester/archive/RIO/others/hmmer/squid/getopt.c b/forester/archive/RIO/others/hmmer/squid/getopt.c deleted file mode 100644 index 75158f7..0000000 --- a/forester/archive/RIO/others/hmmer/squid/getopt.c +++ /dev/null @@ -1,251 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* RCS $Id: getopt.c,v 1.1.1.1 2005/03/22 08:34:26 cmzmasek Exp $ - */ - -#include -#include -#include - -#include "squid.h" - -/* Function: Getopt() - * - * Purpose: Portable command line option parsing with abbreviated - * option switches. Replaces UNIX getopt(). Using UNIX getopt() - * hinders portability to non-UNIX platforms, and getopt() - * is also limited to single letter options. - * - * Getopt() implements a superset of UNIX getopt(). - * All of getopt()'s single-character switch behavior - * is emulated, and "--" by itself terminates the options. - * Additionally, Getopt() provides extended switches - * like "--youroptionhere", and Getopt() type checks - * arguments. - * - * Extended options must start with "--", as in "--option1". - * Normal options must start with "-", as in "-o". - * Normal options may be concatenated, as in "-a -b" == "-ab". - * - * See bottom of this .c file after #fdef GETOPT_TESTDRIVER - * for an example of calling Getopt(). - * - * Args: argc - from main(). number of elems in argv. - * argv - from main(). argv[0] is the name of the command. 
- * opt - array of opt_s structures, defining option switches - * nopts - number of switches in opt - * usage - a (possibly long) string to print if usage error. - * ret_optind - RETURN: the index in argv[] of the next - * valid command-line token. - * ret_optname- RETURN: ptr to the name of option switch - * seen, or NULL if no option was seen. - * ret_optarg - RETURN: ptr to the optional argument, if any; - * NULL if option takes no argument. - * - * Return: 1 if a valid option was parsed. - * 0 if no option was found, and command-line parsing is complete. - * Die()'s here if an error is detected. - */ -int -Getopt(int argc, char **argv, struct opt_s *opt, int nopts, char *usage, - int *ret_optind, char **ret_optname, char **ret_optarg) -{ - int i; - int arglen; - int nmatch; - static int optind = 1; /* init to 1 on first call */ - static char *optptr = NULL; /* ptr to next valid switch */ - int opti = 0; /* init only to silence gcc uninit warnings */ - - /* Check to see if we've run out of options. - * A '-' by itself is an argument (e.g. "read from stdin") - * not an option. - */ - if (optind >= argc || argv[optind][0] != '-' || strcmp(argv[optind], "-") == 0) - { - *ret_optind = optind; - *ret_optarg = NULL; - *ret_optname = NULL; - return 0; - } - - /* Check to see if we're being told that this is the end - * of the options with the special "--" flag. - */ - if (strcmp(argv[optind], "--") == 0) - { - optind++; - *ret_optind = optind; - *ret_optname = NULL; - *ret_optarg = NULL; - return 0; - } - - /* We have a real option. Find which one it is. - * We handle single letter switches "-o" separately - * from full switches "--option", based on the "-" vs. "--" - * prefix -- single letter switches can be concatenated - * as long as they don't have arguments. 
- */ - /* full option */ - if (optptr == NULL && strncmp(argv[optind], "--", 2) == 0) - { - /* Use optptr to parse argument in options of form "--foo=666" - */ - if ((optptr = strchr(argv[optind], '=')) != NULL) - { *optptr = '\0'; optptr++; } - - arglen = strlen(argv[optind]); - nmatch = 0; - for (i = 0; i < nopts; i++) - if (opt[i].single == FALSE && - strncmp(opt[i].name, argv[optind], arglen) == 0) - { - nmatch++; - opti = i; - if (arglen == strlen(opt[i].name)) break; /* exact match, stop now */ - } - if (nmatch > 1 && arglen != strlen(opt[i].name)) - Die("Option \"%s\" is ambiguous; please be more specific.\n%s", - argv[optind], usage); - if (nmatch == 0) - Die("No such option \"%s\".\n%s", argv[optind], usage); - - *ret_optname = opt[opti].name; - - /* Set the argument, if there is one - */ - if (opt[opti].argtype != sqdARG_NONE) - { - if (optptr != NULL) - { /* --foo=666 style */ - *ret_optarg = optptr; - optptr = NULL; - optind++; - } - else if (optind+1 >= argc) - Die("Option %s requires an argument\n%s", opt[opti].name, usage); - else /* "--foo 666" style */ - { - *ret_optarg = argv[optind+1]; - optind+=2; - } - } - else /* sqdARG_NONE */ - { - if (optptr != NULL) - Die("Option %s does not take an argument\n%s", opt[opti].name, usage); - *ret_optarg = NULL; - optind++; - } - } - else /* else, a single letter option "-o" */ - { - /* find the option */ - if (optptr == NULL) - optptr = argv[optind]+1; - for (opti = -1, i = 0; i < nopts; i++) - if (opt[i].single == TRUE && *optptr == opt[i].name[1]) - { opti = i; break; } - if (opti == -1) - Die("No such option \"%c\".\n%s", *optptr, usage); - *ret_optname = opt[opti].name; - - /* set the argument, if there is one */ - if (opt[opti].argtype != sqdARG_NONE) - { - if (*(optptr+1) != '\0') /* attached argument */ - { - *ret_optarg = optptr+1; - optind++; - } - else if (optind+1 < argc) /* unattached argument */ - { - *ret_optarg = argv[optind+1]; - optind+=2; - } - else Die("Option %s requires an argument\n%s", 
opt[opti].name, usage); - - optptr = NULL; /* can't concatenate after an argument */ - } - else /* sqdARG_NONE */ - { - *ret_optarg = NULL; - if (*(optptr+1) != '\0') /* concatenation */ - optptr++; - else - { - optind++; /* move to next field */ - optptr = NULL; - } - } - - } - - /* Type check the argument, if there is one - */ - if (opt[opti].argtype != sqdARG_NONE) - { - if (opt[opti].argtype == sqdARG_INT && ! IsInt(*ret_optarg)) - Die("Option %s requires an integer argument\n%s", - opt[opti].name, usage); - else if (opt[opti].argtype == sqdARG_FLOAT && ! IsReal(*ret_optarg)) - Die("Option %s requires a numerical argument\n%s", - opt[opti].name, usage); - else if (opt[opti].argtype == sqdARG_CHAR && strlen(*ret_optarg) != 1) - Die("Option %s requires a single-character argument\n%s", - opt[opti].name, usage); - /* sqdARG_STRING is always ok, no type check necessary */ - } - - *ret_optind = optind; - return 1; -} - - - -#ifdef GETOPT_TESTDRIVER -/* cc -DGETOPT_TESTDRIVER -L ~/lib/squid.linux/ getopt.c -lsquid - */ -struct opt_s OPTIONS[] = { - { "--test1", FALSE, sqdARG_INT }, - { "--test2", FALSE, sqdARG_FLOAT }, - { "--test3", FALSE, sqdARG_STRING }, - { "--test4", FALSE, sqdARG_CHAR }, - { "-a", TRUE, sqdARG_NONE }, - { "-b", TRUE, sqdARG_INT }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - int optind; - char *optarg; - char *optname; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, "Usage/help here", - &optind, &optname, &optarg)) - { - printf("Option: index: %d name: %s argument: %s\n", - optind, optname, optarg); - } - while (optind < argc) - { - printf("Argument: index: %d name: %s\n", optind, argv[optind]); - optind++; - } - - -} - - -#endif /*GETOPT_TESTDRIVER*/ diff --git a/forester/archive/RIO/others/hmmer/squid/gki.c b/forester/archive/RIO/others/hmmer/squid/gki.c deleted file mode 100644 index 3ce8390..0000000 --- a/forester/archive/RIO/others/hmmer/squid/gki.c +++ /dev/null @@ -1,390 +0,0 @@ 
-/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* gki.c - * SRE, Sat May 1 14:49:08 1999 - * - * "generic key index" module: emulation of Perl hashes. - * Maps keys (ASCII char strings) to array index. Dynamically - * resizes the hash table. - * - * Limitations: - * - hash table can only grow; no provision for deleting keys - * or downsizing the hash table. - * - Maximum hash table size set at 100003. Performance - * will degrade for key sets much larger than this. - * - Assumes that integers are 32 bits (or greater). - * - * Defines a typedef'd structure: - * gki - a key index hash table. - * Provides functions: - * GKIInit() - start a hash table. - * GKIStoreKey() - store a new key, get a unique index. - * GKIKeyIndex() - retrieve an existing key's index. - * GKIFree() - free a hash table. - * GKIStatus() - Debugging: prints internal status of a hash struct - * - * - * Note that there are no dependencies on squid; the gki.c/gki.h - * pair are base ANSI C and can be reused anywhere. 
- ***************************************************************** - * - * API for storing/reading stuff: - * moral equivalent of Perl's $foo{$key} = whatever, $bar{$key} = whatever: - * #include "gki.h" - * - * gki *hash; - * int idx; - * char *key; - * - * hash = GKIInit(); - * (Storing:) - * (foreach key) { - * idx = GKIStoreKey(hash, key); - * (reallocate foo, bar as needed) - * foo[idx] = whatever; - * bar[idx] = whatever; - * } - * (Reading:) - * (foreach key) { - * idx = GKIKeyIndex(hash, key); - * if (idx == -1) {no_such_key; } - * (do something with) foo[idx]; - * (do something with) bar[idx]; - * } - * GKIFree(); - * - ***************************************************************** - * - * Timings on wrasse for 45402 keys in /usr/dict/words using - * Tests/test_gki: - * 250 msec store (6 usec/store) - * 140 msec retrieve (3 usec/retrieve) - * and using the 13408 names of Pfam's GP120.full alignment: - * 70 msec store (5 usec/store) - * 50 msec retrieve (4 usec/retrieve) - * - * RCS $Id: gki.c,v 1.1.1.1 2005/03/22 08:34:18 cmzmasek Exp $ - */ - - - -#include -#include -#include -#include -#include "squid.h" -#include "gki.h" - -/* - * Best hash table sizes are prime numbers (see Knuth vol 3, Sorting - * and Searching). - * gki_primes[] defines the ascending order of hash table sizes - * that we use in upsizing the hash table dynamically. - * useful site for testing primes: - * http://www.idbsu.edu/people/jbrennan/algebra/numbers/sieve.html - * Because of the way gki_hashvalue works, the largest number - * must be < INT_MAX / 128 / 128 : 131072 on a 32 bit machine. - */ -static int gki_primes[] = { 101, 1009, 10007, 100003 }; -#define GKI_NPRIMES 4 -#define GKI_ALPHABETSIZE 128 - -static GKI *gki_alloc(int primelevel); -static int gki_hashvalue(GKI *hash, char *key); -static int gki_upsize(GKI *old); - - -/* Function: GKIInit() - * Date: SRE, Sat May 1 11:12:24 1999 [May Day geek-out] - * - * Purpose: Initialize a hash table for key indexing. 
- * Simply a wrapper around a level 0 gki_alloc(). - * - * Args: (void) - * - * Returns: An allocated hash table structure. - * Caller frees with GKIFree(). - */ -GKI * -GKIInit(void) -{ - GKI *hash; - hash = gki_alloc(0); - return hash; -} - -/* Function: GKIFree() - * Date: SRE, Sat May 1 11:13:26 1999 [May Day geek-out] - * - * Purpose: Free a key index hash table. - * - * Args: hash - the gki structure - * - * Returns: (void). - * hash table is destroyed. - */ -void -GKIFree(GKI *hash) -{ - struct gki_elem *ptr; - int i; - - if (hash == NULL) return; /* tolerate a NULL */ - - for (i = 0; i < hash->nhash; i++) - while (hash->table[i] != NULL) - { - ptr = hash->table[i]->nxt; - /* NULL keys can occur after we've gki_upsize'd */ - if (hash->table[i]->key != NULL) free(hash->table[i]->key); - free(hash->table[i]); - hash->table[i] = ptr; - } - free(hash->table); - free(hash); -} - -/* Function: GKIStoreKey() - * Date: SRE, Sat May 1 11:16:48 1999 [May Day geek-out] - * - * Purpose: Store a key in the key index hash table. - * Associate it with a unique "key index", counting - * from 0. (It's this index that lets us map - * the hashed keys to indexed C arrays, (clumsily) - * emulating Perl's hashes.) - * - * Does *not* check to see if the key's already - * in the table, so it's possible to store multiple - * copies of a key with different indices; probably - * not what you want, so if you're not sure the - * key is unique, check the table first with - * GKIKeyIndex(). - * - * Args: hash - GKI structure to store the key in - * key - string to store - * - * Returns: the new key's index. Since it's always the - * last one in the current array, this index is - * just hash->nkeys-1. - * On a malloc failure, returns -1. - * hash table is modified. 
- */ -int -GKIStoreKey(GKI *hash, char *key) -{ - int val; - struct gki_elem *ptr; - - val = gki_hashvalue(hash, key); - - ptr = hash->table[val]; - hash->table[val] = MallocOrDie(sizeof(struct gki_elem)); - hash->table[val]->key = MallocOrDie(sizeof(char) * (strlen(key)+1)); - strcpy(hash->table[val]->key, key); - - hash->table[val]->idx = hash->nkeys; - hash->table[val]->nxt = ptr; - - hash->nkeys++; - /* time to upsize? */ - if (hash->nkeys > 3*hash->nhash && hash->primelevel < GKI_NPRIMES-1) - gki_upsize(hash); - - return hash->nkeys-1; -} - -/* Function: GKIKeyIndex() - * Date: SRE, Sat May 1 11:20:42 1999 [May Day geek-out] - * - * Purpose: Look up a key in the hash table. Return - * its index (0..nkeys-1), else -1 if the key - * isn't in the hash (yet). - * - * Args: hash - the GKI hash table to search in - * key - the key to look up - * - * Returns: -1 if key is not found; - * index of key if it is found (range 0..nkeys-1). - * hash table is unchanged. - */ -int -GKIKeyIndex(GKI *hash, char *key) -{ - struct gki_elem *ptr; - int val; - - val = gki_hashvalue(hash, key); - for (ptr = hash->table[val]; ptr != NULL; ptr = ptr->nxt) - if (strcmp(key, ptr->key) == 0) return ptr->idx; - return -1; -} - -/* Function: GKIStatus() - * Date: SRE, Sat May 1 11:11:13 1999 [St. Louis] - * - * Purpose: (DEBUGGING) How are we doing? Calculate some - * simple statistics for the hash table. - * - * Args: hash - the GKI hash table to look at - * - * Returns: (void) - * Prints diagnostics on stdout. - * hash table is unchanged. 
- */ -void -GKIStatus(GKI *hash) -{ - struct gki_elem *ptr; - int i; - int nkeys; - int nempty = 0; - int maxkeys = -1; - int minkeys = INT_MAX; - - for (i = 0; i < hash->nhash; i++) - { - nkeys = 0; - for (ptr = hash->table[i]; ptr != NULL; ptr = ptr->nxt) - nkeys++; - - if (nkeys == 0) nempty++; - if (nkeys > maxkeys) maxkeys = nkeys; - if (nkeys < minkeys) minkeys = nkeys; - } - - printf("Total keys: %d\n", hash->nkeys); - printf("Hash table size: %d\n", hash->nhash); - printf("Average occupancy: %.1f\n", (float) hash->nkeys / (float) hash->nhash); - printf("Unoccupied slots: %d\n", nempty); - printf("Most in one slot: %d\n", maxkeys); - printf("Least in one slot: %d\n", minkeys); - -} - - -/* Function: gki_alloc() - * Date: SRE, Sat May 1 11:55:47 1999 [May Day geek-out] - * - * Purpose: Allocate a hash table structure with the - * size given by primelevel. - * - * Args: primelevel - level 0..GKI_NPRIMES-1, specifying - * the size of the table; see gki_primes[] - * array. - * - * Returns: An allocated hash table structure. - * Caller frees with GKIFree(). - */ -static GKI * -gki_alloc(int primelevel) -{ - GKI *hash; - int i; - - if (primelevel < 0 || primelevel >= GKI_NPRIMES) - Die("bad primelevel in gki_alloc()"); - hash = MallocOrDie(sizeof(GKI)); - - hash->primelevel = primelevel; - hash->nhash = gki_primes[hash->primelevel]; - hash->table = MallocOrDie(sizeof(struct gki_elem) * hash->nhash); - for (i = 0; i < hash->nhash; i++) - hash->table[i] = NULL; - hash->nkeys = 0; - return hash; -} - - -/* Function: gki_hashvalue() - * Date: SRE, Sat May 1 11:14:10 1999 [May Day geek-out] - * - * Purpose: Calculate the hash value for a key. Usually - * we expect a one-word key, but the function will - * hash any ASCII string effectively. The hash function - * is a simple one (see p. 233 of Sedgewick, - * Algorithms in C). - * Slightly optimized: does two characters at a time - * before doing the modulo; this gives us a significant - * speedup. 
- * - * Args: hash - the gki structure (we need to know the hash table size) - * key - a string to calculate the hash value for - * - * Returns: a hash value, in the range 0..hash->nhash-1. - * hash table is unmodified. - */ -static int -gki_hashvalue(GKI *hash, char *key) -{ - int val = 0; - - for (; *key != '\0'; key++) - { - val = GKI_ALPHABETSIZE*val + *key; - if (*(++key) == '\0') { val = val % hash->nhash; break; } - val = (GKI_ALPHABETSIZE*val + *key) % hash->nhash; - } - return val; -} - -/* Function: gki_upsize() - * Date: SRE, Sat May 1 11:46:07 1999 [May Day geek-out] - * - * Purpose: Grow the hash table to the next available size. - * - * Args: old - the GKI hash table to reallocate. - * - * Returns: 1 on success (the hash table is changed); - * 0 on failure; the table is already at its maximum size, - * and the hash table is returned unchanged. - */ -static int -gki_upsize(GKI *old) -{ - GKI *new; - int i; - struct gki_elem *optr; - struct gki_elem *nptr; - int val; - - if (old->primelevel >= GKI_NPRIMES-1) return 0; - new = gki_alloc(old->primelevel+1); - - /* Read the old, store in the new, while *not changing* - * any key indices. Because of the way the lists are - * treated as LIFO stacks, all the lists are reversed - * in the new structure. - */ - for (i = 0; i < old->nhash; i++) - { - optr = old->table[i]; - while (optr != NULL) - { - val = gki_hashvalue(new, optr->key); - - nptr = new->table[val]; - new->table[val] = optr; - optr = optr->nxt; - new->table[val]->nxt = nptr; - } - } - free(old->table); - - /* Now swap within the interior of the structures, so the old - * structure is updated to the new structure. - * (nkeys is identical, so we don't need to swap that element.) 
- */ - old->primelevel = new->primelevel; - old->nhash = new->nhash; - old->table = new->table; - free(new); - return 1; -} diff --git a/forester/archive/RIO/others/hmmer/squid/gki.h b/forester/archive/RIO/others/hmmer/squid/gki.h deleted file mode 100644 index 1346045..0000000 --- a/forester/archive/RIO/others/hmmer/squid/gki.h +++ /dev/null @@ -1,51 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -#ifndef SQUID_GKI_INCLUDED -#define SQUID_GKI_INCLUDED - -/* gki.h - * SRE, Sat May 1 15:07:22 1999 - * - * Declarations of structures, functions for generic key index - * module: emulation of Perl hashes. See gki.c. - * - * RCS $Id: gki.h,v 1.1.1.1 2005/03/22 08:34:18 cmzmasek Exp $ - */ - -/* gki_elem: - * key, array index pairs are kept in linked list structures. 
- */ -struct gki_elem { - char *key; - int idx; - struct gki_elem *nxt; -}; - -/* gki: - * a dynamically resized hash structure; - * contains a hash table and associated data - */ -typedef struct { - struct gki_elem **table; - - int primelevel; - int nhash; - int nkeys; -} GKI; - -GKI *GKIInit(void); -void GKIFree(GKI *hash); -int GKIHashValue(GKI *hash, char *key); -int GKIStoreKey(GKI *hash, char *key); -int GKIKeyIndex(GKI *hash, char *key); -void GKIStatus(GKI *hash); - -#endif /* SQUID_GKI_INCLUDED */ diff --git a/forester/archive/RIO/others/hmmer/squid/gsi.c b/forester/archive/RIO/others/hmmer/squid/gsi.c deleted file mode 100644 index f5cbee1..0000000 --- a/forester/archive/RIO/others/hmmer/squid/gsi.c +++ /dev/null @@ -1,385 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* gsi.c - * Interfaces for GSI "generic sequence index" files. - * broken away from sqio.c and extended: SRE, Wed Aug 5 10:32:53 1998 - * - * - * GSI definition: - * 1 + + total records. - * Each record = 38 bytes. - * - * one header record : <"GSI" (32)> - * file records : - * key records : - * - * Matches up with my Perl scripts that create GSI files. - * - * RCS $Id: gsi.c,v 1.1.1.1 2005/03/22 08:34:18 cmzmasek Exp $ - */ - -#include -#include -#include -#ifndef SEEK_SET -#include /* needed for poor crippled SunOS */ -#endif - -#include "squid.h" -#include "gsi.h" - - -/***************************************************************** - * GSI index file access routines - *****************************************************************/ - -/* Function: GSIOpen() - * - * Purpose: Open a GSI file. 
Returns the number of records in - * the file and a file pointer. Returns NULL on failure. - * The file pointer should be fclose()'d normally. - */ -GSIFILE * -GSIOpen(char *gsifile) -{ - GSIFILE *gsi; - char magic[GSI_KEYSIZE]; - - gsi = (GSIFILE *) MallocOrDie (sizeof(GSIFILE)); - if ((gsi->gsifp = fopen(gsifile, "r")) == NULL) - { free(gsi); squid_errno = SQERR_NOFILE; return NULL; } - - if (! fread(magic, sizeof(char), GSI_KEYSIZE, gsi->gsifp)) - { free(gsi); squid_errno = SQERR_NODATA; return NULL; } - if (strcmp(magic, "GSI") != 0) - { free(gsi); squid_errno = SQERR_FORMAT; return NULL; } - - if (! fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp)) - { free(gsi); squid_errno = SQERR_NODATA; return NULL; } - if (! fread(&(gsi->recnum), sizeof(sqd_uint32), 1, gsi->gsifp)) - { free(gsi); squid_errno = SQERR_NODATA; return NULL; } - - gsi->nfiles = sre_ntoh16(gsi->nfiles); /* convert from network short */ - gsi->recnum = sre_ntoh32(gsi->recnum); /* convert from network long */ - - return gsi; -} - -/* Function: GSIGetRecord() - * - * Purpose: Each non-header record of a GSI index files consists - * of 38 bytes: 32 bytes of character string, a 2 byte - * short, and a 4 byte long. This function returns the - * three values. - * - * Args: gsi - open GSI index file, correctly positioned at a record - * f1 - char[32], allocated by caller (or NULL if unwanted) - * f2 - pointer to short (or NULL if unwanted) - * f3 - pointer to long (or NULL if unwanted) - * - * Return: 0 on failure and sets squid_errno. - */ -int -GSIGetRecord(GSIFILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint32 *f3) -{ - if (f1 == NULL) fseek(gsi->gsifp, GSI_KEYSIZE, SEEK_CUR); - else if (! fread(f1, GSI_KEYSIZE, 1, gsi->gsifp)) - { squid_errno = SQERR_NODATA; return 0; } - - if (f2 == NULL) fseek(gsi->gsifp, sizeof(sqd_uint16), SEEK_CUR); - else if (! 
fread(f2, sizeof(sqd_uint16), 1, gsi->gsifp)) - { squid_errno = SQERR_NODATA; return 0; } - - if (f3 == NULL) fseek(gsi->gsifp, sizeof(sqd_uint32), SEEK_CUR); - else if (! fread(f3, sizeof(sqd_uint32), 1, gsi->gsifp)) - { squid_errno = SQERR_NODATA; return 0; } - - if (f2 != NULL) *f2 = sre_ntoh16(*f2); - if (f3 != NULL) *f3 = sre_ntoh32(*f3); - - return 1; -} - - -/* Function: GSIGetOffset() - * - * Purpose: From a key (sequence name), find a disk offset - * in an open general sequence index file by binary - * search. Presumably GSI indexing could be even faster - * if we used hashing. - * - * Args: gsi - GSI index file, opened by GSIOpen() - * key - name of key to retrieve indices for - * ret_seqfile - pre-alloced char[32] array for seqfile name - * ret_fmt - format of seqfile - * ret_offset - return: disk offset in seqfile. - */ -int -GSIGetOffset(GSIFILE *gsi, char *key, char *ret_seqfile, - int *ret_format, long *ret_offset) -{ - sqd_uint32 left, right, mid; - int cmp; - char name[GSI_KEYSIZE + 1]; - sqd_uint32 offset; - sqd_uint16 filenum; - sqd_uint32 fmt; - - name[GSI_KEYSIZE] = '\0'; - - left = gsi->nfiles + 1; - right = gsi->nfiles + gsi->recnum; - mid = (left + right) / 2; - fseek(gsi->gsifp, mid * GSI_RECSIZE, SEEK_SET); - - while (GSIGetRecord(gsi, name, &filenum, &offset)) - { - cmp = strcmp(name, key); - if (cmp == 0) break; /* found it! */ - else if (left >= right) return 0; /* oops, missed it; fail. */ - else if (cmp < 0) left = mid + 1; /* it's right of mid */ - else if (cmp > 0) right = mid - 1; /* it's left of mid */ - mid = (left + right) / 2; - fseek(gsi->gsifp, mid * GSI_RECSIZE, SEEK_SET); - } - - /* Using file number, look up the sequence file and format. - */ - fseek(gsi->gsifp, filenum * GSI_RECSIZE, SEEK_SET); - GSIGetRecord(gsi, ret_seqfile, NULL, &fmt); - *ret_format = (int) fmt; - *ret_offset = (long) offset; - - return 1; -} - -/* Function: GSIClose() - * - * Purpose: Close an open GSI sequence index file. 
- */ -void -GSIClose(GSIFILE *gsi) -{ - fclose(gsi->gsifp); - free(gsi); -} - - -/***************************************************************** - * GSI index construction routines - * SRE, Wed Nov 10 11:49:14 1999 [St. Louis] - * - * API: - * g = GSIAllocIndex(); - * - * [foreach filename, <32 char, no directory path] - * GSIAddFileToIndex(g, filename); - * filenum++; - * [foreach key, <32 char, w/ filenum 1..nfiles, w/ 32bit offset] - * GSIAddKeyToIndex(g, key, filenum, offset); - * - * GSISortIndex(g); - * GSIWriteIndex(fp, g); - * GSIFreeIndex(g); - *****************************************************************/ -struct gsiindex_s * -GSIAllocIndex(void) -{ - struct gsiindex_s *g; - - g = MallocOrDie(sizeof(struct gsiindex_s)); - g->filenames = MallocOrDie(sizeof(char *) * 10); - g->fmt = MallocOrDie(sizeof(int) * 10); - g->elems = MallocOrDie(sizeof(struct gsikey_s) * 100); - g->nfiles = 0; - g->nkeys = 0; - return g; -} -void -GSIFreeIndex(struct gsiindex_s *g) -{ - int i; - for (i = 0; i < g->nfiles; i++) free(g->filenames[i]); - free(g->filenames); - free(g->fmt); - free(g->elems); - free(g); -} -void -GSIAddFileToIndex(struct gsiindex_s *g, char *filename, int fmt) -{ - int len; - - len = strlen(filename); - if (len >= GSI_KEYSIZE) Die("File name too long to be indexed."); - g->filenames[g->nfiles] = sre_strdup(filename, len); - g->fmt[g->nfiles] = fmt; - g->nfiles++; - if (g->nfiles % 10 == 0) { - g->filenames = ReallocOrDie(g->filenames, sizeof(char *) * (g->nfiles + 10)); - g->fmt = ReallocOrDie(g->fmt, sizeof(int) * (g->nfiles + 10)); - } -} -void -GSIAddKeyToIndex(struct gsiindex_s *g, char *key, int filenum, long offset) -{ - if (strlen(key) >= GSI_KEYSIZE) Die("key too long in GSI index"); - if (filenum > SQD_UINT16_MAX) Die("too many files in GSI index"); - if (offset > SQD_UINT32_MAX) Die("offset too big in GSI index"); - - strncpy(g->elems[g->nkeys].key, key, GSI_KEYSIZE-1); - g->elems[g->nkeys].key[GSI_KEYSIZE-1] = '\0'; - 
g->elems[g->nkeys].filenum = (sqd_uint16) filenum; - g->elems[g->nkeys].offset = (sqd_uint32) offset; - g->nkeys++; - - if (g->nkeys % 100 == 0) - g->elems = ReallocOrDie(g->elems, sizeof(struct gsikey_s) * (g->nkeys + 100)); -} -static int -gsi_keysorter(const void *k1, const void *k2) -{ - struct gsikey_s *key1; - struct gsikey_s *key2; - key1 = (struct gsikey_s *) k1; - key2 = (struct gsikey_s *) k2; - return strcmp(key1->key, key2->key); -} -void -GSISortIndex(struct gsiindex_s *g) -{ - qsort((void *) g->elems, g->nkeys, sizeof(struct gsikey_s), gsi_keysorter); -} -void -GSIWriteIndex(FILE *fp, struct gsiindex_s *g) -{ - sqd_uint32 i; - - /* Range checking. - */ - if (g->nfiles > SQD_UINT16_MAX) Die("Too many files in GSI index."); - if (g->nkeys > SQD_UINT32_MAX) Die("Too many keys in GSI index."); - - GSIWriteHeader(fp, g->nfiles, g->nkeys); - for (i = 0; i < g->nfiles; i++) - GSIWriteFileRecord(fp, g->filenames[i], i+1, g->fmt[i]); - for (i = 0; i < g->nkeys; i++) - GSIWriteKeyRecord(fp, g->elems[i].key, g->elems[i].filenum, g->elems[i].offset); -} - - - - - -/* Function: GSIWriteHeader() - * Date: SRE, Wed Aug 5 10:36:02 1998 [St. Louis] - * - * Purpose: Write the first record to an open GSI file: - * "GSI" - * - * Args: fp - open file to write to. - * nfiles - number of files indexed - * nkeys - number of keys indexed - * - * Returns: void - */ -void -GSIWriteHeader(FILE *fp, int nfiles, long nkeys) -{ - char key[GSI_KEYSIZE]; - sqd_uint16 f1; - sqd_uint32 f2; - - /* beware potential range errors! 
- */ - if (nfiles > SQD_UINT16_MAX) Die("GSI: nfiles out of range"); - if (nkeys > SQD_UINT32_MAX) Die("GSI: nkeys out of range"); - - f1 = (sqd_uint16) nfiles; - f2 = (sqd_uint32) nkeys; - f1 = sre_hton16(f1); - f2 = sre_hton32(f2); - strcpy(key, "GSI"); - - if (fwrite(key, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC; - if (fwrite(&f1, 2, 1, fp) < 1) PANIC; - if (fwrite(&f2, 4, 1, fp) < 1) PANIC; -} - - -/* Function: GSIWriteFileRecord() - * Date: SRE, Wed Aug 5 10:45:51 1998 [St. Louis] - * - * Purpose: Write a file record to an open GSI file. - * - * Args: fp - open GSI file - * fname - file name (max 31 characters) - * idx - file number - * fmt - file format (e.g. kPearson, etc.) - * - * Returns: 0 on failure. 1 on success. - */ -int -GSIWriteFileRecord(FILE *fp, char *fname, int idx, int fmt) -{ - sqd_uint16 f1; - sqd_uint32 f2; - - if (strlen(fname) >= GSI_KEYSIZE) return 0; - if (idx > SQD_UINT16_MAX) Die("GSI: file index out of range"); - if (fmt > SQD_UINT32_MAX) Die("GSI: format index out of range"); - - f1 = (sqd_uint16) idx; - f2 = (sqd_uint32) fmt; - f1 = sre_hton16(f1); - f2 = sre_hton32(f2); - - if (fwrite(fname, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC; - if (fwrite(&f1, 2, 1, fp) < 1) PANIC; - if (fwrite(&f2, 4, 1, fp) < 1) PANIC; - return 1; -} - - -/* Function: GSIWriteKeyRecord() - * Date: SRE, Wed Aug 5 10:52:30 1998 [St. Louis] - * - * Purpose: Write a key record to a GSI file. - * - * Args: fp - open GSI file for writing - * key - key (max 31 char + \0) - * fileidx - which file number to find this key in - * offset - offset for this key - * - * Returns: 1 on success, else 0. - * will fail if key >= 32 chars, for instance. 
- */ -int -GSIWriteKeyRecord(FILE *fp, char *key, int fileidx, long offset) -{ - sqd_uint16 f1; - sqd_uint32 f2; - - if (strlen(key) >= GSI_KEYSIZE) return 0; - if (fileidx > SQD_UINT16_MAX) Die("GSI: file index out of range"); - if (offset > SQD_UINT32_MAX) Die("GSI: offset out of range"); - - f1 = (sqd_uint16) fileidx; - f2 = (sqd_uint32) offset; - f1 = sre_hton16(f1); - f2 = sre_hton32(f2); - - if (fwrite(key, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC; - if (fwrite(&f1, 2, 1, fp) < 1) PANIC; - if (fwrite(&f2, 4, 1, fp) < 1) PANIC; - return 1; -} - diff --git a/forester/archive/RIO/others/hmmer/squid/gsi.h b/forester/archive/RIO/others/hmmer/squid/gsi.h deleted file mode 100644 index 1c385ff..0000000 --- a/forester/archive/RIO/others/hmmer/squid/gsi.h +++ /dev/null @@ -1,85 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -#ifndef GSIH_INCLUDED -#define GSIH_INCLUDED - -/* gsi.h - * Database indexing (GSI format support) - * RCS $Id: gsi.h,v 1.1.1.1 2005/03/22 08:34:18 cmzmasek Exp $ - * - * A GSI (generic sequence index) file is composed of - * recnum + nfiles + 1 records. Each record contains - * three fields; key, file number, and disk offset. 
- * Record 0 contains: - * [ "GSI" ] [ nfiles ] [ recnum ] - * Records 1..nfiles map file names to file numbers, and contain: - * [ filename ] [ file number, 1..nfiles ] [ 0 (unused) ] - * Records nfiles+1 to recnum+nfiles+1 provide disk offset - * and file number indices for every key: - * [ key ] [ file number ] [ offset] - * - * Because the file is binary, we take some (but not - * complete) care to improve portability amongst platforms. - * This means using network order integers (see ntohl()) - * and defining types for 16 and 32 bit integers. - * - * Because we use 32-bit offsets, ftell(), and fseek(), - * there is an implicit 2 Gb file size maximum. - * AFAIK neither ANSI C nor POSIX provide a portable solution - * to this problem. fsetpos(), fgetpos() use an - * opaque fpos_t datatype that we can't write portably - * to a disk file. Suggestions welcomed. - */ -#define GSI_KEYSIZE 32 /* keys are 32 bytes long */ -#define GSI_RECSIZE 38 /* 32 + 2 + 4 bytes */ -#define SQD_UINT16_MAX 65535 /* 2^16-1 */ -#define SQD_UINT32_MAX 4294967295U/* 2^32-1 */ - -struct gsi_s { - FILE *gsifp; /* open GSI index file */ - sqd_uint16 nfiles; /* number of files = 16 bit int */ - sqd_uint32 recnum; /* number of records = 32 bit int */ -}; -typedef struct gsi_s GSIFILE; - -struct gsikey_s { - char key[GSI_KEYSIZE]; - sqd_uint16 filenum; - sqd_uint32 offset; -}; -struct gsiindex_s { - char **filenames; - int *fmt; - sqd_uint16 nfiles; - - struct gsikey_s *elems; - int nkeys; -}; - - -/* from gsi.c - */ -extern GSIFILE *GSIOpen(char *gsifile); -extern int GSIGetRecord(GSIFILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint32 *f3); -extern int GSIGetOffset(GSIFILE *gsi, char *key, char *sqfile, - int *fmt, long *ret_offset); -extern void GSIClose(GSIFILE *gsi); -extern struct gsiindex_s *GSIAllocIndex(void); -extern void GSIFreeIndex(struct gsiindex_s *g); -extern void GSIAddFileToIndex(struct gsiindex_s *g, char *filename, int fmt); -extern void GSIAddKeyToIndex(struct gsiindex_s *g, 
char *key, int filenum, long offset); -extern void GSISortIndex(struct gsiindex_s *g); -extern void GSIWriteIndex(FILE *fp, struct gsiindex_s *g); -extern void GSIWriteHeader(FILE *fp, int nfiles, long nkeys); -extern int GSIWriteFileRecord(FILE *fp, char *fname, int idx, int fmt); -extern int GSIWriteKeyRecord(FILE *fp, char *key, int fileidx, long offset); - -#endif /*GSIH_INCLUDED*/ diff --git a/forester/archive/RIO/others/hmmer/squid/gsi64.c b/forester/archive/RIO/others/hmmer/squid/gsi64.c deleted file mode 100644 index 0aeb82c..0000000 --- a/forester/archive/RIO/others/hmmer/squid/gsi64.c +++ /dev/null @@ -1,395 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ -#ifdef USE_GSI64 - -/* gsi64.c - * Updated interfaces for GSI64 64-bit "generic sequence index" files. - * See gsi.c for old interfaces. - * This is a temporary hack! Needed for human genome project. - */ - -/* 1 + + total records. - * Each record = 42 bytes. - * - * one header record : <"GSI64" (32)> - * file records : - * key records : - * - * CVS $Id: gsi64.c,v 1.1.1.1 2005/03/22 08:34:29 cmzmasek Exp $ - */ - -#include -#include -#include -#ifndef SEEK_SET -#include /* needed for poor crippled SunOS */ -#endif - -#include "squid.h" -#include "gsi64.h" - -/***************************************************************** - * GSI64 index file access routines - *****************************************************************/ - -/* Function: GSI64Open() - * - * Purpose: Open a GSI64 file. Returns the number of records in - * the file and a file pointer. Returns NULL on failure. 
- * The file pointer should be fclose()'d normally. - */ -GSI64FILE * -GSI64Open(char *gsifile) -{ - GSI64FILE *gsi; - char magic[GSI64_KEYSIZE]; - - gsi = (GSI64FILE *) MallocOrDie (sizeof(GSI64FILE)); - if ((gsi->gsifp = fopen(gsifile, "r")) == NULL) - { free(gsi); squid_errno = SQERR_NOFILE; return NULL; } - - if (! fread(magic, sizeof(char), GSI64_KEYSIZE, gsi->gsifp)) - { free(gsi); squid_errno = SQERR_NODATA; return NULL; } - if (strcmp(magic, "GSI64") != 0) - { free(gsi); squid_errno = SQERR_FORMAT; return NULL; } - - if (! fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp)) - { free(gsi); squid_errno = SQERR_NODATA; return NULL; } - if (! fread(&(gsi->recnum), sizeof(sqd_uint64), 1, gsi->gsifp)) - { free(gsi); squid_errno = SQERR_NODATA; return NULL; } - -#if 0 /* HACK! we don't byteswap */ - gsi->nfiles = sre_ntohs(gsi->nfiles); /* convert from network short */ - gsi->recnum = sre_ntohl(gsi->recnum); /* convert from network long */ -#endif - - return gsi; -} - -/* Function: GSI64GetRecord() - * - * Purpose: Each non-header record of a GSI64 index file consists - * of 42 bytes: 32 bytes of character string, a 2 byte - * short, and an 8 byte long long. This function returns the - * three values. - * - * Args: gsi - open GSI64 index file, correctly positioned at a record - * f1 - char[32], allocated by caller (or NULL if unwanted) - * f2 - pointer to short (or NULL if unwanted) - * f3 - pointer to long long (or NULL if unwanted) - * - * Return: 0 on failure and sets squid_errno. - */ -int -GSI64GetRecord(GSI64FILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint64 *f3) -{ - if (f1 == NULL) fseek64(gsi->gsifp, GSI64_KEYSIZE, SEEK_CUR); - else if (! fread(f1, GSI64_KEYSIZE, 1, gsi->gsifp)) - { squid_errno = SQERR_NODATA; return 0; } - - if (f2 == NULL) fseek64(gsi->gsifp, sizeof(sqd_uint16), SEEK_CUR); - else if (! 
fread(f2, sizeof(sqd_uint16), 1, gsi->gsifp)) - { squid_errno = SQERR_NODATA; return 0; } - - if (f3 == NULL) fseek64(gsi->gsifp, sizeof(sqd_uint64), SEEK_CUR); - else if (! fread(f3, sizeof(sqd_uint64), 1, gsi->gsifp)) - { squid_errno = SQERR_NODATA; return 0; } - -#if 0 /* no byteswap yet! HACK! */ - if (f2 != NULL) *f2 = sre_ntohs(*f2); - if (f3 != NULL) *f3 = sre_ntohl(*f3); -#endif - - return 1; -} - - -/* Function: GSI64GetOffset() - * - * Purpose: From a key (sequence name), find a disk offset - * in an open general sequence index file by binary - * search. Presumably GSI64 indexing could be even faster - * if we used hashing. - * - * Args: gsi - GSI64 index file, opened by GSI64Open() - * key - name of key to retrieve indices for - * ret_seqfile - pre-alloced char[32] array for seqfile name - * ret_fmt - format of seqfile - * ret_offset - return: disk offset in seqfile. - */ -int -GSI64GetOffset(GSI64FILE *gsi, char *key, char *ret_seqfile, - int *ret_format, long long *ret_offset) -{ - sqd_uint64 left, right, mid; - int cmp; - char name[GSI64_KEYSIZE + 1]; - sqd_uint64 offset; - sqd_uint16 filenum; - sqd_uint64 fmt; - - name[GSI64_KEYSIZE] = '\0'; - - left = gsi->nfiles + 1; - right = gsi->nfiles + gsi->recnum; - mid = (left + right) / 2; - fseek64(gsi->gsifp, mid * GSI64_RECSIZE, SEEK_SET); - - while (GSI64GetRecord(gsi, name, &filenum, &offset)) - { - cmp = strcmp(name, key); - if (cmp == 0) break; /* found it! */ - else if (left >= right) return 0; /* oops, missed it; fail. */ - else if (cmp < 0) left = mid + 1; /* it's right of mid */ - else if (cmp > 0) right = mid - 1; /* it's left of mid */ - mid = (left + right) / 2; - fseek64(gsi->gsifp, mid * GSI64_RECSIZE, SEEK_SET); - } - - /* Using file number, look up the sequence file and format. 
- */ - fseek64(gsi->gsifp, filenum * GSI64_RECSIZE, SEEK_SET); - GSI64GetRecord(gsi, ret_seqfile, NULL, &fmt); - *ret_format = (int) fmt; - *ret_offset = (long long) offset; - - return 1; -} - -/* Function: GSI64Close() - * - * Purpose: Close an open GSI64 sequence index file. - */ -void -GSI64Close(GSI64FILE *gsi) -{ - fclose(gsi->gsifp); - free(gsi); -} - - -/***************************************************************** - * GSI64 index construction routines - * SRE, Wed Nov 10 11:49:14 1999 [St. Louis] - * - * API: - * g = GSI64AllocIndex(); - * - * [foreach filename, <32 char, no directory path] - * GSI64AddFileToIndex(g, filename); - * filenum++; - * [foreach key, <32 char, w/ filenum 1..nfiles, w/ 64bit offset] - * GSI64AddKeyToIndex(g, key, filenum, offset); - * - * GSI64SortIndex(g); - * GSI64WriteIndex(fp, g); - * GSI64FreeIndex(g); - *****************************************************************/ -struct gsi64index_s * -GSI64AllocIndex(void) -{ - struct gsi64index_s *g; - - g = MallocOrDie(sizeof(struct gsi64index_s)); - g->filenames = MallocOrDie(sizeof(char *) * 10); - g->fmt = MallocOrDie(sizeof(int) * 10); - g->elems = MallocOrDie(sizeof(struct gsi64key_s) * 100); - g->nfiles = 0; - g->nkeys = 0; - return g; -} -void -GSI64FreeIndex(struct gsi64index_s *g) -{ - int i; - for (i = 0; i < g->nfiles; i++) free(g->filenames[i]); - free(g->filenames); - free(g->fmt); - free(g->elems); - free(g); -} -void -GSI64AddFileToIndex(struct gsi64index_s *g, char *filename, int fmt) -{ - int len; - - len = strlen(filename); - if (len >= GSI64_KEYSIZE) Die("File name too long to be indexed."); - g->filenames[g->nfiles] = sre_strdup(filename, len); - g->fmt[g->nfiles] = fmt; - g->nfiles++; - if (g->nfiles % 10 == 0) { - g->filenames = ReallocOrDie(g->filenames, sizeof(char *) * (g->nfiles + 10)); - g->fmt = ReallocOrDie(g->fmt, sizeof(int) * (g->nfiles + 10)); - } -} -void -GSI64AddKeyToIndex(struct gsi64index_s *g, char *key, int filenum, long long offset) -{ - 
if (strlen(key) >= GSI64_KEYSIZE) Die("key too long in GSI64 index"); - if (filenum > SQD_UINT16_MAX) Die("too many files in GSI64 index"); - if (offset > SQD_UINT64_MAX) Die("offset too big in GSI64 index"); - - strncpy(g->elems[g->nkeys].key, key, GSI64_KEYSIZE-1); - g->elems[g->nkeys].key[GSI64_KEYSIZE-1] = '\0'; - g->elems[g->nkeys].filenum = (sqd_uint16) filenum; - g->elems[g->nkeys].offset = (sqd_uint64) offset; - g->nkeys++; - - if (g->nkeys % 100 == 0) - g->elems = ReallocOrDie(g->elems, sizeof(struct gsi64key_s) * (g->nkeys + 100)); -} -static int -gsi_keysorter(const void *k1, const void *k2) -{ - struct gsi64key_s *key1; - struct gsi64key_s *key2; - key1 = (struct gsi64key_s *) k1; - key2 = (struct gsi64key_s *) k2; - return strcmp(key1->key, key2->key); -} -void -GSI64SortIndex(struct gsi64index_s *g) -{ - qsort((void *) g->elems, g->nkeys, sizeof(struct gsi64key_s), gsi_keysorter); -} -void -GSI64WriteIndex(FILE *fp, struct gsi64index_s *g) -{ - sqd_uint16 i; - sqd_uint64 j; - - /* Range checking. - */ - if (g->nfiles > SQD_UINT16_MAX) Die("Too many files in GSI64 index."); - if (g->nkeys > SQD_UINT64_MAX) Die("Too many keys in GSI64 index."); - - GSI64WriteHeader(fp, g->nfiles, g->nkeys); - for (i = 0; i < g->nfiles; i++) - GSI64WriteFileRecord(fp, g->filenames[i], i+1, g->fmt[i]); - for (j = 0; j < g->nkeys; j++) - GSI64WriteKeyRecord(fp, g->elems[j].key, g->elems[j].filenum, g->elems[j].offset); -} - - - - - -/* Function: GSI64WriteHeader() - * Date: SRE, Wed Aug 5 10:36:02 1998 [St. Louis] - * - * Purpose: Write the first record to an open GSI64 file: - * "GSI64" - * - * Args: fp - open file to write to. - * nfiles - number of files indexed - * nkeys - number of keys indexed - * - * Returns: void - */ -void -GSI64WriteHeader(FILE *fp, int nfiles, long long nkeys) -{ - char key[GSI64_KEYSIZE]; - sqd_uint16 f1; - sqd_uint64 f2; - - /* beware potential range errors! 
- */ - if (nfiles > SQD_UINT16_MAX) Die("GSI64: nfiles out of range"); - if (nkeys > SQD_UINT64_MAX) Die("GSI64: nkeys out of range"); - - f1 = (sqd_uint16) nfiles; - f2 = (sqd_uint64) nkeys; -#if 0 /* HACK no byteswap */ - f1 = sre_htons(f1); - f2 = sre_htonl(f2); -#endif - strcpy(key, "GSI64"); - - if (fwrite(key, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC; - if (fwrite(&f1, 2, 1, fp) < 1) PANIC; - if (fwrite(&f2, 8, 1, fp) < 1) PANIC; -} - - -/* Function: GSI64WriteFileRecord() - * Date: SRE, Wed Aug 5 10:45:51 1998 [St. Louis] - * - * Purpose: Write a file record to an open GSI64 file. - * - * Args: fp - open GSI64 file - * fname - file name (max 31 characters) - * idx - file number - * fmt - file format (e.g. kPearson, etc.) - * - * Returns: 0 on failure. 1 on success. - */ -int -GSI64WriteFileRecord(FILE *fp, char *fname, int idx, int fmt) -{ - sqd_uint16 f1; - sqd_uint64 f2; - - if (strlen(fname) >= GSI64_KEYSIZE) return 0; - if (idx > SQD_UINT16_MAX) Die("GSI64: file index out of range"); - if (fmt > SQD_UINT64_MAX) Die("GSI64: format index out of range"); - - f1 = (sqd_uint16) idx; - f2 = (sqd_uint64) fmt; -#if 0 /* hack : no byteswap */ - f1 = sre_htons(f1); - f2 = sre_htonl(f2); -#endif - - if (fwrite(fname, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC; - if (fwrite(&f1, 2, 1, fp) < 1) PANIC; - if (fwrite(&f2, 8, 1, fp) < 1) PANIC; - return 1; -} - - -/* Function: GSI64WriteKeyRecord() - * Date: SRE, Wed Aug 5 10:52:30 1998 [St. Louis] - * - * Purpose: Write a key record to a GSI64 file. - * - * Args: fp - open GSI64 file for writing - * key - key (max 31 char + \0) - * fileidx - which file number to find this key in - * offset - offset for this key - * - * Returns: 1 on success, else 0. - * will fail if key >= 32 chars, for instance. 
- */ -int -GSI64WriteKeyRecord(FILE *fp, char *key, int fileidx, long long offset) -{ - sqd_uint16 f1; - sqd_uint64 f2; - - if (strlen(key) >= GSI64_KEYSIZE) return 0; - if (fileidx > SQD_UINT16_MAX) Die("GSI64: file index out of range"); - if (offset > SQD_UINT64_MAX) Die("GSI64: offset out of range"); - - f1 = (sqd_uint16) fileidx; - f2 = (sqd_uint64) offset; -#if 0 /* HACK! */ - f1 = sre_htons(f1); - f2 = sre_htonl(f2); -#endif - - if (fwrite(key, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC; - if (fwrite(&f1, 2, 1, fp) < 1) PANIC; - if (fwrite(&f2, 8, 1, fp) < 1) PANIC; - return 1; -} - -#endif /*USE_GSI64 */ diff --git a/forester/archive/RIO/others/hmmer/squid/gsi64.h b/forester/archive/RIO/others/hmmer/squid/gsi64.h deleted file mode 100644 index 99f7296..0000000 --- a/forester/archive/RIO/others/hmmer/squid/gsi64.h +++ /dev/null @@ -1,101 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -#ifndef GSI64H_INCLUDED -#define GSI64H_INCLUDED -#ifdef USE_GSI64 - -/* gsi64.h - * Database indexing (GSI64 format support) - * CVS $Id: gsi64.h,v 1.1.1.1 2005/03/22 08:34:29 cmzmasek Exp $ - * - * A GSI64 (generic sequence index, 64 bit hack) file is composed of - * recnum + nfiles + 1 records. Each record contains - * three fields; key, file number, and disk offset. 
- * Record 0 contains: - * [ "GSI64" ] [ nfiles ] [ recnum ] - * Records 1..nfiles map file names to file numbers, and contain: - * [ filename ] [ file number, 1..nfiles ] [ 0 (unused) ] - * Records nfiles+1 to recnum+nfiles+1 provide disk offset - * and file number indices for every key: - * [ key ] [ file number ] [ offset] - * - * Because the file is binary, we take some (but not - * complete) care to improve portability amongst platforms. - * This means using network order integers (see ntohl()) - * and defining types for 16 and 64 bit integers. - * - * A short test program that verifies the sizes of these - * data types would be a good idea... - * - * Because we use 64-bit offsets, ftell64(), and fseek64(), - * we rely on the OS actually providing these. This is - * a temporary hack for human genome analysis. - */ -typedef unsigned long long sqd_uint64; /* 64 bit integer. */ - -#define GSI64_KEYSIZE 32 /* keys are 32 bytes long */ -#define GSI64_RECSIZE 42 /* 32 + 2 + 8 bytes */ -#define SQD_UINT16_MAX 65535 /* 2^16-1 */ -#define SQD_UINT64_MAX 18446744073709551615LU /* 2^64-1 */ - -struct gsi64_s { - FILE *gsifp; /* open GSI index file */ - sqd_uint16 nfiles; /* number of files = 16 bit int */ - sqd_uint64 recnum; /* number of records = 64 bit int */ -}; -typedef struct gsi64_s GSI64FILE; - -struct gsi64key_s { - char key[GSI64_KEYSIZE]; - sqd_uint16 filenum; - sqd_uint64 offset; -}; -struct gsi64index_s { - char **filenames; - int *fmt; - sqd_uint16 nfiles; - - struct gsi64key_s *elems; - sqd_uint64 nkeys; -}; - - - -/* if ntohl() and friends are not available, you - * can slip replacements in by providing sre_ntohl() - * functions. (i.e., there is a possible portability problem here.) 
- */ -#if 0 -#define sre_ntohl(x) ntohl(x); -#define sre_ntohs(x) ntohs(x); -#define sre_htonl(x) htonl(x); -#define sre_htons(x) htons(x); -#endif - -/* from gsi64.c - */ -extern GSI64FILE *GSI64Open(char *gsifile); -extern int GSI64GetRecord(GSI64FILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint64 *f3); -extern int GSI64GetOffset(GSI64FILE *gsi, char *key, char *sqfile, - int *fmt, long long *ret_offset); -extern void GSI64Close(GSI64FILE *gsi); -extern struct gsi64index_s *GSI64AllocIndex(void); -extern void GSI64FreeIndex(struct gsi64index_s *g); -extern void GSI64AddFileToIndex(struct gsi64index_s *g, char *filename, int fmt); -extern void GSI64AddKeyToIndex(struct gsi64index_s *g, char *key, int filenum, long long offset); -extern void GSI64SortIndex(struct gsi64index_s *g); -extern void GSI64WriteIndex(FILE *fp, struct gsi64index_s *g); -extern void GSI64WriteHeader(FILE *fp, int nfiles, long long nkeys); -extern int GSI64WriteFileRecord(FILE *fp, char *fname, int idx, int fmt); -extern int GSI64WriteKeyRecord(FILE *fp, char *key, int fileidx, long long offset); - -#endif /* USE_GSI64 */ -#endif /*GSIH_INCLUDED*/ diff --git a/forester/archive/RIO/others/hmmer/squid/hsregex.c b/forester/archive/RIO/others/hmmer/squid/hsregex.c deleted file mode 100644 index 6113900..0000000 --- a/forester/archive/RIO/others/hmmer/squid/hsregex.c +++ /dev/null @@ -1,1314 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/***************************************************************** - * This code is an altered version of Henry Spencer's - * regex library. 
Alterations are limited to minor streamlining, - * and some name changes to protect the SQUID namespace. - * Henry's copyright notice appears below. - * You can obtain the original from - * ftp://ftp.zoo.toronto.edu/pub/bookregex.tar.Z - * Thanks, Henry! - * - * SRE, Fri Aug 28 11:10:17 1998 - * RCS $Id: hsregex.c,v 1.1.1.1 2005/03/22 08:34:17 cmzmasek Exp $ - *****************************************************************/ - -#include -#include -#include -#include -#include "squid.h" - -/* global sqd_parse[] are managed by Strparse(). - * WARNING: TODO: this code is not threadsafe, and needs to be revised. - */ -char *sqd_parse[10]; - -/* Function: Strparse() - * - * Purpose: Match a regexp to a string. Returns 1 if pattern matches, - * else 0. - * - * Much like Perl, Strparse() makes copies of the matching - * substrings available via globals, sqd_parse[]. - * sqd_parse[0] contains a copy of the complete matched - * text. sqd_parse[1-9] contain copies of up to nine - * different substrings matched within parentheses. - * The memory for these strings is internally managed and - * volatile; the next call to Strparse() may destroy them. - * If the caller needs the matched substrings to persist - * beyond a new Strparse() call, it must make its own - * copies. - * - * A minor drawback of the memory management is that - * there will be a small amount of unfree'd memory being - * managed by Strparse() when a program exits; this may - * confuse memory debugging (Purify, dbmalloc). The - * general cleanup function SqdClean() is provided; - * you can call this before exiting. - * - * Uses an extended POSIX regular expression interface. - * A copylefted GNU implementation is included in the squid - * implementation (gnuregex.c) for use on non-POSIX compliant - * systems. POSIX 1003.2-compliant systems (all UNIX, - * some WinNT, I believe) can omit the GNU code if necessary. - * - * I built this for ease of use, not speed nor efficiency. 
- * - * Example: Strparse("foo-...-baz", "foo-bar-baz") returns 0 - * Strparse("foo-(...)-baz", "foo-bar-baz") - * returns 0; sqd_parse[0] is "foo-bar-baz"; - * sqd_parse[1] is "bar". - * - * Args: rexp - regular expression, extended POSIX form - * s - string to match against - * ntok - number of () substrings we will save (maximum NSUBEXP-1) - * - * Return: 1 on match, 0 if no match - */ -int -Strparse(char *rexp, char *s, int ntok) -{ - sqd_regexp *pat; - int code; - int len; - int i; - /* sanity check */ - if (ntok >= NSUBEXP ) Die("Strparse(): ntok must be <= %d", NSUBEXP-1); - - /* Free previous global substring buffers - */ - for (i = 0; i <= ntok; i++) - if (sqd_parse[i] != NULL) - { - free(sqd_parse[i]); - sqd_parse[i] = NULL; - } - - /* Compile and match the pattern, using our modified - * copy of Henry Spencer's regexp library - */ - if ((pat = sqd_regcomp(rexp)) == NULL) - Die("regexp compilation failed."); - code = sqd_regexec(pat, s); - - /* Fill the global substring buffers - */ - if (code == 1) - for (i = 0; i <= ntok; i++) - if (pat->startp[i] != NULL && pat->endp[i] != NULL) - { - len = pat->endp[i] - pat->startp[i]; - sqd_parse[i] = (char *) MallocOrDie(sizeof(char) * (len+1)); - strncpy(sqd_parse[i], pat->startp[i], len); - sqd_parse[i][len] = '\0'; - } - - free(pat); - return code; -} - -/* Function: SqdClean() - * Date: SRE, Wed Oct 29 12:52:08 1997 [TWA 721] - * - * Purpose: Clean up any squid library allocations before exiting - * a program, so we don't leave unfree'd memory around - * and confuse a malloc debugger like Purify or dbmalloc. - */ -void -SqdClean(void) -{ - int i; - - /* Free global substring buffers that Strparse() uses - */ - for (i = 0; i <= 9; i++) - if (sqd_parse[i] != NULL) { - free(sqd_parse[i]); - sqd_parse[i] = NULL; - } -} - - - -/* all code below is: - * Copyright (c) 1986, 1993, 1995 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. 
- * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it in any way, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented (by explicit claim or omission) as being - * the original software. - * - * 4. This notice must not be removed or altered. - */ - -/* - * sqd_regcomp and sqd_regexec -- sqd_regsub and sqd_regerror are elsewhere - */ - -/* - * The first byte of the regexp internal "program" is actually this magic - * number; the start node begins in the second byte. - */ -#define SQD_REGMAGIC 0234 - -/* - * The "internal use only" fields in regexp.h are present to pass info from - * compile to execute that permits the execute phase to run lots faster on - * simple cases. They are: - * - * regstart char that must begin a match; '\0' if none obvious - * reganch is the match anchored (at beginning-of-line only)? - * regmust string (pointer into program) that match must include, or NULL - * regmlen length of regmust string - * - * Regstart and reganch permit very fast decisions on suitable starting points - * for a match, cutting down the work a lot. Regmust permits fast rejection - * of lines that cannot possibly match. The regmust tests are costly enough - * that sqd_regcomp() supplies a regmust only if the r.e. contains something - * potentially expensive (at present, the only such thing detected is * or + - * at the start of the r.e., which can involve a lot of backup). Regmlen is - * supplied because the test in sqd_regexec() needs it and sqd_regcomp() is computing - * it anyway. - */ - -/* - * Structure for regexp "program". 
This is essentially a linear encoding - * of a nondeterministic finite-state machine (aka syntax charts or - * "railroad normal form" in parsing technology). Each node is an opcode - * plus a "next" pointer, possibly plus an operand. "Next" pointers of - * all nodes except BRANCH implement concatenation; a "next" pointer with - * a BRANCH on both ends of it is connecting two alternatives. (Here we - * have one of the subtle syntax dependencies: an individual BRANCH (as - * opposed to a collection of them) is never concatenated with anything - * because of operator precedence.) The operand of some types of node is - * a literal string; for others, it is a node leading into a sub-FSM. In - * particular, the operand of a BRANCH node is the first node of the branch. - * (NB this is *not* a tree structure: the tail of the branch connects - * to the thing following the set of BRANCHes.) The opcodes are: - */ - -/* definition number opnd? meaning */ -#define END 0 /* no End of program. */ -#define BOL 1 /* no Match beginning of line. */ -#define EOL 2 /* no Match end of line. */ -#define ANY 3 /* no Match any character. */ -#define ANYOF 4 /* str Match any of these. */ -#define ANYBUT 5 /* str Match any but one of these. */ -#define BRANCH 6 /* node Match this, or the next..\&. */ -#define BACK 7 /* no "next" ptr points backward. */ -#define EXACTLY 8 /* str Match this string. */ -#define NOTHING 9 /* no Match empty string. */ -#define STAR 10 /* node Match this 0 or more times. */ -#define PLUS 11 /* node Match this 1 or more times. */ -#define OPEN 20 /* no Sub-RE starts here. */ - /* OPEN+1 is number 1, etc. */ -#define CLOSE 30 /* no Analogous to OPEN. */ - -/* - * Opcode notes: - * - * BRANCH The set of branches constituting a single choice are hooked - * together with their "next" pointers, since precedence prevents - * anything being concatenated to any individual branch. 
The - * "next" pointer of the last BRANCH in a choice points to the - * thing following the whole choice. This is also where the - * final "next" pointer of each individual branch points; each - * branch starts with the operand node of a BRANCH node. - * - * BACK Normal "next" pointers all implicitly point forward; BACK - * exists to make loop structures possible. - * - * STAR,PLUS '?', and complex '*' and '+', are implemented as circular - * BRANCH structures using BACK. Simple cases (one character - * per match) are implemented with STAR and PLUS for speed - * and to minimize recursive plunges. - * - * OPEN,CLOSE ...are numbered at compile time. - */ - -/* - * A node is one char of opcode followed by two chars of "next" pointer. - * "Next" pointers are stored as two 8-bit pieces, high order first. The - * value is a positive offset from the opcode of the node containing it. - * An operand, if any, simply follows the node. (Note that much of the - * code generation knows about this implicit relationship.) - * - * Using two bytes for the "next" pointer is vast overkill for most things, - * but allows patterns to get big without disasters. - */ -#define OP(p) (*(p)) -#define NEXT(p) (((*((p)+1)&0177)<<8) + (*((p)+2)&0377)) -#define OPERAND(p) ((p) + 3) - -/* - * Utility definitions. - */ -#define FAIL(m) { sqd_regerror(m); return(NULL); } -#define ISREPN(c) ((c) == '*' || (c) == '+' || (c) == '?') -#define META "^$.[()|?+*\\" - -/* - * Flags to be passed up and down. - */ -#define HASWIDTH 01 /* Known never to match null string. */ -#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ -#define SPSTART 04 /* Starts with * or +. */ -#define WORST 0 /* Worst case. */ - -/* - * Work-variable struct for sqd_regcomp(). - */ -struct comp { - char *regparse; /* Input-scan pointer. */ - int regnpar; /* () count. */ - char *regcode; /* Code-emit pointer; ®dummy = don't. */ - char regdummy[3]; /* NOTHING, 0 next ptr */ - long regsize; /* Code size. 
*/ -}; -#define EMITTING(cp) ((cp)->regcode != (cp)->regdummy) - -/* - * Forward declarations for sqd_regcomp()'s friends. - */ -static char *reg(struct comp *cp, int paren, int *flagp); -static char *regbranch(struct comp *cp, int *flagp); -static char *regpiece(struct comp *cp, int *flagp); -static char *regatom(struct comp *cp, int *flagp); -static char *regnode(struct comp *cp, int op); -static char *regnext(char *node); -static void regc(struct comp *cp, int c); -static void reginsert(struct comp *cp, int op, char *opnd); -static void regtail(struct comp *cp, char *p, char *val); -static void regoptail(struct comp *cp, char *p, char *val); - -/* - - sqd_regcomp - compile a regular expression into internal code - * - * We can't allocate space until we know how big the compiled form will be, - * but we can't compile it (and thus know how big it is) until we've got a - * place to put the code. So we cheat: we compile it twice, once with code - * generation turned off and size counting turned on, and once "for real". - * This also means that we don't allocate space until we are sure that the - * thing really will compile successfully, and we never have to move the - * code and thus invalidate pointers into it. (Note that it has to be in - * one piece because free() must be able to free it all.) - * - * Beware that the optimization-preparation code in here knows about some - * of the structure of the compiled regexp. - */ -sqd_regexp * -sqd_regcomp(exp) -const char *exp; -{ - register sqd_regexp *r; - register char *scan; - int flags; - struct comp co; - - if (exp == NULL) - FAIL("NULL argument to sqd_regcomp"); - - /* First pass: determine size, legality. */ - co.regparse = (char *)exp; - co.regnpar = 1; - co.regsize = 0L; - co.regdummy[0] = NOTHING; - co.regdummy[1] = co.regdummy[2] = 0; - co.regcode = co.regdummy; - regc(&co, SQD_REGMAGIC); - if (reg(&co, 0, &flags) == NULL) - return(NULL); - - /* Small enough for pointer-storage convention? 
*/ - if (co.regsize >= 0x7fffL) /* Probably could be 0xffffL. */ - FAIL("regexp too big"); - - /* Allocate space. */ - r = (sqd_regexp *)malloc(sizeof(sqd_regexp) + (size_t)co.regsize); - if (r == NULL) - FAIL("out of space"); - - /* Second pass: emit code. */ - co.regparse = (char *)exp; - co.regnpar = 1; - co.regcode = r->program; - regc(&co, SQD_REGMAGIC); - if (reg(&co, 0, &flags) == NULL) - return(NULL); - - /* Dig out information for optimizations. */ - r->regstart = '\0'; /* Worst-case defaults. */ - r->reganch = 0; - r->regmust = NULL; - r->regmlen = 0; - scan = r->program+1; /* First BRANCH. */ - if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ - scan = OPERAND(scan); - - /* Starting-point info. */ - if (OP(scan) == EXACTLY) - r->regstart = *OPERAND(scan); - else if (OP(scan) == BOL) - r->reganch = 1; - - /* - * If there's something expensive in the r.e., find the - * longest literal string that must appear and make it the - * regmust. Resolve ties in favor of later strings, since - * the regstart check works with the beginning of the r.e. - * and avoiding duplication strengthens checking. Not a - * strong reason, but sufficient in the absence of others. - */ - if (flags&SPSTART) { - register char *longest = NULL; - register size_t len = 0; - - for (; scan != NULL; scan = regnext(scan)) - if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { - longest = OPERAND(scan); - len = strlen(OPERAND(scan)); - } - r->regmust = longest; - r->regmlen = (int)len; - } - } - - return(r); -} - -/* - - reg - regular expression, i.e. main body or parenthesized thing - * - * Caller must absorb opening parenthesis. - * - * Combining parenthesis handling with the base level of regular expression - * is a trifle forced, but the need to tie the tails of the branches to what - * follows makes it hard to avoid. - */ -static char * -reg(cp, paren, flagp) -register struct comp *cp; -int paren; /* Parenthesized? 
*/ -int *flagp; -{ - register char *ret = NULL; /* SRE: NULL init added to silence gcc */ - register char *br; - register char *ender; - register int parno = 0; /* SRE: init added to silence gcc */ - int flags; - - *flagp = HASWIDTH; /* Tentatively. */ - - if (paren) { - /* Make an OPEN node. */ - if (cp->regnpar >= NSUBEXP) - FAIL("too many ()"); - parno = cp->regnpar; - cp->regnpar++; - ret = regnode(cp, OPEN+parno); - } - - /* Pick up the branches, linking them together. */ - br = regbranch(cp, &flags); - if (br == NULL) - return(NULL); - if (paren) - regtail(cp, ret, br); /* OPEN -> first. */ - else - ret = br; - *flagp &= ~(~flags&HASWIDTH); /* Clear bit if bit 0. */ - *flagp |= flags&SPSTART; - while (*cp->regparse == '|') { - cp->regparse++; - br = regbranch(cp, &flags); - if (br == NULL) - return(NULL); - regtail(cp, ret, br); /* BRANCH -> BRANCH. */ - *flagp &= ~(~flags&HASWIDTH); - *flagp |= flags&SPSTART; - } - - /* Make a closing node, and hook it on the end. */ - ender = regnode(cp, (paren) ? CLOSE+parno : END); - regtail(cp, ret, ender); - - /* Hook the tails of the branches to the closing node. */ - for (br = ret; br != NULL; br = regnext(br)) - regoptail(cp, br, ender); - - /* Check for proper termination. */ - if (paren && *cp->regparse++ != ')') { - FAIL("unterminated ()"); - } else if (!paren && *cp->regparse != '\0') { - if (*cp->regparse == ')') { - FAIL("unmatched ()"); - } else - FAIL("internal error: junk on end"); - /* NOTREACHED */ - } - - return(ret); -} - -/* - - regbranch - one alternative of an | operator - * - * Implements the concatenation operator. - */ -static char * -regbranch(cp, flagp) -register struct comp *cp; -int *flagp; -{ - register char *ret; - register char *chain; - register char *latest; - int flags; - register int c; - - *flagp = WORST; /* Tentatively. 
*/ - - ret = regnode(cp, BRANCH); - chain = NULL; - while ((c = *cp->regparse) != '\0' && c != '|' && c != ')') { - latest = regpiece(cp, &flags); - if (latest == NULL) - return(NULL); - *flagp |= flags&HASWIDTH; - if (chain == NULL) /* First piece. */ - *flagp |= flags&SPSTART; - else - regtail(cp, chain, latest); - chain = latest; - } - if (chain == NULL) /* Loop ran zero times. */ - (void) regnode(cp, NOTHING); - - return(ret); -} - -/* - - regpiece - something followed by possible [*+?] - * - * Note that the branching code sequences used for ? and the general cases - * of * and + are somewhat optimized: they use the same NOTHING node as - * both the endmarker for their branch list and the body of the last branch. - * It might seem that this node could be dispensed with entirely, but the - * endmarker role is not redundant. - */ -static char * -regpiece(cp, flagp) -register struct comp *cp; -int *flagp; -{ - register char *ret; - register char op; - register char *next; - int flags; - - ret = regatom(cp, &flags); - if (ret == NULL) - return(NULL); - - op = *cp->regparse; - if (!ISREPN(op)) { - *flagp = flags; - return(ret); - } - - if (!(flags&HASWIDTH) && op != '?') - FAIL("*+ operand could be empty"); - switch (op) { - case '*': *flagp = WORST|SPSTART; break; - case '+': *flagp = WORST|SPSTART|HASWIDTH; break; - case '?': *flagp = WORST; break; - } - - if (op == '*' && (flags&SIMPLE)) - reginsert(cp, STAR, ret); - else if (op == '*') { - /* Emit x* as (x&|), where & means "self". */ - reginsert(cp, BRANCH, ret); /* Either x */ - regoptail(cp, ret, regnode(cp, BACK)); /* and loop */ - regoptail(cp, ret, ret); /* back */ - regtail(cp, ret, regnode(cp, BRANCH)); /* or */ - regtail(cp, ret, regnode(cp, NOTHING)); /* null. */ - } else if (op == '+' && (flags&SIMPLE)) - reginsert(cp, PLUS, ret); - else if (op == '+') { - /* Emit x+ as x(&|), where & means "self". 
*/ - next = regnode(cp, BRANCH); /* Either */ - regtail(cp, ret, next); - regtail(cp, regnode(cp, BACK), ret); /* loop back */ - regtail(cp, next, regnode(cp, BRANCH)); /* or */ - regtail(cp, ret, regnode(cp, NOTHING)); /* null. */ - } else if (op == '?') { - /* Emit x? as (x|) */ - reginsert(cp, BRANCH, ret); /* Either x */ - regtail(cp, ret, regnode(cp, BRANCH)); /* or */ - next = regnode(cp, NOTHING); /* null. */ - regtail(cp, ret, next); - regoptail(cp, ret, next); - } - cp->regparse++; - if (ISREPN(*cp->regparse)) - FAIL("nested *?+"); - - return(ret); -} - -/* - - regatom - the lowest level - * - * Optimization: gobbles an entire sequence of ordinary characters so that - * it can turn them into a single node, which is smaller to store and - * faster to run. Backslashed characters are exceptions, each becoming a - * separate node; the code is simpler that way and it's not worth fixing. - */ -static char * -regatom(cp, flagp) -register struct comp *cp; -int *flagp; -{ - register char *ret; - int flags; - - *flagp = WORST; /* Tentatively. */ - - switch (*cp->regparse++) { - case '^': - ret = regnode(cp, BOL); - break; - case '$': - ret = regnode(cp, EOL); - break; - case '.': - ret = regnode(cp, ANY); - *flagp |= HASWIDTH|SIMPLE; - break; - case '[': { - register int range; - register int rangeend; - register int c; - - if (*cp->regparse == '^') { /* Complement of range. 
*/ - ret = regnode(cp, ANYBUT); - cp->regparse++; - } else - ret = regnode(cp, ANYOF); - if ((c = *cp->regparse) == ']' || c == '-') { - regc(cp, c); - cp->regparse++; - } - while ((c = *cp->regparse++) != '\0' && c != ']') { - if (c != '-') - regc(cp, c); - else if ((c = *cp->regparse) == ']' || c == '\0') - regc(cp, '-'); - else { - range = (unsigned char)*(cp->regparse-2); - rangeend = (unsigned char)c; - if (range > rangeend) - FAIL("invalid [] range"); - for (range++; range <= rangeend; range++) - regc(cp, range); - cp->regparse++; - } - } - regc(cp, '\0'); - if (c != ']') - FAIL("unmatched []"); - *flagp |= HASWIDTH|SIMPLE; - break; - } - case '(': - ret = reg(cp, 1, &flags); - if (ret == NULL) - return(NULL); - *flagp |= flags&(HASWIDTH|SPSTART); - break; - case '\0': - case '|': - case ')': - /* supposed to be caught earlier */ - FAIL("internal error: \\0|) unexpected"); - break; - case '?': - case '+': - case '*': - FAIL("?+* follows nothing"); - break; - case '\\': - if (*cp->regparse == '\0') - FAIL("trailing \\"); - ret = regnode(cp, EXACTLY); - regc(cp, *cp->regparse++); - regc(cp, '\0'); - *flagp |= HASWIDTH|SIMPLE; - break; - default: { - register size_t len; - register char ender; - - cp->regparse--; - len = strcspn(cp->regparse, META); - if (len == 0) - FAIL("internal error: strcspn 0"); - ender = *(cp->regparse+len); - if (len > 1 && ISREPN(ender)) - len--; /* Back off clear of ?+* operand. */ - *flagp |= HASWIDTH; - if (len == 1) - *flagp |= SIMPLE; - ret = regnode(cp, EXACTLY); - for (; len > 0; len--) - regc(cp, *cp->regparse++); - regc(cp, '\0'); - break; - } - } - - return(ret); -} - -/* - - regnode - emit a node - */ -static char * /* Location. */ -regnode(cp, op) -register struct comp *cp; -char op; -{ - register char *const ret = cp->regcode; - register char *ptr; - - if (!EMITTING(cp)) { - cp->regsize += 3; - return(ret); - } - - ptr = ret; - *ptr++ = op; - *ptr++ = '\0'; /* Null next pointer. 
*/ - *ptr++ = '\0'; - cp->regcode = ptr; - - return(ret); -} - -/* - - regc - emit (if appropriate) a byte of code - */ -static void -regc(cp, b) -register struct comp *cp; -char b; -{ - if (EMITTING(cp)) - *cp->regcode++ = b; - else - cp->regsize++; -} - -/* - - reginsert - insert an operator in front of already-emitted operand - * - * Means relocating the operand. - */ -static void -reginsert(cp, op, opnd) -register struct comp *cp; -char op; -char *opnd; -{ - register char *place; - - if (!EMITTING(cp)) { - cp->regsize += 3; - return; - } - - (void) memmove(opnd+3, opnd, (size_t)(cp->regcode - opnd)); - cp->regcode += 3; - - place = opnd; /* Op node, where operand used to be. */ - *place++ = op; - *place++ = '\0'; - *place++ = '\0'; -} - -/* - - regtail - set the next-pointer at the end of a node chain - */ -static void -regtail(cp, p, val) -register struct comp *cp; -char *p; -char *val; -{ - register char *scan; - register char *temp; - register int offset; - - if (!EMITTING(cp)) - return; - - /* Find last node. */ - for (scan = p; (temp = regnext(scan)) != NULL; scan = temp) - continue; - - offset = (OP(scan) == BACK) ? scan - val : val - scan; - *(scan+1) = (offset>>8)&0177; - *(scan+2) = offset&0377; -} - -/* - - regoptail - regtail on operand of first argument; nop if operandless - */ -static void -regoptail(cp, p, val) -register struct comp *cp; -char *p; -char *val; -{ - /* "Operandless" and "op != BRANCH" are synonymous in practice. */ - if (!EMITTING(cp) || OP(p) != BRANCH) - return; - regtail(cp, OPERAND(p), val); -} - -/* - * sqd_regexec and friends - */ - -/* - * Work-variable struct for sqd_regexec(). - */ -struct exec { - char *reginput; /* String-input pointer. */ - char *regbol; /* Beginning of input, for ^ check. */ - char **regstartp; /* Pointer to startp array. */ - char **regendp; /* Ditto for endp. */ -}; - -/* - * Forwards. 
- */ -static int regtry(struct exec *ep, sqd_regexp *rp, char *string); -static int regmatch(struct exec *ep, char *prog); -static size_t regrepeat(struct exec *ep, char *node); - -#ifdef DEBUG -int regnarrate = 0; -void regdump(); -static char *regprop(); -#endif - -/* - - sqd_regexec - match a regexp against a string - */ -int -sqd_regexec(prog, str) -register sqd_regexp *prog; -const char *str; -{ - register char *string = (char *)str; /* avert const poisoning */ - register char *s; - struct exec ex; - - /* Be paranoid. */ - if (prog == NULL || string == NULL) { - sqd_regerror("NULL argument to sqd_regexec"); - return(0); - } - - /* Check validity of program. */ - if ((unsigned char)*prog->program != SQD_REGMAGIC) { - sqd_regerror("corrupted regexp"); - return(0); - } - - /* If there is a "must appear" string, look for it. */ - if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL) - return(0); - - /* Mark beginning of line for ^ . */ - ex.regbol = string; - ex.regstartp = prog->startp; - ex.regendp = prog->endp; - - /* Simplest case: anchored match need be tried only once. */ - if (prog->reganch) - return(regtry(&ex, prog, string)); - - /* Messy cases: unanchored match. */ - if (prog->regstart != '\0') { - /* We know what char it must start with. */ - for (s = string; s != NULL; s = strchr(s+1, prog->regstart)) - if (regtry(&ex, prog, s)) - return(1); - return(0); - } else { - /* We don't -- general case. 
*/ - for (s = string; !regtry(&ex, prog, s); s++) - if (*s == '\0') - return(0); - return(1); - } - /* NOTREACHED */ -} - -/* - - regtry - try match at specific point - */ -static int /* 0 failure, 1 success */ -regtry(ep, prog, string) -register struct exec *ep; -sqd_regexp *prog; -char *string; -{ - register int i; - register char **stp; - register char **enp; - - ep->reginput = string; - - stp = prog->startp; - enp = prog->endp; - for (i = NSUBEXP; i > 0; i--) { - *stp++ = NULL; - *enp++ = NULL; - } - if (regmatch(ep, prog->program + 1)) { - prog->startp[0] = string; - prog->endp[0] = ep->reginput; - return(1); - } else - return(0); -} - -/* - - regmatch - main matching routine - * - * Conceptually the strategy is simple: check to see whether the current - * node matches, call self recursively to see whether the rest matches, - * and then act accordingly. In practice we make some effort to avoid - * recursion, in particular by going through "ordinary" nodes (that don't - * need to know whether the rest of the match failed) by a loop instead of - * by recursion. - */ -static int /* 0 failure, 1 success */ -regmatch(ep, prog) -register struct exec *ep; -char *prog; -{ - register char *scan; /* Current node. */ - char *next; /* Next node. */ - -#ifdef DEBUG - if (prog != NULL && regnarrate) - fprintf(stderr, "%s(\n", regprop(prog)); -#endif - for (scan = prog; scan != NULL; scan = next) { -#ifdef DEBUG - if (regnarrate) - fprintf(stderr, "%s...\n", regprop(scan)); -#endif - next = regnext(scan); - - switch (OP(scan)) { - case BOL: - if (ep->reginput != ep->regbol) - return(0); - break; - case EOL: - if (*ep->reginput != '\0') - return(0); - break; - case ANY: - if (*ep->reginput == '\0') - return(0); - ep->reginput++; - break; - case EXACTLY: { - register size_t len; - register char *const opnd = OPERAND(scan); - - /* Inline the first character, for speed. 
*/ - if (*opnd != *ep->reginput) - return(0); - len = strlen(opnd); - if (len > 1 && strncmp(opnd, ep->reginput, len) != 0) - return(0); - ep->reginput += len; - break; - } - case ANYOF: - if (*ep->reginput == '\0' || - strchr(OPERAND(scan), *ep->reginput) == NULL) - return(0); - ep->reginput++; - break; - case ANYBUT: - if (*ep->reginput == '\0' || - strchr(OPERAND(scan), *ep->reginput) != NULL) - return(0); - ep->reginput++; - break; - case NOTHING: - break; - case BACK: - break; - case OPEN+1: case OPEN+2: case OPEN+3: - case OPEN+4: case OPEN+5: case OPEN+6: - case OPEN+7: case OPEN+8: case OPEN+9: { - register const int no = OP(scan) - OPEN; - register char *const input = ep->reginput; - - if (regmatch(ep, next)) { - /* - * Don't set startp if some later - * invocation of the same parentheses - * already has. - */ - if (ep->regstartp[no] == NULL) - ep->regstartp[no] = input; - return(1); - } else - return(0); - break; - } - case CLOSE+1: case CLOSE+2: case CLOSE+3: - case CLOSE+4: case CLOSE+5: case CLOSE+6: - case CLOSE+7: case CLOSE+8: case CLOSE+9: { - register const int no = OP(scan) - CLOSE; - register char *const input = ep->reginput; - - if (regmatch(ep, next)) { - /* - * Don't set endp if some later - * invocation of the same parentheses - * already has. - */ - if (ep->regendp[no] == NULL) - ep->regendp[no] = input; - return(1); - } else - return(0); - break; - } - case BRANCH: { - register char *const save = ep->reginput; - - if (OP(next) != BRANCH) /* No choice. */ - next = OPERAND(scan); /* Avoid recursion. */ - else { - while (OP(scan) == BRANCH) { - if (regmatch(ep, OPERAND(scan))) - return(1); - ep->reginput = save; - scan = regnext(scan); - } - return(0); - /* NOTREACHED */ - } - break; - } - case STAR: case PLUS: { - register const char nextch = - (OP(next) == EXACTLY) ? *OPERAND(next) : '\0'; - register size_t no; - register char *const save = ep->reginput; - register const size_t min = (OP(scan) == STAR) ? 
0 : 1; - - for (no = regrepeat(ep, OPERAND(scan)) + 1; no > min; no--) { - ep->reginput = save + no - 1; - /* If it could work, try it. */ - if (nextch == '\0' || *ep->reginput == nextch) - if (regmatch(ep, next)) - return(1); - } - return(0); - break; - } - case END: - return(1); /* Success! */ - break; - default: - sqd_regerror("regexp corruption"); - return(0); - break; - } - } - - /* - * We get here only if there's trouble -- normally "case END" is - * the terminating point. - */ - sqd_regerror("corrupted pointers"); - return(0); -} - -/* - - regrepeat - report how many times something simple would match - */ -static size_t -regrepeat(ep, node) -register struct exec *ep; -char *node; -{ - register size_t count; - register char *scan; - register char ch; - - switch (OP(node)) { - case ANY: - return(strlen(ep->reginput)); - break; - case EXACTLY: - ch = *OPERAND(node); - count = 0; - for (scan = ep->reginput; *scan == ch; scan++) - count++; - return(count); - break; - case ANYOF: - return(strspn(ep->reginput, OPERAND(node))); - break; - case ANYBUT: - return(strcspn(ep->reginput, OPERAND(node))); - break; - default: /* Oh dear. Called inappropriately. */ - sqd_regerror("internal error: bad call of regrepeat"); - return(0); /* Best compromise. */ - break; - } - /* NOTREACHED */ -} - -/* - - regnext - dig the "next" pointer out of a node - */ -static char * -regnext(p) -register char *p; -{ - register const int offset = NEXT(p); - - if (offset == 0) - return(NULL); - - return((OP(p) == BACK) ? p-offset : p+offset); -} - -#ifdef DEBUG - -static char *regprop(); - -/* - - regdump - dump a regexp onto stdout in vaguely comprehensible form - */ -void -regdump(r) -sqd_regexp *r; -{ - register char *s; - register char op = EXACTLY; /* Arbitrary non-END op. */ - register char *next; - - - s = r->program + 1; - while (op != END) { /* While that wasn't END last time... */ - op = OP(s); - printf("%2d%s", s-r->program, regprop(s)); /* Where, what. 
*/ - next = regnext(s); - if (next == NULL) /* Next ptr. */ - printf("(0)"); - else - printf("(%d)", (s-r->program)+(next-s)); - s += 3; - if (op == ANYOF || op == ANYBUT || op == EXACTLY) { - /* Literal string, where present. */ - while (*s != '\0') { - putchar(*s); - s++; - } - s++; - } - putchar('\n'); - } - - /* Header fields of interest. */ - if (r->regstart != '\0') - printf("start `%c' ", r->regstart); - if (r->reganch) - printf("anchored "); - if (r->regmust != NULL) - printf("must have \"%s\"", r->regmust); - printf("\n"); -} - -/* - - regprop - printable representation of opcode - */ -static char * -regprop(op) -char *op; -{ - register char *p; - static char buf[50]; - - (void) strcpy(buf, ":"); - - switch (OP(op)) { - case BOL: - p = "BOL"; - break; - case EOL: - p = "EOL"; - break; - case ANY: - p = "ANY"; - break; - case ANYOF: - p = "ANYOF"; - break; - case ANYBUT: - p = "ANYBUT"; - break; - case BRANCH: - p = "BRANCH"; - break; - case EXACTLY: - p = "EXACTLY"; - break; - case NOTHING: - p = "NOTHING"; - break; - case BACK: - p = "BACK"; - break; - case END: - p = "END"; - break; - case OPEN+1: - case OPEN+2: - case OPEN+3: - case OPEN+4: - case OPEN+5: - case OPEN+6: - case OPEN+7: - case OPEN+8: - case OPEN+9: - sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN); - p = NULL; - break; - case CLOSE+1: - case CLOSE+2: - case CLOSE+3: - case CLOSE+4: - case CLOSE+5: - case CLOSE+6: - case CLOSE+7: - case CLOSE+8: - case CLOSE+9: - sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE); - p = NULL; - break; - case STAR: - p = "STAR"; - break; - case PLUS: - p = "PLUS"; - break; - default: - sqd_regerror("corrupted opcode"); - break; - } - if (p != NULL) - (void) strcat(buf, p); - return(buf); -} -#endif - - -/* - - sqd_regsub - perform substitutions after a regexp match - */ -void -sqd_regsub(rp, source, dest) -const sqd_regexp *rp; -const char *source; -char *dest; -{ - register sqd_regexp * const prog = (sqd_regexp *)rp; - register char *src = (char *)source; 
- register char *dst = dest; - register char c; - register int no; - register size_t len; - - if (prog == NULL || source == NULL || dest == NULL) { - sqd_regerror("NULL parameter to sqd_regsub"); - return; - } - if ((unsigned char)*(prog->program) != SQD_REGMAGIC) { - sqd_regerror("damaged regexp"); - return; - } - - while ((c = *src++) != '\0') { - if (c == '&') - no = 0; - else if (c == '\\' && isdigit((int) (*src))) - no = *src++ - '0'; - else - no = -1; - - if (no < 0) { /* Ordinary character. */ - if (c == '\\' && (*src == '\\' || *src == '&')) - c = *src++; - *dst++ = c; - } else if (prog->startp[no] != NULL && prog->endp[no] != NULL && - prog->endp[no] > prog->startp[no]) { - len = prog->endp[no] - prog->startp[no]; - (void) strncpy(dst, prog->startp[no], len); - dst += len; - if (*(dst-1) == '\0') { /* strncpy hit NUL. */ - sqd_regerror("damaged match string"); - return; - } - } - } - *dst++ = '\0'; -} - - -void -sqd_regerror(s) -char *s; -{ - fprintf(stderr, "regexp(3): %s\n", s); - exit(EXIT_FAILURE); - /* NOTREACHED */ -} diff --git a/forester/archive/RIO/others/hmmer/squid/install-sh b/forester/archive/RIO/others/hmmer/squid/install-sh deleted file mode 100755 index e9de238..0000000 --- a/forester/archive/RIO/others/hmmer/squid/install-sh +++ /dev/null @@ -1,251 +0,0 @@ -#!/bin/sh -# -# install - install a program, script, or datafile -# This comes from X11R5 (mit/util/scripts/install.sh). -# -# Copyright 1991 by the Massachusetts Institute of Technology -# -# Permission to use, copy, modify, distribute, and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in advertising or -# publicity pertaining to distribution of the software without specific, -# written prior permission. M.I.T. 
makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. It can only install one file at a time, a restriction -# shared with many OS's install programs. - - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" - - -# put in absolute paths if you don't have them in your path; or use env. vars. - -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -transformbasename="" -transform_arg="" -instcmd="$mvprog" -chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd="$cpprog" - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd="$stripprog" - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "install: no input file specified" - exit 1 -else - true -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d $dst ]; then - 
instcmd=: - chmodcmd="" - else - instcmd=mkdir - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. - - if [ -f $src -o -d $src ] - then - true - else - echo "install: $src does not exist" - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "install: no destination specified" - exit 1 - else - true - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d $dst ] - then - dst="$dst"/`basename $src` - else - true - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' -' -IFS="${IFS-${defaultIFS}}" - -oIFS="${IFS}" -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS="${oIFS}" - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp="${pathcomp}${1}" - shift - - if [ ! -d "${pathcomp}" ] ; - then - $mkdirprog "${pathcomp}" - else - true - fi - - pathcomp="${pathcomp}/" -done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd $dst && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi -else - -# If we're going to rename the final executable, determine the name now. 
- - if [ x"$transformarg" = x ] - then - dstfile=`basename $dst` - else - dstfile=`basename $dst $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename $dst` - else - true - fi - -# Make a temp file name in the proper directory. - - dsttmp=$dstdir/#inst.$$# - -# Move or copy the file name to the temp name - - $doit $instcmd $src $dsttmp && - - trap "rm -f ${dsttmp}" 0 && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && - -# Now rename the file to the real destination. - - $doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile - -fi && - - -exit 0 diff --git a/forester/archive/RIO/others/hmmer/squid/iupac.c b/forester/archive/RIO/others/hmmer/squid/iupac.c deleted file mode 100644 index 9f2a577..0000000 --- a/forester/archive/RIO/others/hmmer/squid/iupac.c +++ /dev/null @@ -1,220 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- *****************************************************************/ - -/* iupac.c - * - * Globally defines the IUPAC symbols for nucleic acid sequence - * Slowly evolving into a repository of globals. Tue Apr 20 1993 - * - * RCS $Id: iupac.c,v 1.1.1.1 2005/03/22 08:34:32 cmzmasek Exp $ - */ -#include "squid.h" - -/* Default expected nucleotide occurrence frequencies, A/C/G/T. - * Used (for instance) as the default distribution for - * i.i.d. random nucleotide sequences. - */ -float dnafq[4] = { 0.25, 0.25, 0.25, 0.25 }; - -/* Dayhoff f(i) amino acid occurrence frequencies. - * From SwissProt 34: 21,210,388 residues - * In alphabetic order by single-letter code. - * Used (for instance) as the default distribution for - * i.i.d. random protein sequences. - */ -float aafq[20] = { - 0.075520, /* A */ - 0.016973, /* C */ - 0.053029, /* D */ - 0.063204, /* E */ - 0.040762, /* F */ - 0.068448, /* G */ - 0.022406, /* H */ - 0.057284, /* I */ - 0.059398, /* K */ - 0.093399, /* L */ - 0.023569, /* M */ - 0.045293, /* N */ - 0.049262, /* P */ - 0.040231, /* Q */ - 0.051573, /* R */ - 0.072214, /* S */ - 0.057454, /* T */ - 0.065252, /* V */ - 0.012513, /* W */ - 0.031985 /* Y */ -}; - -char aa_alphabet[] = AMINO_ALPHABET; - /* aa_index converts to pam's 27x27 scheme */ -int aa_index[20] = { 0, 2, 3, 4, 5, 6, 7, 8, 10, 11, - 12, 13, 15, 16, 17, 18, 19, 21, 22, 24 }; - - /* IUPAC code translations */ - /* note: sequence chars are UPPER CASE */ -struct iupactype iupac[] = { - { 'A', 'T', NTA, NTT, }, - { 'C', 'G', NTC, NTG, }, - { 'G', 'C', NTG, NTC, }, - { 'T', 'A', NTT, NTA, }, - { 'U', 'A', NTU, NTA, }, - { 'N', 'N', NTN, NTN, }, - { ' ', ' ', NTGAP, NTGAP, }, - { 'R', 'Y', NTR, NTY, }, - { 'Y', 'R', NTY, NTR, }, - { 'M', 'K', NTM, NTK, }, - { 'K', 'M', NTK, NTM, }, - { 'S', 'S', NTS, NTS, }, - { 'W', 'W', NTW, NTW, }, - { 'H', 'D', NTH, NTD, }, - { 'B', 'V', NTB, NTV, }, - { 'V', 'B', NTV, NTB, }, - { 'D', 'H', NTD, NTH, }, - }; - - -char *stdcode1[65] = { - "K", /* AAA */ 
- "N", /* AAC */ - "K", /* AAG */ - "N", /* AAU */ - "T", /* ACA */ - "T", /* ACC */ - "T", /* ACG */ - "T", /* ACU */ - "R", /* AGA */ - "S", /* AGC */ - "R", /* AGG */ - "S", /* AGU */ - "I", /* AUA */ - "I", /* AUC */ - "M", /* AUG */ - "I", /* AUU */ - "Q", /* CAA */ - "H", /* CAC */ - "Q", /* CAG */ - "H", /* CAU */ - "P", /* CCA */ - "P", /* CCC */ - "P", /* CCG */ - "P", /* CCU */ - "R", /* CGA */ - "R", /* CGC */ - "R", /* CGG */ - "R", /* CGU */ - "L", /* CUA */ - "L", /* CUC */ - "L", /* CUG */ - "L", /* CUU */ - "E", /* GAA */ - "D", /* GAC */ - "E", /* GAG */ - "D", /* GAU */ - "A", /* GCA */ - "A", /* GCC */ - "A", /* GCG */ - "A", /* GCU */ - "G", /* GGA */ - "G", /* GGC */ - "G", /* GGG */ - "G", /* GGU */ - "V", /* GUA */ - "V", /* GUC */ - "V", /* GUG */ - "V", /* GUU */ - "*", /* UAA */ - "Y", /* UAC */ - "*", /* UAG */ - "Y", /* UAU */ - "S", /* UCA */ - "S", /* UCC */ - "S", /* UCG */ - "S", /* UCU */ - "*", /* UGA */ - "C", /* UGC */ - "W", /* UGG */ - "C", /* UGU */ - "L", /* UUA */ - "F", /* UUC */ - "L", /* UUG */ - "F", /* UUU */ - "X", /* unknown */ -}; - - - - -char *stdcode3[65] = { - "Lys", /* AAA */ - "Asn", /* AAC */ - "Lys", /* AAG */ - "Asn", /* AAU */ - "Thr", /* ACA */ - "Thr", /* ACC */ - "Thr", /* ACG */ - "Thr", /* ACU */ - "Arg", /* AGA */ - "Ser", /* AGC */ - "Arg", /* AGG */ - "Ser", /* AGU */ - "Ile", /* AUA */ - "Ile", /* AUC */ - "Met", /* AUG */ - "Ile", /* AUU */ - "Gln", /* CAA */ - "His", /* CAC */ - "Gln", /* CAG */ - "His", /* CAU */ - "Pro", /* CCA */ - "Pro", /* CCC */ - "Pro", /* CCG */ - "Pro", /* CCU */ - "Arg", /* CGA */ - "Arg", /* CGC */ - "Arg", /* CGG */ - "Arg", /* CGU */ - "Leu", /* CUA */ - "Leu", /* CUC */ - "Leu", /* CUG */ - "Leu", /* CUU */ - "Glu", /* GAA */ - "Asp", /* GAC */ - "Glu", /* GAG */ - "Asp", /* GAU */ - "Ala", /* GCA */ - "Ala", /* GCC */ - "Ala", /* GCG */ - "Ala", /* GCU */ - "Gly", /* GGA */ - "Gly", /* GGC */ - "Gly", /* GGG */ - "Gly", /* GGU */ - "Val", /* GUA */ - "Val", /* GUC 
*/ - "Val", /* GUG */ - "Val", /* GUU */ - "***", /* UAA */ - "Tyr", /* UAC */ - "***", /* UAG */ - "Tyr", /* UAU */ - "Ser", /* UCA */ - "Ser", /* UCC */ - "Ser", /* UCG */ - "Ser", /* UCU */ - "***", /* UGA */ - "Cys", /* UGC */ - "Trp", /* UGG */ - "Cys", /* UGU */ - "Leu", /* UUA */ - "Phe", /* UUC */ - "Leu", /* UUG */ - "Trp", /* UUU */ - "XXX", /* unknown */ -}; diff --git a/forester/archive/RIO/others/hmmer/squid/msa.c b/forester/archive/RIO/others/hmmer/squid/msa.c deleted file mode 100644 index 03bd57a..0000000 --- a/forester/archive/RIO/others/hmmer/squid/msa.c +++ /dev/null @@ -1,1394 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* msa.c - * SRE, Mon May 17 10:48:47 1999 - * - * SQUID's interface for multiple sequence alignment - * manipulation: access to the MSA object. - * - * RCS $Id: msa.c,v 1.1.1.1 2005/03/22 08:34:19 cmzmasek Exp $ - */ - -#include -#include -#include -#include "squid.h" -#include "msa.h" /* multiple sequence alignment object support */ -#include "gki.h" /* string indexing hashtable code */ -#include "ssi.h" /* SSI sequence file indexing code */ - -/* Function: MSAAlloc() - * Date: SRE, Tue May 18 10:45:47 1999 [St. Louis] - * - * Purpose: Allocate an MSA structure, return a pointer - * to it. - * - * Designed to be used in three ways: - * 1) We know exactly the dimensions of the alignment: - * both nseq and alen. - * msa = MSAAlloc(nseq, alen); - * - * 2) We know the number of sequences but not alen. - * (We add sequences later.) 
- * msa = MSAAlloc(nseq, 0); - * - * 3) We even don't know the number of sequences, so - * we'll have to dynamically expand allocations. - * We provide a blocksize for the allocation expansion, - * and expand when needed. - * msa = MSAAlloc(10, 0); - * if (msa->nseq == msa->nseqalloc) MSAExpand(msa); - * - * Args: nseq - number of sequences, or nseq allocation blocksize - * alen - length of alignment in columns, or 0 - * - * Returns: pointer to new MSA object, w/ all values initialized. - * Note that msa->nseq is initialized to 0, though space - * is allocated. - * - * Diagnostics: "always works". Die()'s on memory allocation failure. - * - */ -MSA * -MSAAlloc(int nseq, int alen) -{ - MSA *msa; - int i; - - msa = MallocOrDie(sizeof(MSA)); - msa->aseq = MallocOrDie(sizeof(char *) * nseq); - msa->sqname = MallocOrDie(sizeof(char *) * nseq); - msa->sqlen = MallocOrDie(sizeof(int) * nseq); - msa->wgt = MallocOrDie(sizeof(float) * nseq); - - for (i = 0; i < nseq; i++) - { - msa->sqname[i] = NULL; - msa->sqlen[i] = 0; - msa->wgt[i] = -1.0; - - if (alen != 0) msa->aseq[i] = MallocOrDie(sizeof(char) * (alen+1)); - else msa->aseq[i] = NULL; - } - - msa->alen = alen; - msa->nseq = 0; - msa->nseqalloc = nseq; - msa->nseqlump = nseq; - - msa->flags = 0; - msa->type = kOtherSeq; - msa->name = NULL; - msa->desc = NULL; - msa->acc = NULL; - msa->au = NULL; - msa->ss_cons = NULL; - msa->sa_cons = NULL; - msa->rf = NULL; - msa->sqacc = NULL; - msa->sqdesc = NULL; - msa->ss = NULL; - msa->sslen = NULL; - msa->sa = NULL; - msa->salen = NULL; - msa->index = GKIInit(); - msa->lastidx = 0; - - /* Initialize unparsed optional markup - */ - msa->comment = NULL; - msa->ncomment = 0; - msa->alloc_ncomment = 0; - - msa->gf_tag = NULL; - msa->gf = NULL; - msa->ngf = 0; - - msa->gs_tag = NULL; - msa->gs = NULL; - msa->gs_idx = NULL; - msa->ngs = 0; - - msa->gc_tag = NULL; - msa->gc = NULL; - msa->gc_idx = NULL; - msa->ngc = 0; - - msa->gr_tag = NULL; - msa->gr = NULL; - msa->gr_idx = NULL; - 
msa->ngr = 0; - - /* Done. Return the alloced, initialized structure - */ - return msa; -} - -/* Function: MSAExpand() - * Date: SRE, Tue May 18 11:06:53 1999 [St. Louis] - * - * Purpose: Increase the sequence allocation in an MSA - * by msa->nseqlump. (Typically used when we're reading - * in an alignment sequentially from a file, - * so we don't know nseq until we're done.) - * - * Args: msa - the MSA object - * - * Returns: (void) - * - */ -void -MSAExpand(MSA *msa) -{ - int i,j; - - msa->nseqalloc += msa->nseqlump; - - msa->aseq = ReallocOrDie(msa->aseq, sizeof(char *) * msa->nseqalloc); - msa->sqname = ReallocOrDie(msa->sqname, sizeof(char *) * msa->nseqalloc); - msa->sqlen = ReallocOrDie(msa->sqlen, sizeof(char *) * msa->nseqalloc); - msa->wgt = ReallocOrDie(msa->wgt, sizeof(float) * msa->nseqalloc); - - if (msa->ss != NULL) { - msa->ss = ReallocOrDie(msa->ss, sizeof(char *) * msa->nseqalloc); - msa->sslen = ReallocOrDie(msa->sslen, sizeof(int) * msa->nseqalloc); - } - if (msa->sa != NULL) { - msa->sa = ReallocOrDie(msa->sa, sizeof(char *) * msa->nseqalloc); - msa->salen = ReallocOrDie(msa->salen, sizeof(int) * msa->nseqalloc); - } - if (msa->sqacc != NULL) - msa->sqacc = ReallocOrDie(msa->sqacc, sizeof(char *) * msa->nseqalloc); - if (msa->sqdesc != NULL) - msa->sqdesc =ReallocOrDie(msa->sqdesc,sizeof(char *) * msa->nseqalloc); - - for (i = msa->nseqalloc-msa->nseqlump; i < msa->nseqalloc; i++) - { - msa->sqname[i] = NULL; - msa->wgt[i] = -1.0; - - if (msa->sqacc != NULL) msa->sqacc[i] = NULL; - if (msa->sqdesc != NULL) msa->sqdesc[i] = NULL; - - if (msa->alen != 0) - msa->aseq[i] = ReallocOrDie(msa->aseq[i], sizeof(char) * (msa->alen+1)); - else msa->aseq[i] = NULL; - msa->sqlen[i] = 0; - - if (msa->ss != NULL) { - if (msa->alen != 0) - msa->ss[i] = ReallocOrDie(msa->ss[i], sizeof(char) * (msa->alen+1)); - else msa->ss[i] = NULL; - msa->sslen[i] = 0; - } - if (msa->sa != NULL) { - if (msa->alen != 0) - msa->sa[i] = ReallocOrDie(msa->ss[i], sizeof(char) * 
(msa->alen+1)); - else - msa->sa[i] = NULL; - msa->salen[i] = 0; - } - } - - /* Reallocate and re-init for unparsed #=GS tags, if we have some. - * gs is [0..ngs-1][0..nseq-1][], so we're reallocing the middle - * set of pointers. - */ - if (msa->gs != NULL) - for (i = 0; i < msa->ngs; i++) - { - if (msa->gs[i] != NULL) - { - msa->gs[i] = ReallocOrDie(msa->gs[i], sizeof(char *) * msa->nseqalloc); - for (j = msa->nseqalloc-msa->nseqlump; j < msa->nseqalloc; j++) - msa->gs[i][j] = NULL; - } - } - - /* Reallocate and re-init for unparsed #=GR tags, if we have some. - * gr is [0..ngs-1][0..nseq-1][], so we're reallocing the middle - * set of pointers. - */ - if (msa->gr != NULL) - for (i = 0; i < msa->ngr; i++) - { - if (msa->gr[i] != NULL) - { - msa->gr[i] = ReallocOrDie(msa->gr[i], sizeof(char *) * msa->nseqalloc); - for (j = msa->nseqalloc-msa->nseqlump; j < msa->nseqalloc; j++) - msa->gr[i][j] = NULL; - } - } - - return; -} - -/* Function: MSAFree() - * Date: SRE, Tue May 18 11:20:16 1999 [St. Louis] - * - * Purpose: Free a multiple sequence alignment structure. 
- * - * Args: msa - the alignment - * - * Returns: (void) - */ -void -MSAFree(MSA *msa) -{ - Free2DArray((void **) msa->aseq, msa->nseq); - Free2DArray((void **) msa->sqname, msa->nseq); - Free2DArray((void **) msa->sqacc, msa->nseq); - Free2DArray((void **) msa->sqdesc, msa->nseq); - Free2DArray((void **) msa->ss, msa->nseq); - Free2DArray((void **) msa->sa, msa->nseq); - - if (msa->sqlen != NULL) free(msa->sqlen); - if (msa->wgt != NULL) free(msa->wgt); - - if (msa->name != NULL) free(msa->name); - if (msa->desc != NULL) free(msa->desc); - if (msa->acc != NULL) free(msa->acc); - if (msa->au != NULL) free(msa->au); - if (msa->ss_cons != NULL) free(msa->ss_cons); - if (msa->sa_cons != NULL) free(msa->sa_cons); - if (msa->rf != NULL) free(msa->rf); - if (msa->sslen != NULL) free(msa->sslen); - if (msa->salen != NULL) free(msa->salen); - - Free2DArray((void **) msa->comment, msa->ncomment); - Free2DArray((void **) msa->gf_tag, msa->ngf); - Free2DArray((void **) msa->gf, msa->ngf); - Free2DArray((void **) msa->gs_tag, msa->ngs); - Free3DArray((void ***)msa->gs, msa->ngs, msa->nseq); - Free2DArray((void **) msa->gc_tag, msa->ngc); - Free2DArray((void **) msa->gc, msa->ngc); - Free2DArray((void **) msa->gr_tag, msa->ngr); - Free3DArray((void ***)msa->gr, msa->ngr, msa->nseq); - - GKIFree(msa->index); - GKIFree(msa->gs_idx); - GKIFree(msa->gc_idx); - GKIFree(msa->gr_idx); - - free(msa); -} - - -/* Function: MSASetSeqAccession() - * Date: SRE, Mon Jun 21 04:13:33 1999 [Sanger Centre] - * - * Purpose: Set a sequence accession in an MSA structure. - * Handles some necessary allocation/initialization. 
- * - * Args: msa - multiple alignment to add accession to - * seqidx - index of sequence to attach accession to - * acc - accession - * - * Returns: void - */ -void -MSASetSeqAccession(MSA *msa, int seqidx, char *acc) -{ - int x; - - if (msa->sqacc == NULL) { - msa->sqacc = MallocOrDie(sizeof(char *) * msa->nseqalloc); - for (x = 0; x < msa->nseqalloc; x++) - msa->sqacc[x] = NULL; - } - msa->sqacc[seqidx] = sre_strdup(acc, -1); -} - -/* Function: MSASetSeqDescription() - * Date: SRE, Mon Jun 21 04:21:09 1999 [Sanger Centre] - * - * Purpose: Set a sequence description in an MSA structure. - * Handles some necessary allocation/initialization. - * - * Args: msa - multiple alignment to add accession to - * seqidx - index of sequence to attach accession to - * desc - description - * - * Returns: void - */ -void -MSASetSeqDescription(MSA *msa, int seqidx, char *desc) -{ - int x; - - if (msa->sqdesc == NULL) { - msa->sqdesc = MallocOrDie(sizeof(char *) * msa->nseqalloc); - for (x = 0; x < msa->nseqalloc; x++) - msa->sqdesc[x] = NULL; - } - msa->sqdesc[seqidx] = sre_strdup(desc, -1); -} - - -/* Function: MSAAddComment() - * Date: SRE, Tue Jun 1 17:37:21 1999 [St. Louis] - * - * Purpose: Add an (unparsed) comment line to the MSA structure, - * allocating as necessary. - * - * Args: msa - a multiple alignment - * s - comment line to add - * - * Returns: (void) - */ -void -MSAAddComment(MSA *msa, char *s) -{ - /* If this is our first recorded comment, we need to malloc(); - * and if we've filled available space, we need to realloc(). - * Note the arbitrary lumpsize of 10 lines per allocation... 
- */ - if (msa->comment == NULL) { - msa->comment = MallocOrDie (sizeof(char *) * 10); - msa->alloc_ncomment = 10; - } - if (msa->ncomment == msa->alloc_ncomment) { - msa->alloc_ncomment += 10; - msa->comment = ReallocOrDie(msa->comment, sizeof(char *) * msa->alloc_ncomment); - } - - msa->comment[msa->ncomment] = sre_strdup(s, -1); - msa->ncomment++; - return; -} - -/* Function: MSAAddGF() - * Date: SRE, Wed Jun 2 06:53:54 1999 [bus to Madison] - * - * Purpose: Add an unparsed #=GF markup line to the MSA - * structure, allocating as necessary. - * - * Args: msa - a multiple alignment - * tag - markup tag (e.g. "AU") - * value - free text markup (e.g. "Alex Bateman") - * - * Returns: (void) - */ -void -MSAAddGF(MSA *msa, char *tag, char *value) -{ - /* If this is our first recorded unparsed #=GF line, we need to malloc(); - * if we've filled availabl space If we already have a hash index, and the GF - * Note the arbitrary lumpsize of 10 lines per allocation... - */ - if (msa->gf_tag == NULL) { - msa->gf_tag = MallocOrDie (sizeof(char *) * 10); - msa->gf = MallocOrDie (sizeof(char *) * 10); - msa->alloc_ngf = 10; - } - if (msa->ngf == msa->alloc_ngf) { - msa->alloc_ngf += 10; - msa->gf_tag = ReallocOrDie(msa->gf_tag, sizeof(char *) * msa->alloc_ngf); - msa->gf = ReallocOrDie(msa->gf, sizeof(char *) * msa->alloc_ngf); - } - - msa->gf_tag[msa->ngf] = sre_strdup(tag, -1); - msa->gf[msa->ngf] = sre_strdup(value, -1); - msa->ngf++; - - return; -} - - -/* Function: MSAAddGS() - * Date: SRE, Wed Jun 2 06:57:03 1999 [St. Louis] - * - * Purpose: Add an unparsed #=GS markup line to the MSA - * structure, allocating as necessary. - * - * It's possible that we could get more than one - * of the same type of GS tag per sequence; for - * example, "DR PDB;" structure links in Pfam. - * Hack: handle these by appending to the string, - * in a \n separated fashion. - * - * Args: msa - multiple alignment structure - * tag - markup tag (e.g. 
"AC") - * sqidx - index of sequence to assoc markup with (0..nseq-1) - * value - markup (e.g. "P00666") - * - * Returns: 0 on success - */ -void -MSAAddGS(MSA *msa, char *tag, int sqidx, char *value) -{ - int tagidx; - int i; - - /* Is this an unparsed tag name that we recognize? - * If not, handle adding it to index, and reallocating - * as needed. - */ - if (msa->gs_tag == NULL) /* first tag? init w/ malloc */ - { - msa->gs_idx = GKIInit(); - tagidx = GKIStoreKey(msa->gs_idx, tag); - SQD_DASSERT1((tagidx == 0)); - msa->gs_tag = MallocOrDie(sizeof(char *)); - msa->gs = MallocOrDie(sizeof(char **)); - msa->gs[0] = MallocOrDie(sizeof(char *) * msa->nseqalloc); - for (i = 0; i < msa->nseqalloc; i++) - msa->gs[0][i] = NULL; - } - else - { - /* new tag? */ - tagidx = GKIKeyIndex(msa->gs_idx, tag); - if (tagidx < 0) { /* it's a new tag name; realloc */ - tagidx = GKIStoreKey(msa->gs_idx, tag); - /* since we alloc in blocks of 1, - we always realloc upon seeing - a new tag. */ - SQD_DASSERT1((tagidx == msa->ngs)); - msa->gs_tag = ReallocOrDie(msa->gs_tag, (msa->ngs+1) + sizeof(char *)); - msa->gs = ReallocOrDie(msa->gs, (msa->ngs+1) + sizeof(char **)); - msa->gs[msa->ngs] = MallocOrDie(sizeof(char *) * msa->nseqalloc); - for (i = 0; i < msa->nseqalloc; i++) - msa->gs[msa->ngs][i] = NULL; - } - } - - if (tagidx == msa->ngs) { - msa->gs_tag[tagidx] = sre_strdup(tag, -1); - msa->ngs++; - } - - if (msa->gs[tagidx][sqidx] == NULL) /* first annotation of this seq with this tag? 
*/ - msa->gs[tagidx][sqidx] = sre_strdup(value, -1); - else { - /* >1 annotation of this seq with this tag; append */ - int len; - if ((len = sre_strcat(&(msa->gs[tagidx][sqidx]), -1, "\n", 1)) < 0) - Die("failed to sre_strcat()"); - if (sre_strcat(&(msa->gs[tagidx][sqidx]), len, value, -1) < 0) - Die("failed to sre_strcat()"); - } - return; -} - -/* Function: MSAAppendGC() - * Date: SRE, Thu Jun 3 06:25:14 1999 [Madison] - * - * Purpose: Add an unparsed #=GC markup line to the MSA - * structure, allocating as necessary. - * - * When called multiple times for the same tag, - * appends value strings together -- used when - * parsing multiblock alignment files, for - * example. - * - * Args: msa - multiple alignment structure - * tag - markup tag (e.g. "CS") - * value - markup, one char per aligned column - * - * Returns: (void) - */ -void -MSAAppendGC(MSA *msa, char *tag, char *value) -{ - int tagidx; - - /* Is this an unparsed tag name that we recognize? - * If not, handle adding it to index, and reallocating - * as needed. - */ - if (msa->gc_tag == NULL) /* first tag? init w/ malloc */ - { - msa->gc_tag = MallocOrDie(sizeof(char *)); - msa->gc = MallocOrDie(sizeof(char **)); - msa->gc_idx = GKIInit(); - tagidx = GKIStoreKey(msa->gc_idx, tag); - SQD_DASSERT1((tagidx == 0)); - msa->gc[0] = NULL; - } - else - { /* new tag? */ - tagidx = GKIKeyIndex(msa->gc_idx, tag); - if (tagidx < 0) { /* it's a new tag name; realloc */ - tagidx = GKIStoreKey(msa->gc_idx, tag); - /* since we alloc in blocks of 1, - we always realloc upon seeing - a new tag. 
*/ - SQD_DASSERT1((tagidx == msa->ngc)); - msa->gc_tag = ReallocOrDie(msa->gc_tag, (msa->ngc+1) + sizeof(char *)); - msa->gc = ReallocOrDie(msa->gc, (msa->ngc+1) + sizeof(char **)); - msa->gc[tagidx] = NULL; - } - } - - if (tagidx == msa->ngc) { - msa->gc_tag[tagidx] = sre_strdup(tag, -1); - msa->ngc++; - } - sre_strcat(&(msa->gc[tagidx]), -1, value, -1); - return; -} - -/* Function: MSAGetGC() - * Date: SRE, Fri Aug 13 13:25:57 1999 [St. Louis] - * - * Purpose: Given a tagname for a miscellaneous #=GC column - * annotation, return a pointer to the annotation - * string. - * - * Args: msa - alignment and its annotation - * tag - name of the annotation - * - * Returns: ptr to the annotation string. Caller does *not* - * free; is managed by msa object still. - */ -char * -MSAGetGC(MSA *msa, char *tag) -{ - int tagidx; - - if (msa->gc_idx == NULL) return NULL; - if ((tagidx = GKIKeyIndex(msa->gc_idx, tag)) < 0) return NULL; - return msa->gc[tagidx]; -} - - -/* Function: MSAAppendGR() - * Date: SRE, Thu Jun 3 06:34:38 1999 [Madison] - * - * Purpose: Add an unparsed #=GR markup line to the - * MSA structure, allocating as necessary. - * - * When called multiple times for the same tag, - * appends value strings together -- used when - * parsing multiblock alignment files, for - * example. - * - * Args: msa - multiple alignment structure - * tag - markup tag (e.g. "SS") - * sqidx - index of seq to assoc markup with (0..nseq-1) - * value - markup, one char per aligned column - * - * Returns: (void) - */ -void -MSAAppendGR(MSA *msa, char *tag, int sqidx, char *value) -{ - int tagidx; - int i; - - /* Is this an unparsed tag name that we recognize? - * If not, handle adding it to index, and reallocating - * as needed. - */ - if (msa->gr_tag == NULL) /* first tag? 
init w/ malloc */ - { - msa->gr_tag = MallocOrDie(sizeof(char *)); - msa->gr = MallocOrDie(sizeof(char **)); - msa->gr[0] = MallocOrDie(sizeof(char *) * msa->nseqalloc); - msa->gr_idx = GKIInit(); - tagidx = GKIStoreKey(msa->gr_idx, tag); - SQD_DASSERT1((tagidx == 0)); - } - else - { - /* new tag? */ - tagidx = GKIKeyIndex(msa->gr_idx, tag); - if (tagidx < 0) { /* it's a new tag name; realloc */ - tagidx = GKIStoreKey(msa->gr_idx, tag); - /* since we alloc in blocks of 1, - we always realloc upon seeing - a new tag. */ - SQD_DASSERT1((tagidx == msa->ngr)); - msa->gr_tag = ReallocOrDie(msa->gr_tag, (msa->ngr+1) + sizeof(char *)); - msa->gr = ReallocOrDie(msa->gr, (msa->ngr+1) + sizeof(char **)); - msa->gr[msa->ngr] = MallocOrDie(sizeof(char *) * msa->nseqalloc); - for (i = 0; i < msa->nseqalloc; i++) - msa->gr[msa->ngr][i] = NULL; - } - } - - if (tagidx == msa->ngr) { - msa->gr_tag[tagidx] = sre_strdup(tag, -1); - msa->ngr++; - } - sre_strcat(&(msa->gr[tagidx][sqidx]), -1, value, -1); - return; -} - - -/* Function: MSAVerifyParse() - * Date: SRE, Sat Jun 5 14:24:24 1999 [Madison, 1999 worm mtg] - * - * Purpose: Last function called after a multiple alignment is - * parsed. Checks that parse was successful; makes sure - * required information is present; makes sure required - * information is consistent. Some fields that are - * only use during parsing may be freed (sqlen, for - * example). - * - * Some fields in msa may be modified (msa->alen is set, - * for example). - * - * Args: msa - the multiple alignment - * sqname, aseq must be set - * nseq must be correct - * alen need not be set; will be set here. - * wgt will be set here if not already set - * - * Returns: (void) - * Will Die() here with diagnostics on error. - * - * Example: - */ -void -MSAVerifyParse(MSA *msa) -{ - int idx; - - if (msa->nseq == 0) Die("Parse error: no sequences were found for alignment %s", - msa->name != NULL ? 
msa->name : ""); - - msa->alen = msa->sqlen[0]; - - /* We can rely on msa->sqname[] being valid for any index, - * because of the way the line parsers always store any name - * they add to the index. - */ - for (idx = 0; idx < msa->nseq; idx++) - { - /* aseq is required. */ - if (msa->aseq[idx] == NULL) - Die("Parse error: No sequence for %s in alignment %s", msa->sqname[idx], - msa->name != NULL ? msa->name : ""); - /* either all weights must be set, or none of them */ - if ((msa->flags & MSA_SET_WGT) && msa->wgt[idx] == -1.0) - Die("Parse error: some weights are set, but %s doesn't have one in alignment %s", - msa->sqname[idx], - msa->name != NULL ? msa->name : ""); - /* all aseq must be same length. */ - if (msa->sqlen[idx] != msa->alen) - Die("Parse error: sequence %s: length %d, expected %d in alignment %s", - msa->sqname[idx], msa->sqlen[idx], msa->alen, - msa->name != NULL ? msa->name : ""); - /* if SS is present, must have length right */ - if (msa->ss != NULL && msa->ss[idx] != NULL && msa->sslen[idx] != msa->alen) - Die("Parse error: #=GR SS annotation for %s: length %d, expected %d in alignment %s", - msa->sqname[idx], msa->sslen[idx], msa->alen, - msa->name != NULL ? msa->name : ""); - /* if SA is present, must have length right */ - if (msa->sa != NULL && msa->sa[idx] != NULL && msa->salen[idx] != msa->alen) - Die("Parse error: #=GR SA annotation for %s: length %d, expected %d in alignment %s", - msa->sqname[idx], msa->salen[idx], msa->alen, - msa->name != NULL ? msa->name : ""); - } - - /* if cons SS is present, must have length right */ - if (msa->ss_cons != NULL && strlen(msa->ss_cons) != msa->alen) - Die("Parse error: #=GC SS_cons annotation: length %d, expected %d in alignment %s", - strlen(msa->ss_cons), msa->alen, - msa->name != NULL ? 
msa->name : ""); - - /* if cons SA is present, must have length right */ - if (msa->sa_cons != NULL && strlen(msa->sa_cons) != msa->alen) - Die("Parse error: #=GC SA_cons annotation: length %d, expected %d in alignment %s", - strlen(msa->sa_cons), msa->alen, - msa->name != NULL ? msa->name : ""); - - /* if RF is present, must have length right */ - if (msa->rf != NULL && strlen(msa->rf) != msa->alen) - Die("Parse error: #=GC RF annotation: length %d, expected %d in alignment %s", - strlen(msa->rf), msa->alen, - msa->name != NULL ? msa->name : ""); - - /* Check that all or no weights are set */ - if (!(msa->flags & MSA_SET_WGT)) - FSet(msa->wgt, msa->nseq, 1.0); /* default weights */ - - /* Clean up a little from the parser */ - if (msa->sqlen != NULL) { free(msa->sqlen); msa->sqlen = NULL; } - if (msa->sslen != NULL) { free(msa->sslen); msa->sslen = NULL; } - if (msa->salen != NULL) { free(msa->salen); msa->salen = NULL; } - - return; -} - - - - -/* Function: MSAFileOpen() - * Date: SRE, Tue May 18 13:22:01 1999 [St. Louis] - * - * Purpose: Open an alignment database file and prepare - * for reading one alignment, or sequentially - * in the (rare) case of multiple MSA databases - * (e.g. Stockholm format). - * - * Args: filename - name of file to open - * if "-", read stdin - * if it ends in ".gz", read from pipe to gunzip -dc - * format - format of file (e.g. MSAFILE_STOCKHOLM) - * env - environment variable for path (e.g. BLASTDB) - * - * Returns: opened MSAFILE * on success. - * NULL on failure: - * usually, because the file doesn't exist; - * for gzip'ed files, may also mean that gzip isn't in the path. 
- */ -MSAFILE * -MSAFileOpen(char *filename, int format, char *env) -{ - MSAFILE *afp; - - afp = MallocOrDie(sizeof(MSAFILE)); - if (strcmp(filename, "-") == 0) - { - afp->f = stdin; - afp->do_stdin = TRUE; - afp->do_gzip = FALSE; - afp->fname = sre_strdup("[STDIN]", -1); - afp->ssi = NULL; /* can't index stdin because we can't seek*/ - } -#ifndef SRE_STRICT_ANSI - /* popen(), pclose() aren't portable to non-POSIX systems; disable */ - else if (Strparse("^.*\\.gz$", filename, 0)) - { - char cmd[256]; - - /* Note that popen() will return "successfully" - * if file doesn't exist, because gzip works fine - * and prints an error! So we have to check for - * existence of file ourself. - */ - if (! FileExists(filename)) - Die("%s: file does not exist", filename); - if (strlen(filename) + strlen("gzip -dc ") >= 256) - Die("filename > 255 char in MSAFileOpen()"); - sprintf(cmd, "gzip -dc %s", filename); - if ((afp->f = popen(cmd, "r")) == NULL) - return NULL; - - afp->do_stdin = FALSE; - afp->do_gzip = TRUE; - afp->fname = sre_strdup(filename, -1); - /* we can't index a .gz file, because we can't seek in a pipe afaik */ - afp->ssi = NULL; - } -#endif /*SRE_STRICT_ANSI*/ - else - { - char *ssifile; - char *dir; - - /* When we open a file, it may be either in the current - * directory, or in the directory indicated by the env - * argument - and we have to construct the SSI filename accordingly. - */ - if ((afp->f = fopen(filename, "r")) != NULL) - { - ssifile = MallocOrDie(sizeof(char) * (strlen(filename) + 5)); - sprintf(ssifile, "%s.ssi", filename); - } - else if ((afp->f = EnvFileOpen(filename, env, &dir)) != NULL) - { - char *full; - full = FileConcat(dir, filename); - ssifile = MallocOrDie(sizeof(char) * (strlen(full) + strlen(filename) + 5)); - sprintf(ssifile, "%s.ssi", full); - free(dir); - } - else return NULL; - - afp->do_stdin = FALSE; - afp->do_gzip = FALSE; - afp->fname = sre_strdup(filename, -1); - afp->ssi = NULL; - - /* Open the SSI index file. 
If it doesn't exist, or - * it's corrupt, or some error happens, afp->ssi stays NULL. - */ - SSIOpen(ssifile, &(afp->ssi)); - free(ssifile); - } - - /* Invoke autodetection if we haven't already been told what - * to expect. - */ - if (format == MSAFILE_UNKNOWN) - { - if (afp->do_stdin == TRUE || afp->do_gzip) - Die("Can't autodetect alignment file format from a stdin or gzip pipe"); - format = MSAFileFormat(afp); - if (format == MSAFILE_UNKNOWN) - Die("Can't determine format of multiple alignment file %s", afp->fname); - } - - afp->format = format; - afp->linenumber = 0; - afp->buf = NULL; - afp->buflen = 0; - - return afp; -} - - -/* Function: MSAFilePositionByKey() - * MSAFilePositionByIndex() - * MSAFileRewind() - * - * Date: SRE, Tue Nov 9 19:02:54 1999 [St. Louis] - * - * Purpose: Family of functions for repositioning in - * open MSA files; analogous to a similarly - * named function series in HMMER's hmmio.c. - * - * Args: afp - open alignment file - * offset - disk offset in bytes - * key - key to look up in SSI indices - * idx - index of alignment. - * - * Returns: 0 on failure. - * 1 on success. - * If called on a non-fseek()'able file (e.g. a gzip'ed - * or pipe'd alignment), returns 0 as a failure flag. 
- */ -int -MSAFileRewind(MSAFILE *afp) -{ - if (afp->do_gzip || afp->do_stdin) return 0; - rewind(afp->f); - return 1; -} -int -MSAFilePositionByKey(MSAFILE *afp, char *key) -{ - int fh; /* filehandle is ignored */ - SSIOFFSET offset; /* offset of the key alignment */ - - if (afp->ssi == NULL) return 0; - if (SSIGetOffsetByName(afp->ssi, key, &fh, &offset) != 0) return 0; - if (SSISetFilePosition(afp->f, &offset) != 0) return 0; - return 1; -} -int -MSAFilePositionByIndex(MSAFILE *afp, int idx) -{ - int fh; /* filehandled is passed but ignored */ - SSIOFFSET offset; /* disk offset of desired alignment */ - - if (afp->ssi == NULL) return 0; - if (SSIGetOffsetByNumber(afp->ssi, idx, &fh, &offset) != 0) return 0; - if (SSISetFilePosition(afp->f, &offset) != 0) return 0; - return 1; -} - - -/* Function: MSAFileRead() - * Date: SRE, Fri May 28 16:01:43 1999 [St. Louis] - * - * Purpose: Read the next msa from an open alignment file. - * This is a wrapper around format-specific calls. - * - * Args: afp - open alignment file - * - * Returns: next alignment, or NULL if out of alignments - */ -MSA * -MSAFileRead(MSAFILE *afp) -{ - MSA *msa = NULL; - - switch (afp->format) { - case MSAFILE_STOCKHOLM: msa = ReadStockholm(afp); break; - case MSAFILE_MSF: msa = ReadMSF(afp); break; - case MSAFILE_A2M: msa = ReadA2M(afp); break; - case MSAFILE_CLUSTAL: msa = ReadClustal(afp); break; - case MSAFILE_SELEX: msa = ReadSELEX(afp); break; - case MSAFILE_PHYLIP: msa = ReadPhylip(afp); break; - default: - Die("MSAFILE corrupted: bad format index"); - } - return msa; -} - -/* Function: MSAFileClose() - * Date: SRE, Tue May 18 14:05:28 1999 [St. Louis] - * - * Purpose: Close an open MSAFILE. - * - * Args: afp - ptr to an open MSAFILE. - * - * Returns: void - */ -void -MSAFileClose(MSAFILE *afp) -{ -#ifndef SRE_STRICT_ANSI /* gzip functionality only on POSIX systems */ - if (afp->do_gzip) pclose(afp->f); -#endif - if (! 
afp->do_stdin) fclose(afp->f); - if (afp->buf != NULL) free(afp->buf); - if (afp->ssi != NULL) SSIClose(afp->ssi); - if (afp->fname != NULL) free(afp->fname); - free(afp); -} - -char * -MSAFileGetLine(MSAFILE *afp) -{ - char *s; - if ((s = sre_fgets(&(afp->buf), &(afp->buflen), afp->f)) == NULL) - return NULL; - afp->linenumber++; - return afp->buf; -} - -void -MSAFileWrite(FILE *fp, MSA *msa, int outfmt, int do_oneline) -{ - switch (outfmt) { - case MSAFILE_A2M: WriteA2M(stdout, msa); break; - case MSAFILE_CLUSTAL: WriteClustal(stdout, msa); break; - case MSAFILE_MSF: WriteMSF(stdout, msa); break; - case MSAFILE_PHYLIP: WritePhylip(stdout, msa); break; - case MSAFILE_SELEX: WriteSELEX(stdout, msa); break; - case MSAFILE_STOCKHOLM: - if (do_oneline) WriteStockholmOneBlock(stdout, msa); - else WriteStockholm(stdout, msa); - break; - default: - Die("can't write. no such alignment format %d\n", outfmt); - } -} - -/* Function: MSAGetSeqidx() - * Date: SRE, Wed May 19 15:08:25 1999 [St. Louis] - * - * Purpose: From a sequence name, return seqidx appropriate - * for an MSA structure. - * - * 1) try to guess the index. (pass -1 if you can't guess) - * 2) Look up name in msa's hashtable. - * 3) If it's a new name, store in msa's hashtable; - * expand allocs as needed; - * save sqname. - * - * Args: msa - alignment object - * name - a sequence name - * guess - a guess at the right index, or -1 if no guess. - * - * Returns: seqidx - */ -int -MSAGetSeqidx(MSA *msa, char *name, int guess) -{ - int seqidx; - /* can we guess? 
*/ - if (guess >= 0 && guess < msa->nseq && strcmp(name, msa->sqname[guess]) == 0) - return guess; - /* else, a lookup in the index */ - if ((seqidx = GKIKeyIndex(msa->index, name)) >= 0) - return seqidx; - /* else, it's a new name */ - seqidx = GKIStoreKey(msa->index, name); - if (seqidx >= msa->nseqalloc) MSAExpand(msa); - - msa->sqname[seqidx] = sre_strdup(name, -1); - msa->nseq++; - return seqidx; -} - - -/* Function: MSAFromAINFO() - * Date: SRE, Mon Jun 14 11:22:24 1999 [St. Louis] - * - * Purpose: Convert the old aseq/ainfo alignment structure - * to new MSA structure. Enables more rapid conversion - * of codebase to the new world order. - * - * Args: aseq - [0..nseq-1][0..alen-1] alignment - * ainfo - old-style optional info - * - * Returns: MSA * - */ -MSA * -MSAFromAINFO(char **aseq, AINFO *ainfo) -{ - MSA *msa; - int i, j; - - msa = MSAAlloc(ainfo->nseq, ainfo->alen); - for (i = 0; i < ainfo->nseq; i++) - { - strcpy(msa->aseq[i], aseq[i]); - msa->wgt[i] = ainfo->wgt[i]; - msa->sqname[i] = sre_strdup(ainfo->sqinfo[i].name, -1); - msa->sqlen[i] = msa->alen; - GKIStoreKey(msa->index, msa->sqname[i]); - - if (ainfo->sqinfo[i].flags & SQINFO_ACC) - MSASetSeqAccession(msa, i, ainfo->sqinfo[i].acc); - - if (ainfo->sqinfo[i].flags & SQINFO_DESC) - MSASetSeqDescription(msa, i, ainfo->sqinfo[i].desc); - - if (ainfo->sqinfo[i].flags & SQINFO_SS) { - if (msa->ss == NULL) { - msa->ss = MallocOrDie(sizeof(char *) * msa->nseqalloc); - msa->sslen = MallocOrDie(sizeof(int) * msa->nseqalloc); - for (j = 0; j < msa->nseqalloc; j++) { - msa->ss[j] = NULL; - msa->sslen[j] = 0; - } - } - MakeAlignedString(msa->aseq[i], msa->alen, ainfo->sqinfo[i].ss, &(msa->ss[i])); - msa->sslen[i] = msa->alen; - } - - if (ainfo->sqinfo[i].flags & SQINFO_SA) { - if (msa->sa == NULL) { - msa->sa = MallocOrDie(sizeof(char *) * msa->nseqalloc); - msa->salen = MallocOrDie(sizeof(int) * msa->nseqalloc); - for (j = 0; j < msa->nseqalloc; j++) { - msa->sa[j] = NULL; - msa->salen[j] = 0; - } - } - 
MakeAlignedString(msa->aseq[i], msa->alen, ainfo->sqinfo[i].sa, &(msa->sa[i])); - msa->salen[i] = msa->alen; - } - } - /* note that sre_strdup() returns NULL when passed NULL */ - msa->name = sre_strdup(ainfo->name, -1); - msa->desc = sre_strdup(ainfo->desc, -1); - msa->acc = sre_strdup(ainfo->acc, -1); - msa->au = sre_strdup(ainfo->au, -1); - msa->ss_cons = sre_strdup(ainfo->cs, -1); - msa->rf = sre_strdup(ainfo->rf, -1); - if (ainfo->flags & AINFO_TC) - { msa->tc1 = ainfo->tc1; msa->tc2 = ainfo->tc2; msa->flags |= MSA_SET_TC; } - if (ainfo->flags & AINFO_NC) - { msa->nc1 = ainfo->nc1; msa->nc2 = ainfo->nc2; msa->flags |= MSA_SET_NC; } - if (ainfo->flags & AINFO_GA) - { msa->ga1 = ainfo->ga1; msa->ga2 = ainfo->ga2; msa->flags |= MSA_SET_GA; } - - msa->nseq = ainfo->nseq; - msa->alen = ainfo->alen; - return msa; -} - - - - -/* Function: MSAFileFormat() - * Date: SRE, Fri Jun 18 14:26:49 1999 [Sanger Centre] - * - * Purpose: (Attempt to) determine the format of an alignment file. - * Since it rewinds the file pointer when it's done, - * cannot be used on a pipe or gzip'ed file. Works by - * calling SeqfileFormat() from sqio.c, then making sure - * that the format is indeed an alignment. If the format - * comes back as FASTA, it assumes that the format as A2M - * (e.g. aligned FASTA). - * - * Args: fname - file to evaluate - * - * Returns: format code; e.g. MSAFILE_STOCKHOLM - */ -int -MSAFileFormat(MSAFILE *afp) -{ - int fmt; - - fmt = SeqfileFormat(afp->f); - - if (fmt == SQFILE_FASTA) fmt = MSAFILE_A2M; - - if (fmt != MSAFILE_UNKNOWN && ! IsAlignmentFormat(fmt)) - Die("File %s does not appear to be an alignment file;\n\ -rather, it appears to be an unaligned file in %s format.\n\ -I'm expecting an alignment file in this context.\n", - afp->fname, - SeqfileFormat2String(fmt)); - return fmt; -} - - -/* Function: MSAMingap() - * Date: SRE, Mon Jun 28 18:57:54 1999 [on jury duty, St. 
Louis Civil Court] - * - * Purpose: Remove all-gap columns from a multiple sequence alignment - * and its associated per-residue data. - * - * Args: msa - the alignment - * - * Returns: (void) - */ -void -MSAMingap(MSA *msa) -{ - int *useme; /* array of TRUE/FALSE flags for which columns to keep */ - int apos; /* position in original alignment */ - int idx; /* sequence index */ - - useme = MallocOrDie(sizeof(int) * msa->alen); - for (apos = 0; apos < msa->alen; apos++) - { - for (idx = 0; idx < msa->nseq; idx++) - if (! isgap(msa->aseq[idx][apos])) - break; - if (idx == msa->nseq) useme[apos] = FALSE; else useme[apos] = TRUE; - } - MSAShorterAlignment(msa, useme); - free(useme); - return; -} - -/* Function: MSANogap() - * Date: SRE, Wed Nov 17 09:59:51 1999 [St. Louis] - * - * Purpose: Remove all columns from a multiple sequence alignment that - * contain any gaps -- used for filtering before phylogenetic - * analysis. - * - * Args: msa - the alignment - * - * Returns: (void). The alignment is modified, so if you want to keep - * the original for something, make a copy. - */ -void -MSANogap(MSA *msa) -{ - int *useme; /* array of TRUE/FALSE flags for which columns to keep */ - int apos; /* position in original alignment */ - int idx; /* sequence index */ - - useme = MallocOrDie(sizeof(int) * msa->alen); - for (apos = 0; apos < msa->alen; apos++) - { - for (idx = 0; idx < msa->nseq; idx++) - if (isgap(msa->aseq[idx][apos])) - break; - if (idx == msa->nseq) useme[apos] = TRUE; else useme[apos] = FALSE; - } - MSAShorterAlignment(msa, useme); - free(useme); - return; -} - - -/* Function: MSAShorterAlignment() - * Date: SRE, Wed Nov 17 09:49:32 1999 [St. Louis] - * - * Purpose: Given an array "useme" (0..alen-1) of TRUE/FALSE flags, - * where TRUE means "keep this column in the new alignment": - * Remove all columns annotated as "FALSE" in the useme - * array. - * - * Args: msa - the alignment. 
The alignment is changed, so - * if you don't want the original screwed up, make - * a copy of it first. - * useme - TRUE/FALSE flags for columns to keep: 0..alen-1 - * - * Returns: (void) - */ -void -MSAShorterAlignment(MSA *msa, int *useme) -{ - int apos; /* position in original alignment */ - int mpos; /* position in new alignment */ - int idx; /* sequence index */ - int i; /* markup index */ - - /* Since we're minimizing, we can overwrite, using already allocated - * memory. - */ - for (apos = 0, mpos = 0; apos < msa->alen; apos++) - { - if (useme[apos] == FALSE) continue; - - /* shift alignment and associated per-column+per-residue markup */ - if (mpos != apos) - { - for (idx = 0; idx < msa->nseq; idx++) - { - msa->aseq[idx][mpos] = msa->aseq[idx][apos]; - if (msa->ss != NULL && msa->ss[idx] != NULL) msa->ss[idx][mpos] = msa->ss[idx][apos]; - if (msa->sa != NULL && msa->sa[idx] != NULL) msa->sa[idx][mpos] = msa->sa[idx][apos]; - - for (i = 0; i < msa->ngr; i++) - if (msa->gr[i][idx] != NULL) msa->gr[i][idx][mpos] = msa->gr[i][idx][apos]; - } - - if (msa->ss_cons != NULL) msa->ss_cons[mpos] = msa->ss_cons[apos]; - if (msa->sa_cons != NULL) msa->sa_cons[mpos] = msa->sa_cons[apos]; - if (msa->rf != NULL) msa->rf[mpos] = msa->rf[apos]; - - for (i = 0; i < msa->ngc; i++) - msa->gc[i][mpos] = msa->gc[i][apos]; - } - mpos++; - } - - msa->alen = mpos; /* set new length */ - /* null terminate everything */ - for (idx = 0; idx < msa->nseq; idx++) - { - msa->aseq[idx][mpos] = '\0'; - if (msa->ss != NULL && msa->ss[idx] != NULL) msa->ss[idx][mpos] = '\0'; - if (msa->sa != NULL && msa->sa[idx] != NULL) msa->sa[idx][mpos] = '\0'; - - for (i = 0; i < msa->ngr; i++) - if (msa->gr[i][idx] != NULL) msa->gr[i][idx][mpos] = '\0'; - } - - if (msa->ss_cons != NULL) msa->ss_cons[mpos] = '\0'; - if (msa->sa_cons != NULL) msa->sa_cons[mpos] = '\0'; - if (msa->rf != NULL) msa->rf[mpos] = '\0'; - - for (i = 0; i < msa->ngc; i++) - msa->gc[i][mpos] = '\0'; - - return; -} - - -/* 
Function: MSASmallerAlignment() - * Date: SRE, Wed Jun 30 09:56:08 1999 [St. Louis] - * - * Purpose: Given an array "useme" of TRUE/FALSE flags for - * each sequence in an alignment, construct - * and return a new alignment containing only - * those sequences that are flagged useme=TRUE. - * - * Used by routines such as MSAFilterAlignment() - * and MSASampleAlignment(). - * - * Limitations: - * Does not copy unparsed Stockholm markup. - * - * Does not make assumptions about meaning of wgt; - * if you want the new wgt vector renormalized, do - * it yourself with FNorm(new->wgt, new->nseq). - * - * Args: msa -- the original (larger) alignment - * useme -- [0..nseq-1] array of TRUE/FALSE flags; TRUE means include - * this seq in new alignment - * ret_new -- RETURN: new alignment - * - * Returns: void - * ret_new is allocated here; free with MSAFree() - */ -void -MSASmallerAlignment(MSA *msa, int *useme, MSA **ret_new) -{ - MSA *new; /* RETURN: new alignment */ - int nnew; /* number of seqs in new msa (e.g. 
# of TRUEs) */ - int oidx, nidx; /* old, new indices */ - - nnew = 0; - for (oidx = 0; oidx < msa->nseq; oidx++) - if (useme[oidx]) nnew++; - if (nnew == 0) { *ret_new = NULL; return; } - - new = MSAAlloc(nnew, 0); - nidx = 0; - for (oidx = 0; oidx < msa->nseq; oidx++) - if (useme[oidx]) - { - new->aseq[nidx] = sre_strdup(msa->aseq[oidx], msa->alen); - new->sqname[nidx] = sre_strdup(msa->sqname[oidx], msa->alen); - GKIStoreKey(new->index, msa->sqname[oidx]); - new->wgt[nidx] = msa->wgt[oidx]; - if (msa->sqacc != NULL) - MSASetSeqAccession(new, nidx, msa->sqacc[oidx]); - if (msa->sqdesc != NULL) - MSASetSeqDescription(new, nidx, msa->sqdesc[oidx]); - if (msa->ss != NULL && msa->ss[oidx] != NULL) - { - if (new->ss == NULL) new->ss = MallocOrDie(sizeof(char *) * new->nseq); - new->ss[nidx] = sre_strdup(msa->ss[oidx], -1); - } - if (msa->sa != NULL && msa->sa[oidx] != NULL) - { - if (new->sa == NULL) new->sa = MallocOrDie(sizeof(char *) * new->nseq); - new->sa[nidx] = sre_strdup(msa->sa[oidx], -1); - } - nidx++; - } - - new->nseq = nnew; - new->alen = msa->alen; - new->flags = msa->flags; - new->type = msa->type; - new->name = sre_strdup(msa->name, -1); - new->desc = sre_strdup(msa->desc, -1); - new->acc = sre_strdup(msa->acc, -1); - new->au = sre_strdup(msa->au, -1); - new->ss_cons = sre_strdup(msa->ss_cons, -1); - new->sa_cons = sre_strdup(msa->sa_cons, -1); - new->rf = sre_strdup(msa->rf, -1); - new->tc1 = msa->tc1; - new->tc2 = msa->tc2; - new->nc1 = msa->nc1; - new->nc2 = msa->nc2; - new->ga1 = msa->ga1; - new->ga2 = msa->ga2; - free(new->sqlen); - - MSAMingap(new); - *ret_new = new; - return; -} - - -/***************************************************************** - * Retrieval routines - * - * Access to MSA structure data is possible through these routines. - * I'm not doing this because of object oriented design, though - * it might work in my favor someday. 
- * I'm doing this because lots of MSA data is optional, and - * checking through the chain of possible NULLs is a pain. - *****************************************************************/ - -char * -MSAGetSeqAccession(MSA *msa, int idx) -{ - if (msa->sqacc != NULL && msa->sqacc[idx] != NULL) - return msa->sqacc[idx]; - else - return NULL; -} -char * -MSAGetSeqDescription(MSA *msa, int idx) -{ - if (msa->sqdesc != NULL && msa->sqdesc[idx] != NULL) - return msa->sqdesc[idx]; - else - return NULL; -} -char * -MSAGetSeqSS(MSA *msa, int idx) -{ - if (msa->ss != NULL && msa->ss[idx] != NULL) - return msa->ss[idx]; - else - return NULL; -} -char * -MSAGetSeqSA(MSA *msa, int idx) -{ - if (msa->sa != NULL && msa->sa[idx] != NULL) - return msa->sa[idx]; - else - return NULL; -} diff --git a/forester/archive/RIO/others/hmmer/squid/msa.h b/forester/archive/RIO/others/hmmer/squid/msa.h deleted file mode 100644 index ff52f60..0000000 --- a/forester/archive/RIO/others/hmmer/squid/msa.h +++ /dev/null @@ -1,286 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -#ifndef SQUID_MSA_INCLUDED -#define SQUID_MSA_INCLUDED - -/* msa.h - * SRE, Mon May 17 10:24:30 1999 - * - * Header file for SQUID's multiple sequence alignment - * manipulation code. 
- * - * RCS $Id: msa.h,v 1.1.1.1 2005/03/22 08:34:19 cmzmasek Exp $ - */ - -#include /* FILE support */ -#include "gki.h" /* hash table support */ -#include "ssi.h" /* sequence file index support */ -#include "squid.h" /* need SQINFO */ - -/**************************************************** - * Obsolete alignment information, AINFO - * Superceded by MSA structure further below; but we - * need AINFO for the near future for backwards - * compatibility. - ****************************************************/ -/* Structure: aliinfo_s - * - * Purpose: Optional information returned from an alignment file. - * - * flags: always used. Flags for which info is valid/alloced. - * - * alen: mandatory. Alignments are always flushed right - * with gaps so that all aseqs are the same length, alen. - * Available for all alignment formats. - * - * nseq: mandatory. Aligned seqs are indexed 0..nseq-1. - * - * wgt: 0..nseq-1 vector of sequence weights. Mandatory. - * If not explicitly set, weights are initialized to 1.0. - * - * cs: 0..alen-1, just like the alignment. Contains single-letter - * secondary structure codes for consensus structure; "<>^+" - * for RNA, "EHL." for protein. May be NULL if unavailable - * from seqfile. Only available for SELEX format files. - * - * rf: 0..alen-1, just like the alignment. rf is an arbitrary string - * of characters, used for annotating columns. Blanks are - * interpreted as non-canonical columns and anything else is - * considered canonical. Only available from SELEX files. - * - * sqinfo: mandatory. Array of 0..nseq-1 - * per-sequence information structures, carrying - * name, id, accession, coords. 
- * - */ -struct aliinfo_s { - int flags; /* flags for what info is valid */ - int alen; /* length of alignment (columns) */ - int nseq; /* number of seqs in alignment */ - float *wgt; /* sequence weights [0..nseq-1] */ - char *cs; /* consensus secondary structure string */ - char *rf; /* reference coordinate system */ - struct seqinfo_s *sqinfo; /* name, id, coord info for each sequence */ - - /* Pfam/HMMER pick-ups */ - char *name; /* name of alignment */ - char *desc; /* description of alignment */ - char *acc; /* accession of alignment */ - char *au; /* "author" information */ - float tc1, tc2; /* trusted score cutoffs (per-seq, per-domain) */ - float nc1, nc2; /* noise score cutoffs (per-seq, per-domain) */ - float ga1, ga2; /* gathering cutoffs */ -}; -typedef struct aliinfo_s AINFO; -#define AINFO_TC (1 << 0) -#define AINFO_NC (1 << 1) -#define AINFO_GA (1 << 2) - -/***************************************************************** - * MSA - * SRE, Sun Jun 27 15:03:35 1999 [TW 723 over Greenland] - * - * Defines the new data structure and API for multiple - * sequence alignment i/o. - *****************************************************************/ - -/* Structure: MSA - * SRE, Tue May 18 11:33:08 1999 - * - * Our object for a multiple sequence alignment. - */ -typedef struct msa_struct { - /* Mandatory information associated with the alignment. - */ - char **aseq; /* the alignment itself, [0..nseq-1][0..alen-1] */ - char **sqname; /* names of sequences, [0..nseq-1][0..alen-1] */ - float *wgt; /* sequence weights [0..nseq-1] */ - int alen; /* length of alignment (columns) */ - int nseq; /* number of seqs in alignment */ - - /* Optional information that we understand, and might have. 
- */ - int flags; /* flags for what optional info is valid */ - int type; /* kOtherSeq, kRNA/hmmNUCLEIC, or kAmino/hmmAMINO */ - char *name; /* name of alignment, or NULL */ - char *desc; /* description of alignment, or NULL */ - char *acc; /* accession of alignment, or NULL */ - char *au; /* "author" information, or NULL */ - char *ss_cons; /* consensus secondary structure string, or NULL */ - char *sa_cons; /* consensus surface accessibility string, or NULL */ - char *rf; /* reference coordinate system, or NULL */ - char **sqacc; /* accession numbers for individual sequences */ - char **sqdesc; /* description lines for individual sequences */ - char **ss; /* per-seq secondary structure annotation, or NULL */ - char **sa; /* per-seq surface accessibility annotation, or NULL */ - float tc1, tc2; /* trusted score cutoffs (per-seq, per-domain) */ - float nc1, nc2; /* noise score cutoffs (per-seq, per-domain) */ - float ga1, ga2; /* gathering cutoffs (per-seq, per-domain) */ - - /* Optional information that we don't understand. - * That is, we know what type of information it is, but it's - * either (interpreted as) free-text comment, or it's Stockholm - * markup with unfamiliar tags. 
- */ - char **comment; /* free text comments, or NULL */ - int ncomment; /* number of comment lines */ - int alloc_ncomment; /* number of comment lines alloc'ed */ - - char **gf_tag; /* markup tags for unparsed #=GF lines */ - char **gf; /* annotations for unparsed #=GF lines */ - int ngf; /* number of unparsed #=GF lines */ - int alloc_ngf; /* number of gf lines alloc'ed */ - - char **gs_tag; /* markup tags for unparsed #=GS lines */ - char ***gs; /* [0..ngs-1][0..nseq-1][free text] markup */ - GKI *gs_idx; /* hash of #=GS tag types */ - int ngs; /* number of #=GS tag types */ - - char **gc_tag; /* markup tags for unparsed #=GC lines */ - char **gc; /* [0..ngc-1][0..alen-1] markup */ - GKI *gc_idx; /* hash of #=GC tag types */ - int ngc; /* number of #=GC tag types */ - - char **gr_tag; /* markup tags for unparsed #=GR lines */ - char ***gr; /* [0..ngr][0..nseq-1][0..alen-1] markup */ - GKI *gr_idx; /* hash of #=GR tag types */ - int ngr; /* number of #=GR tag types */ - - /* Stuff we need for our own maintenance of the data structure - */ - GKI *index; /* name ->seqidx hash table */ - int nseqalloc; /* number of seqs currently allocated for */ - int nseqlump; /* lump size for dynamic expansions of nseq */ - int *sqlen; /* individual sequence lengths during parsing */ - int *sslen; /* individual ss lengths during parsing */ - int *salen; /* individual sa lengths during parsing */ - int lastidx; /* last index we saw; use for guessing next */ -} MSA; -#define MSA_SET_TC (1 << 0) -#define MSA_SET_NC (1 << 1) -#define MSA_SET_GA (1 << 2) -#define MSA_SET_WGT (1 << 3) - -/* Structure: MSAFILE - * SRE, Tue May 18 11:36:54 1999 - * - * Defines an alignment file that's open for reading. - */ -typedef struct msafile_struct { - FILE *f; /* open file pointer */ - char *fname; /* name of file. 
used for diagnostic output */ - int linenumber; /* what line are we on in the file */ - - char *buf; /* buffer for line input w/ sre_fgets() */ - int buflen; /* current allocated length for buf */ - - SSIFILE *ssi; /* open SSI index file; or NULL, if none. */ - - int do_gzip; /* TRUE if f is a pipe from gzip -dc (need pclose(f)) */ - int do_stdin; /* TRUE if f is stdin (don't close f, not our problem) */ - int format; /* format of alignment file we're reading */ -} MSAFILE; - - -/* Alignment file formats. - * Must coexist with sqio.c/squid.h unaligned file format codes. - * Rules: - * - 0 is an unknown/unassigned format - * - <100 reserved for unaligned formats - * - >100 reserved for aligned formats - */ -#define MSAFILE_UNKNOWN 0 /* unknown format */ -#define MSAFILE_STOCKHOLM 101 /* Pfam/HMMER's Stockholm format */ -#define MSAFILE_SELEX 102 /* Obsolete(!): old HMMER/SELEX format */ -#define MSAFILE_MSF 103 /* GCG MSF format */ -#define MSAFILE_CLUSTAL 104 /* Clustal V/W format */ -#define MSAFILE_A2M 105 /* aligned FASTA (A2M is UCSC terminology) */ -#define MSAFILE_PHYLIP 106 /* Felsenstein's PHYLIP format */ -#define MSAFILE_EPS 107 /* Encapsulated PostScript (output only) */ - -#define IsAlignmentFormat(fmt) ((fmt) > 100) - - -/* from msa.c - */ -extern MSAFILE *MSAFileOpen(char *filename, int format, char *env); -extern MSA *MSAFileRead(MSAFILE *afp); -extern void MSAFileClose(MSAFILE *afp); -extern void MSAFree(MSA *msa); -extern void MSAFileWrite(FILE *fp, MSA *msa, int outfmt, int do_oneline); - -extern int MSAFileRewind(MSAFILE *afp); -extern int MSAFilePositionByKey(MSAFILE *afp, char *key); -extern int MSAFilePositionByIndex(MSAFILE *afp, int idx); - -extern int MSAFileFormat(MSAFILE *afp); -extern MSA *MSAAlloc(int nseq, int alen); -extern void MSAExpand(MSA *msa); -extern char *MSAFileGetLine(MSAFILE *afp); -extern void MSASetSeqAccession(MSA *msa, int seqidx, char *acc); -extern void MSASetSeqDescription(MSA *msa, int seqidx, char *desc); -extern 
void MSAAddComment(MSA *msa, char *s); -extern void MSAAddGF(MSA *msa, char *tag, char *value); -extern void MSAAddGS(MSA *msa, char *tag, int seqidx, char *value); -extern void MSAAppendGC(MSA *msa, char *tag, char *value); -extern char *MSAGetGC(MSA *msa, char *tag); -extern void MSAAppendGR(MSA *msa, char *tag, int seqidx, char *value); -extern void MSAVerifyParse(MSA *msa); -extern int MSAGetSeqidx(MSA *msa, char *name, int guess); - -extern MSA *MSAFromAINFO(char **aseq, AINFO *ainfo); - -extern void MSAMingap(MSA *msa); -extern void MSANogap(MSA *msa); -extern void MSAShorterAlignment(MSA *msa, int *useme); -extern void MSASmallerAlignment(MSA *msa, int *useme, MSA **ret_new); - -extern char *MSAGetSeqAccession(MSA *msa, int idx); -extern char *MSAGetSeqDescription(MSA *msa, int idx); -extern char *MSAGetSeqSS(MSA *msa, int idx); -extern char *MSAGetSeqSA(MSA *msa, int idx); - -/* from a2m.c - */ -extern MSA *ReadA2M(MSAFILE *afp); -extern void WriteA2M(FILE *fp, MSA *msa); - -/* from clustal.c - */ -extern MSA *ReadClustal(MSAFILE *afp); -extern void WriteClustal(FILE *fp, MSA *msa); - -/* from eps.c - */ -extern void EPSWriteSmallMSA(FILE *fp, MSA *msa); - -/* from msf.c - */ -extern MSA *ReadMSF(MSAFILE *afp); -extern void WriteMSF(FILE *fp, MSA *msa); - -/* from phylip.c - */ -extern MSA *ReadPhylip(MSAFILE *afp); -extern void WritePhylip(FILE *fp, MSA *msa); - -/* from selex.c - */ -extern MSA *ReadSELEX(MSAFILE *afp); -extern void WriteSELEX(FILE *fp, MSA *msa); - -/* from stockholm.c - */ -extern MSA *ReadStockholm(MSAFILE *afp); -extern void WriteStockholm(FILE *fp, MSA *msa); -extern void WriteStockholmOneBlock(FILE *fp, MSA *msa); - -#endif /*SQUID_MSA_INCLUDED*/ diff --git a/forester/archive/RIO/others/hmmer/squid/msf.c b/forester/archive/RIO/others/hmmer/squid/msf.c deleted file mode 100644 index ffbfa14..0000000 --- a/forester/archive/RIO/others/hmmer/squid/msf.c +++ /dev/null @@ -1,389 +0,0 @@ 
-/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* msf.c - * SRE, Sun Jul 11 16:17:32 1993 - * - * Import/export of GCG MSF multiple sequence alignment - * formatted files. Designed using format specifications - * kindly provided by Steve Smith of Genetics Computer Group. - * - * RCS $Id: msf.c,v 1.1.1.1 2005/03/22 08:34:20 cmzmasek Exp $ - */ - -#include -#include -#include -#include -#include -#include "squid.h" -#include "msa.h" - -#ifdef TESTDRIVE_MSF -/***************************************************************** - * msf.c test driver: - * cc -DTESTDRIVE_MSF -g -O2 -Wall -o test msf.c msa.c gki.c sqerror.c sre_string.c file.c hsregex.c sre_math.c sre_ctype.c sqio.c alignio.c selex.c interleaved.c types.c -lm - * - */ -int -main(int argc, char **argv) -{ - MSAFILE *afp; - MSA *msa; - char *file; - - file = argv[1]; - - if ((afp = MSAFileOpen(file, MSAFILE_STOCKHOLM, NULL)) == NULL) - Die("Couldn't open %s\n", file); - - while ((msa = ReadMSF(afp)) != NULL) - { - WriteMSF(stdout, msa); - MSAFree(msa); - } - - MSAFileClose(afp); - exit(0); -} -/******************************************************************/ -#endif /* testdrive_msf */ - - - -/* Function: ReadMSF() - * Date: SRE, Tue Jun 1 08:07:22 1999 [St. Louis] - * - * Purpose: Parse an alignment read from an open MSF format - * alignment file. (MSF is a single-alignment format.) - * Return the alignment, or NULL if we've already - * read the alignment. 
- * - * Args: afp - open alignment file - * - * Returns: MSA * - an alignment object - * caller responsible for an MSAFree() - * NULL if no more alignments - * - * Diagnostics: - * Will Die() here with a (potentially) useful message - * if a parsing error occurs. - */ -MSA * -ReadMSF(MSAFILE *afp) -{ - MSA *msa; - char *s; - int alleged_alen; - int alleged_type; - int alleged_checksum; - char *tok; - char *sp; - int slen; - int sqidx; - char *name; - char *seq; - - if (feof(afp->f)) return NULL; - if ((s = MSAFileGetLine(afp)) == NULL) return NULL; - - /* The first line is the header. - * This is a new-ish GCG feature. Don't count on it, so - * we can be a bit more tolerant towards non-GCG software - * generating "MSF" files. - */ - msa = MSAAlloc(10, 0); - if (strncmp(s, "!!AA_MULTIPLE_ALIGNMENT", 23) == 0) { - msa->type = kAmino; - if ((s = MSAFileGetLine(afp)) == NULL) return NULL; - } else if (strncmp(s, "!!NA_MULTIPLE_ALIGNMENT", 23) == 0) { - msa->type = kRNA; - if ((s = MSAFileGetLine(afp)) == NULL) return NULL; - } - - /* Now we're in the free text comment section of the MSF file. - * It ends when we see the "MSF: Type: Check: .." line. - * This line must be present. - */ - do - { - if ((strstr(s, "..") != NULL && strstr(s, "MSF:") != NULL) && - Strparse("^.+MSF: +([0-9]+) +Type: +([PNX]).+Check: +([0-9]+) +\\.\\.", s, 3)) - { - alleged_alen = atoi(sqd_parse[0]); - switch (*(sqd_parse[1])) { - case 'N' : alleged_type = kRNA; break; - case 'P' : alleged_type = kAmino; break; - case 'X' : alleged_type = kOtherSeq; break; - default : alleged_type = kOtherSeq; - } - alleged_checksum = atoi(sqd_parse[3]); - if (msa->type == kOtherSeq) msa->type = alleged_type; - break; /* we're done with comment section. */ - } - if (! IsBlankline(s)) - MSAAddComment(msa, s); - } while ((s = MSAFileGetLine(afp)) != NULL); - - /* Now we're in the name section. 
- * GCG has a relatively poorly documented feature: only sequences that - * appear in this list will be read from the alignment section. Commenting - * out sequences in the name list (by preceding them with "!") is - * allowed as a means of manually defining subsets of sequences in - * the alignment section. We can support this feature reasonably - * easily because of the hash table for names in the MSA: we - * only add names to the hash table when we see 'em in the name section. - */ - while ((s = MSAFileGetLine(afp)) != NULL) - { - while ((*s == ' ' || *s == '\t') && *s) s++; /* skip leading whitespace */ - - if (*s == '\n') continue; /* skip blank lines */ - else if (*s == '!') MSAAddComment(msa, s); - else if ((sp = strstr(s, "Name:")) != NULL) - { - /* We take the name and the weigh, and that's it */ - sp += 5; - tok = sre_strtok(&sp, " \t", &slen); /* */ - sqidx = GKIStoreKey(msa->index, tok); - if (sqidx >= msa->nseqalloc) MSAExpand(msa); - msa->sqname[sqidx] = sre_strdup(tok, slen); - msa->nseq++; - - if ((sp = strstr(sp, "Weight:")) == NULL) - Die("No Weight: on line %d for %s in name section of MSF file %s\n", - afp->linenumber, msa->sqname[sqidx], afp->fname); - sp += 7; - tok = sre_strtok(&sp, " \t", &slen); - msa->wgt[sqidx] = atof(tok); - msa->flags |= MSA_SET_WGT; - } - else if (strncmp(s, "//", 2) == 0) - break; - else - { - Die("Invalid line (probably %d) in name section of MSF file %s:\n%s\n", - afp->linenumber, afp->fname, s); - squid_errno = SQERR_FORMAT; /* NOT THREADSAFE */ - return NULL; - } - - } - - /* And now we're in the sequence section. - * As discussed above, if we haven't seen a sequence name, then we - * don't include the sequence in the alignment. - * Also, watch out for coordinate-only lines. 
- */ - while ((s = MSAFileGetLine(afp)) != NULL) - { - sp = s; - if ((name = sre_strtok(&sp, " \t", NULL)) == NULL) continue; - if ((seq = sre_strtok(&sp, "\n", &slen)) == NULL) continue; - - /* The test for a coord line: digits starting both fields - */ - if (isdigit(*name) && isdigit(*seq)) - continue; - - /* It's not blank, and it's not a coord line: must be sequence - */ - sqidx = GKIKeyIndex(msa->index, name); - if (sqidx < 0) continue; /* not a sequence we recognize */ - - msa->sqlen[sqidx] = sre_strcat(&(msa->aseq[sqidx]), msa->sqlen[sqidx], seq, slen); - } - - /* We've left blanks in the aseqs; take them back out. - */ - for (sqidx = 0; sqidx < msa->nseq; sqidx++) - { - if (msa->aseq[sqidx] == NULL) - Die("Didn't find a sequence for %s in MSF file %s\n", msa->sqname[sqidx], afp->fname); - - for (s = sp = msa->aseq[sqidx]; *s != '\0'; s++) - { - if (*s == ' ' || *s == '\t') { - msa->sqlen[sqidx]--; - } else { - *sp = *s; - sp++; - } - } - *sp = '\0'; - } - - MSAVerifyParse(msa); /* verifies, and also sets alen and wgt. */ - return msa; -} - - -/* Function: WriteMSF() - * Date: SRE, Mon May 31 11:25:18 1999 [St. Louis] - * - * Purpose: Write an alignment in MSF format to an open file. - * - * Args: fp - file that's open for writing. - * msa - alignment to write. - * - * Note that msa->type, usually optional, must be - * set for WriteMSF to work. If it isn't, a fatal - * error is generated. 
- * - * Returns: (void) - */ -void -WriteMSF(FILE *fp, MSA *msa) -{ - time_t now; /* current time as a time_t */ - char date[64]; /* today's date in GCG's format "October 3, 1996 15:57" */ - char **gcg_aseq; /* aligned sequences with gaps converted to GCG format */ - char **gcg_sqname; /* sequence names with GCG-valid character sets */ - int idx; /* counter for sequences */ - char *s; /* pointer into sqname or seq */ - int len; /* tmp variable for name lengths */ - int namelen; /* maximum name length used */ - int pos; /* position counter */ - char buffer[51]; /* buffer for writing seq */ - int i; /* another position counter */ - - /***************************************************************** - * Make copies of sequence names and sequences. - * GCG recommends that name characters should only contain - * alphanumeric characters, -, or _ - * Some GCG and GCG-compatible software is sensitive to this. - * We silently convert all other characters to '_'. - * - * For sequences, GCG allows only ~ and . for gaps. - * Otherwise, everthing is interpreted as a residue; - * so squid's IUPAC-restricted chars are fine. ~ means - * an external gap. . means an internal gap. - *****************************************************************/ - - /* make copies that we can edit */ - gcg_aseq = MallocOrDie(sizeof(char *) * msa->nseq); - gcg_sqname = MallocOrDie(sizeof(char *) * msa->nseq); - for (idx = 0; idx < msa->nseq; idx++) - { - gcg_aseq[idx] = sre_strdup(msa->aseq[idx], msa->alen); - gcg_sqname[idx] = sre_strdup(msa->sqname[idx], -1); - } - /* alter names as needed */ - for (idx = 0; idx < msa->nseq; idx++) - for (s = gcg_sqname[idx]; *s != '\0'; s++) - if (! 
isalnum((int) *s) && *s != '-' && *s != '_') - *s = '_'; - /* alter gap chars in seq */ - for (idx = 0; idx < msa->nseq; idx++) - { - for (s = gcg_aseq[idx]; *s != '\0' && isgap(*s); s++) - *s = '~'; - for (; *s != '\0'; s++) - if (isgap(*s)) *s = '.'; - for (pos = msa->alen-1; pos > 0 && isgap(gcg_aseq[idx][pos]); pos--) - gcg_aseq[idx][pos] = '~'; - } - /* calculate max namelen used */ - namelen = 0; - for (idx = 0; idx < msa->nseq; idx++) - if ((len = strlen(msa->sqname[idx])) > namelen) - namelen = len; - - /***************************************************** - * Write the MSF header - *****************************************************/ - /* required file type line */ - if (msa->type == kOtherSeq) - msa->type = GuessAlignmentSeqtype(msa->aseq, msa->nseq); - - if (msa->type == kRNA) fprintf(fp, "!!NA_MULTIPLE_ALIGNMENT 1.0\n"); - else if (msa->type == kDNA) fprintf(fp, "!!NA_MULTIPLE_ALIGNMENT 1.0\n"); - else if (msa->type == kAmino) fprintf(fp, "!!AA_MULTIPLE_ALIGNMENT 1.0\n"); - else if (msa->type == kOtherSeq) - Die("WriteMSF(): couldn't guess whether that alignment is RNA or protein.\n"); - else - Die("Invalid sequence type %d in WriteMSF()\n", msa->type); - - /* free text comments */ - if (msa->ncomment > 0) - { - for (idx = 0; idx < msa->ncomment; idx++) - fprintf(fp, "%s\n", msa->comment[idx]); - fprintf(fp, "\n"); - } - /* required checksum line */ - now = time(NULL); - if (strftime(date, 64, "%B %d, %Y %H:%M", localtime(&now)) == 0) - Die("What time is it on earth? strftime() failed in WriteMSF().\n"); - fprintf(fp, " %s MSF: %d Type: %c %s Check: %d ..\n", - msa->name != NULL ? msa->name : "squid.msf", - msa->alen, - msa->type == kRNA ? 
'N' : 'P', - date, - GCGMultchecksum(gcg_aseq, msa->nseq)); - fprintf(fp, "\n"); - - /***************************************************** - * Names/weights section - *****************************************************/ - - for (idx = 0; idx < msa->nseq; idx++) - { - fprintf(fp, " Name: %-*.*s Len: %5d Check: %4d Weight: %.2f\n", - namelen, namelen, - gcg_sqname[idx], - msa->alen, - GCGchecksum(gcg_aseq[idx], msa->alen), - msa->wgt[idx]); - } - fprintf(fp, "\n"); - fprintf(fp, "//\n"); - - /***************************************************** - * Write the sequences - *****************************************************/ - - for (pos = 0; pos < msa->alen; pos += 50) - { - fprintf(fp, "\n"); /* Blank line between sequence blocks */ - - /* Coordinate line */ - len = (pos + 50) > msa->alen ? msa->alen - pos : 50; - if (len > 10) - fprintf(fp, "%*s %-6d%*s%6d\n", namelen, "", - pos+1, - len + ((len-1)/10) - 12, "", - pos + len); - else - fprintf(fp, "%*s %-6d\n", namelen, "", pos+1); - - for (idx = 0; idx < msa->nseq; idx++) - { - fprintf(fp, "%-*s ", namelen, gcg_sqname[idx]); - /* get next line's worth of 50 from seq */ - strncpy(buffer, gcg_aseq[idx] + pos, 50); - buffer[50] = '\0'; - /* draw the sequence line */ - for (i = 0; i < len; i++) - { - if (! 
(i % 10)) fputc(' ', fp); - fputc(buffer[i], fp); - } - fputc('\n', fp); - } - } - - Free2DArray((void **) gcg_aseq, msa->nseq); - Free2DArray((void **) gcg_sqname, msa->nseq); - return; -} - - - diff --git a/forester/archive/RIO/others/hmmer/squid/phylip.c b/forester/archive/RIO/others/hmmer/squid/phylip.c deleted file mode 100644 index e2980f1..0000000 --- a/forester/archive/RIO/others/hmmer/squid/phylip.c +++ /dev/null @@ -1,174 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* phylip.c - * SRE, Mon Jun 14 14:08:33 1999 [St. Louis] - * - * Import/export of PHYLIP interleaved multiple sequence alignment - * format files. - * - * RCS $Id: phylip.c,v 1.1.1.1 2005/03/22 08:34:25 cmzmasek Exp $ - */ - -#include -#include -#include -#include -#include "squid.h" -#include "msa.h" - -#ifdef TESTDRIVE_PHYLIP -/***************************************************************** - * phylip.c test driver: - * - */ -int -main(int argc, char **argv) -{ - MSAFILE *afp; - MSA *msa; - char *file; - - file = argv[1]; - - if ((afp = MSAFileOpen(file, MSAFILE_UNKNOWN, NULL)) == NULL) - Die("Couldn't open %s\n", file); - - printf("format %d\n", afp->format); - - while ((msa = ReadPhylip(afp)) != NULL) - { - WritePhylip(stdout, msa); - MSAFree(msa); - } - - MSAFileClose(afp); - exit(0); -} -/******************************************************************/ -#endif /* testdrive_phylip */ - - - -/* Function: ReadPhylip() - * Date: SRE, Fri Jun 18 12:59:37 1999 [Sanger Centre] - * - * Purpose: Parse an alignment from an open Phylip format - * alignment file. Phylip is a single-alignment format. 
- * Return the alignment, or NULL if we have no data. - * - * Args: afp - open alignment file - * - * Returns: MSA * - an alignment object - * Caller responsible for an MSAFree() - * NULL if no more alignments - */ -MSA * -ReadPhylip(MSAFILE *afp) -{ - MSA *msa; - char *s, *s1, *s2; - char name[11]; /* seq name max len = 10 char */ - int nseq, alen; - int idx; /* index of current sequence */ - int slen; - int nblock; - - if (feof(afp->f)) return NULL; - - /* Skip until we see a nonblank line; it's the header, - * containing nseq/alen - */ - nseq = 0; alen = 0; - while ((s = MSAFileGetLine(afp)) != NULL) - { - if ((s1 = sre_strtok(&s, WHITESPACE, NULL)) == NULL) continue; - if ((s2 = sre_strtok(&s, WHITESPACE, NULL)) == NULL) - Die("Failed to parse nseq/alen from first line of PHYLIP file %s\n", afp->fname); - if (! IsInt(s1) || ! IsInt(s2)) - Die("nseq and/or alen not an integer in first line of PHYLIP file %s\n", afp->fname); - nseq = atoi(s1); - alen = atoi(s2); - break; - } - - msa = MSAAlloc(nseq, 0); - idx = 0; - nblock = 0; - while ((s = MSAFileGetLine(afp)) != NULL) - { - /* ignore blank lines. nonblank lines start w/ nonblank char */ - if (isspace(*s)) continue; - /* First block has seq names */ - if (nblock == 0) { - strncpy(name, s, 10); - name[10] = '\0'; - GKIStoreKey(msa->index, name); - msa->sqname[idx] = sre_strdup(name, -1); - s += 10; - } - /* be careful of trailing whitespace on lines */ - if ((s1 = sre_strtok(&s, WHITESPACE, &slen)) == NULL) - Die("Failed to parse sequence at line %d of PHYLIP file %s\n", - afp->linenumber, afp->fname); - msa->sqlen[idx] = sre_strcat(&(msa->aseq[idx]), msa->sqlen[idx], s1, slen); - - idx++; - if (idx == nseq) { idx = 0; nblock++; } - } - msa->nseq = nseq; - MSAVerifyParse(msa); /* verifies; sets alen, wgt; frees sqlen[] */ - return msa; -} - - - -/* Function: WritePhylip() - * Date: SRE, Fri Jun 18 12:07:41 1999 [Sanger Centre] - * - * Purpose: Write an alignment in Phylip format to an open file. 
- * - * Args: fp - file that's open for writing. - * msa - alignment to write. - * - * Returns: (void) - */ -void -WritePhylip(FILE *fp, MSA *msa) -{ - int idx; /* counter for sequences */ - int cpl = 50; /* 50 seq char per line */ - char buf[51]; /* buffer for writing seq */ - int pos; - - /* First line has nseq, alen - */ - fprintf(fp, " %d %d\n", msa->nseq, msa->alen); - - /* Alignment section. - * PHYLIP is a multiblock format, blocks (optionally) separated - * by blanks; names only attached to first block. Names are - * restricted to ten char; we achieve this by simple truncation (!). - * (Do we need to convert gap characters from our ./- convention?) - */ - for (pos = 0; pos < msa->alen; pos += cpl) - { - if (pos > 0) fprintf(fp, "\n"); - - for (idx = 0; idx < msa->nseq; idx++) - { - strncpy(buf, msa->aseq[idx] + pos, cpl); - buf[cpl] = '\0'; - if (pos > 0) fprintf(fp, "%s\n", buf); - else fprintf(fp, "%-10.10s%s\n", msa->sqname[idx], buf); - } - } - return; -} diff --git a/forester/archive/RIO/others/hmmer/squid/revcomp.c b/forester/archive/RIO/others/hmmer/squid/revcomp.c deleted file mode 100644 index a245e2b..0000000 --- a/forester/archive/RIO/others/hmmer/squid/revcomp.c +++ /dev/null @@ -1,62 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- *****************************************************************/ - -/* revcomp.c - * - * Reverse complement of a IUPAC character string - * RCS $Id: revcomp.c,v 1.1.1.1 2005/03/22 08:34:16 cmzmasek Exp $ - */ - -#include -#include -#include -#include "squid.h" - - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - - -char * -revcomp(char *comp, char *seq) -{ - long bases; - char *bckp, *fwdp; - int idx; - long pos; - int c; - - if (comp == NULL) return NULL; - if (seq == NULL) return NULL; - bases = strlen(seq); - - fwdp = comp; - bckp = seq + bases -1; - for (pos = 0; pos < bases; pos++) - { - c = *bckp; - c = sre_toupper(c); - for (idx = 0; c != iupac[idx].sym && idx < IUPACSYMNUM; idx++); - if (idx == IUPACSYMNUM) - { - Warn("Can't reverse complement an %c, pal. Using N.", c); - *fwdp = 'N'; - } - else - *fwdp = iupac[idx].symcomp; - if (islower((int) *bckp)) *fwdp = (char) sre_tolower((int) *fwdp); - fwdp++; - bckp--; - } - *fwdp = '\0'; - return comp; -} - diff --git a/forester/archive/RIO/others/hmmer/squid/revcomp_main.c b/forester/archive/RIO/others/hmmer/squid/revcomp_main.c deleted file mode 100644 index 130bff1..0000000 --- a/forester/archive/RIO/others/hmmer/squid/revcomp_main.c +++ /dev/null @@ -1,93 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- *****************************************************************/ - -/* main for revcomp - * - * revcomp - generate reverse complement of sequences - * SRE, Thu Aug 5 17:36:57 1993 - * RCS $Id: revcomp_main.c,v 1.1.1.1 2005/03/22 08:34:31 cmzmasek Exp $ - */ - -#include -#include -#include "squid.h" -#include "version.h" - -#define OPTIONS "h" - -char usage[] = "Usage: revcomp [-options] \n\ - Reverse complement a nucleic acid sequence.\n\ - Available options:\n\ - -h : help; print version and usage info\n"; - -int -main(int argc, char **argv) -{ - char *seqfile; /* name of sequence file */ - SQFILE *dbfp; /* open sequence file */ - int fmt; /* format of seqfile */ - char *seq; /* sequence */ - SQINFO sqinfo; /* additional sequence info */ - char *rev; /* reverse complement */ - int swap; - - int optchar; /* option character, command line */ - extern int optind; - - /*********************************************** - * Parse command line - ***********************************************/ - - fmt = SQFILE_UNKNOWN; - - while ((optchar = getopt(argc, argv, OPTIONS)) != -1) - switch (optchar) { - case 'h': - printf("revcomp %s, %s\n%s\n", RELEASE, RELEASEDATE, usage); - exit(EXIT_SUCCESS); - default: - Die("%s\n", usage); - } - - if (argc - optind != 1) Die("%s\n", usage); - seqfile = argv[optind]; - - if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL) - Die("Failed to open sequence file %s for reading", seqfile); - - while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo)) - { - if ((rev = (char *) malloc ((sqinfo.len + 1) * sizeof(char))) == NULL) - Die("malloc failed"); - - revcomp(rev, seq); - if (sqinfo.flags & (SQINFO_START | SQINFO_STOP)) - { - swap = sqinfo.start; - sqinfo.start = sqinfo.stop; - sqinfo.stop = swap; - } - /* secondary structure of reverse strand is nonsense - */ - if (sqinfo.flags & SQINFO_SS) - { - sqinfo.flags = sqinfo.flags & ~SQINFO_SS; - free(sqinfo.ss); - } - - WriteSeq(stdout, SQFILE_FASTA, rev, &sqinfo); - - free(rev); - FreeSequence(seq, 
&sqinfo); - } - - SeqfileClose(dbfp); - return 0; -} diff --git a/forester/archive/RIO/others/hmmer/squid/rk.c b/forester/archive/RIO/others/hmmer/squid/rk.c deleted file mode 100644 index 9ae0c68..0000000 --- a/forester/archive/RIO/others/hmmer/squid/rk.c +++ /dev/null @@ -1,134 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* rk.c (originally from rnabob's patsearch.c) - * - * Contains a compiler and a search engine for Rabin-Karp - * based primary sequence pattern searching on encoded - * sequences. - * - * See Sedgewick, _Algorithms_, for a general discussion of - * the Rabin-Karp algorithm. See the rkcomp or rkexec man - * pages for specific details. 
- * - * RCS $Id: rk.c,v 1.1.1.1 2005/03/22 08:34:16 cmzmasek Exp $ - */ - -#include -#include -#include -#include "squid.h" /* seq encoding utilities and typedefs */ -#include "rk.h" - - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -Hashseq -rkcomp(char *probe) /* A,C,G,T/U, N probe string, 0-8 nt long */ -{ - Hashseq hashprobe = 0; - char coded[RK_HASHSIZE + 1]; - int len; - int i; - /* check bounds violation on probe */ - if ((len = strlen(probe)) > RK_HASHSIZE) return 0; - /* encode the probe */ - if (seqencode(coded, probe) == 0) return 0; - /* pack the probe into a Hashseq */ - for (i = 0; i < len; i++) - { - hashprobe <<= 4; - hashprobe |= (Hashseq) coded[i]; - } - /* left adjust as needed */ - for (; i < RK_HASHSIZE; i++) - { - hashprobe <<= 4; - hashprobe |= (Hashseq) NTN; - } - /* return the compiled probe */ - return hashprobe; -} - -int -rkseq(Hashseq hashprobe, /* up to 8 nt packed into the probe */ - char *sequence) /* encoded sequence */ -{ - long i; - long pos = 0; - Hashseq target = 0; - - /* initialize the target hashseq */ - for (i = 0; i < RK_HASHSIZE; i++) - { - if (*(sequence + i) == NTEND) - break; - target <<= 4; - target |= (Hashseq) (*(sequence + i)); - } - - while (*(sequence + pos + RK_HASHSIZE -1) != NTEND) - { -#ifdef DEBUG - printf("hashprobe: "); - writehash(hashprobe); - printf("\ttarget: "); - writehash(target); - printf("\nhashprobe & target: "); - writehash(hashprobe & target); - printf("\n"); -#endif - if ((hashprobe & target) == target) - return ((int) pos); - target <<= 4; - target |= (Hashseq) (*(sequence + pos + RK_HASHSIZE)); - pos++; - } - /* now we deal with an end effect */ - for (i = 0; i < RK_HASHSIZE; i++) - { - target |= (Hashseq) NTN; - if ((hashprobe & target) == target) - return ((int) pos); - target <<=4; - pos++; - } - - return(-1); -} - - -#ifdef DEBUG /* Debugging aids */ - -static void -writehash(Hashseq hashseq) -{ - int idx; - int sym; - - if (hashseq/16) - writehash(hashseq/16); - - sym = (int) 
(hashseq % 16); - if (sym == 0) - putchar('-'); - else - { - for (idx = 0; sym != iupac[idx].code && idx < IUPACSYMNUM; idx++); - if (idx > IUPACSYMNUM) - printf("(%d)", sym); - else - putchar(iupac[idx].sym); - } -} - -#endif diff --git a/forester/archive/RIO/others/hmmer/squid/rk.h b/forester/archive/RIO/others/hmmer/squid/rk.h deleted file mode 100644 index e9ff0b2..0000000 --- a/forester/archive/RIO/others/hmmer/squid/rk.h +++ /dev/null @@ -1,40 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -#ifndef SQRKH_INCLUDED -#define SQRKH_INCLUDED - -/* rk.h - * - * Header file for Rabin-Karp pattern searching on encoded - * sequence strings. - * - * Sean Eddy, Thu Oct 1 11:45:42 1992 - * RCS $Id: rk.h,v 1.1.1.1 2005/03/22 08:34:16 cmzmasek Exp $ - */ - - - /* expect 32 bits for 8 nt */ -typedef unsigned long Hashseq; - /* but we count to be sure... 
- RK_HASHSIZE is the number of nt that fit - in one probe */ -#define RK_HASHSIZE (sizeof(Hashseq)*2) - /* empirically, how many nt minimum we require - in a pattern before we abandon rk and - go with something else */ -#define RK_REQUIRE 4 - -extern int rkseq(Hashseq hashprobe, char *sequence); -extern Hashseq rkcomp(char *probe); /* compile a Hashseq from a pattern */ - - - -#endif /* SQRKH_INCLUDED */ diff --git a/forester/archive/RIO/others/hmmer/squid/selex.c b/forester/archive/RIO/others/hmmer/squid/selex.c deleted file mode 100644 index 25f63d3..0000000 --- a/forester/archive/RIO/others/hmmer/squid/selex.c +++ /dev/null @@ -1,814 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* selex.c - * - * SRE, Mon Jun 14 11:08:38 1999 - * SELEX obsolete as the preferred HMMER/SQUID format - * replaced by Stockholm format - * selex support retained for backwards compatibility - * kludged to use the MSA interface - * - * SRE, Mon Jan 30 14:41:49 1995: - * #=SA side chain % surface accessibility annotation supported - * - * SRE, Tue Nov 9 17:40:50 1993: - * major revision. #= special comments and aliinfo_s optional - * alignment info support added. Support for #=CS (consensus - * secondary structure), #=SS (individual secondary structure), - * #=RF (reference coordinate system), #=SQ (per-sequence header info), - * and #=AU ("author") added. - * - * Fri Dec 4 17:43:24 1992, SRE: - * Reading and writing aligned sequences to/from disk files. - * Implements a new, broader specification of SELEX format - * and supercedes alignio.c. 
- * - * SELEX format is documented in Docs/formats.tex. - **************************************************************************** - * RCS $Id: selex.c,v 1.1.1.1 2005/03/22 08:34:24 cmzmasek Exp $ - */ - -#include -#include -#include -#include -#include -#include "squid.h" -#include "msa.h" - -static int copy_alignment_line(char *aseq, int apos, int name_rcol, - char *buffer, int lcol, int rcol, char gapsym); -static void actually_write_selex(FILE *fp, MSA *msa, int cpl); - -static char commentsyms[] = "%#"; - -/* Function: ReadSELEX() - * Date: SRE, Sun Jun 6 18:24:09 1999 [St. Louis] - * - * Purpose: Parse an alignment read from an open SELEX format - * alignment file. (SELEX is a single alignment format). - * Return the alignment, or NULL if we've already read the - * alignment or there's no alignment data in the file. - * - * Limitations: SELEX is the only remaining multipass parser for - * alignment files. It cannot read from gzip or from stdin. - * It Die()'s here if you try. The reason for this - * that SELEX allows space characters as gaps, so we don't - * know the borders of an alignment block until we've seen - * the whole block. I could rewrite to allow single-pass - * parsing (by storing the whole block in memory) but - * since SELEX is now legacy, why bother. - * - * Note that the interface is totally kludged: fastest - * possible adaptation of old ReadSELEX() to the new - * MSA interface. - * - * Args: afp - open alignment file - * - * Returns: MSA * - an alignment object - * caller responsible for an MSAFree() - * NULL if no alignment data. 
- */ -MSA * -ReadSELEX(MSAFILE *afp) -{ - MSA *msa; /* RETURN: mult seq alignment */ - FILE *fp; /* ptr to opened seqfile */ - char **aseqs; /* aligned seqs */ - int num = 0; /* number of seqs read */ - char buffer[LINEBUFLEN]; /* input buffer for lines */ - char bufcpy[LINEBUFLEN]; /* strtok'able copy of buffer */ - struct block_struc { /** alignment data for a block: */ - int lcol; /* furthest left aligned sym */ - int rcol; /* furthest right aligned sym */ - } *blocks = NULL; - int blocknum; /* number of blocks in file */ - char *nptr; /* ptr to start of name on line */ - char *sptr; /* ptr into sequence on line */ - int currnum; /* num. seqs in given block */ - int currblock; /* index for blocks */ - int i; /* loop counter */ - int seqidx; /* counter for seqs */ - int alen; /* length of alignment */ - int warn_names; /* becomes TRUE if names don't match between blocks */ - int headnum; /* seqidx in per-sequence header info */ - int currlen; - int count; - int have_cs = 0; - int have_rf = 0; - AINFO base_ainfo, *ainfo; /* hack: used to be passed ptr to AINFO */ - - - /* Convert from MSA interface to what old ReadSELEX() did: - * - copy our open fp, rather than opening file - * - verify that we're not reading a gzip or stdin - */ - if (feof(afp->f)) return NULL; - if (afp->do_gzip || afp->do_stdin) - Die("Can't read a SELEX format alignment from a pipe, stdin, or gzip'ed file"); - fp = afp->f; - ainfo = &base_ainfo; - - /*************************************************** - * First pass across file. - * Count seqs, get names, determine column info - * Determine what sorts of info are active in this file. 
- ***************************************************/ - - InitAinfo(ainfo); - /* get first line of the block - * (non-comment, non-blank) */ - do - { - if (fgets(buffer, LINEBUFLEN, fp) == NULL) - { squid_errno = SQERR_NODATA; return 0; } - strcpy(bufcpy, buffer); - if (*buffer == '#') - { - if (strncmp(buffer, "#=CS", 4) == 0) have_cs = 1; - else if (strncmp(buffer, "#=RF", 4) == 0) have_rf = 1; - } - } - while ((nptr = strtok(bufcpy, WHITESPACE)) == NULL || - (strchr(commentsyms, *nptr) != NULL)); - - blocknum = 0; - warn_names = FALSE; - while (!feof(fp)) - { - /* allocate for info about this block. */ - if (blocknum == 0) - blocks = (struct block_struc *) MallocOrDie (sizeof(struct block_struc)); - else - blocks = (struct block_struc *) ReallocOrDie (blocks, (blocknum+1) * sizeof(struct block_struc)); - blocks[blocknum].lcol = LINEBUFLEN+1; - blocks[blocknum].rcol = -1; - - currnum = 0; - while (nptr != NULL) /* becomes NULL when this block ends. */ - { - /* First block only: save names */ - if (blocknum == 0) - { - if (currnum == 0) - ainfo->sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO)); - else - ainfo->sqinfo = (SQINFO *) ReallocOrDie (ainfo->sqinfo, (currnum + 1) * sizeof(SQINFO)); - - ainfo->sqinfo[currnum].flags = 0; - SetSeqinfoString(&(ainfo->sqinfo[currnum]), nptr, SQINFO_NAME); - } - else /* in each additional block: check names */ - { - if (strcmp(ainfo->sqinfo[currnum].name, nptr) != 0) - warn_names = TRUE; - } - currnum++; - - /* check rcol, lcol */ - if ((sptr = strtok(NULL, WHITESPACE)) != NULL) - { - /* is this the furthest left we've - seen word 2 in this block? 
*/ - if (sptr - bufcpy < blocks[blocknum].lcol) - blocks[blocknum].lcol = sptr - bufcpy; - /* look for right side in buffer */ - for (sptr = buffer + strlen(buffer) - 1; - strchr(WHITESPACE, *sptr) != NULL; - sptr --) - /* do nothing */ ; - if (sptr - buffer > blocks[blocknum].rcol) - blocks[blocknum].rcol = sptr - buffer; - } - - /* get the next line; blank line means end of block */ - do - { - if (fgets(buffer, LINEBUFLEN, fp) == NULL) - { nptr = NULL; break; } - strcpy(bufcpy, buffer); - - if (strncmp(buffer, "#=SS", 4) == 0) ainfo->sqinfo[currnum-1].flags |= SQINFO_SS; - else if (strncmp(buffer, "#=SA", 4) == 0) ainfo->sqinfo[currnum-1].flags |= SQINFO_SA; - else if (strncmp(buffer, "#=CS", 4) == 0) have_cs = 1; - else if (strncmp(buffer, "#=RF", 4) == 0) have_rf = 1; - - if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) - break; - } while (strchr(commentsyms, *nptr) != NULL); - } - - - /* check that number of sequences matches expected */ - if (blocknum == 0) - num = currnum; - else if (currnum != num) - Die("Parse error in ReadSELEX()"); - blocknum++; - - /* get first line of next block - * (non-comment, non-blank) */ - do - { - if (fgets(buffer, LINEBUFLEN, fp) == NULL) { nptr = NULL; break; } - strcpy(bufcpy, buffer); - } - while ((nptr = strtok(bufcpy, WHITESPACE)) == NULL || - (strchr(commentsyms, *nptr) != NULL)); - } - - - /*************************************************** - * Get ready for second pass: - * figure out the length of the alignment - * malloc space - * rewind the file - ***************************************************/ - - alen = 0; - for (currblock = 0; currblock < blocknum; currblock++) - alen += blocks[currblock].rcol - blocks[currblock].lcol + 1; - - rewind(fp); - - /* allocations. we can't use AllocateAlignment because of - * the way we already used ainfo->sqinfo. 
- */ - aseqs = (char **) MallocOrDie (num * sizeof(char *)); - if (have_cs) - ainfo->cs = (char *) MallocOrDie ((alen+1) * sizeof(char)); - if (have_rf) - ainfo->rf = (char *) MallocOrDie ((alen+1) * sizeof(char)); - - - - for (i = 0; i < num; i++) - { - aseqs[i] = (char *) MallocOrDie ((alen+1) * sizeof(char)); - if (ainfo->sqinfo[i].flags & SQINFO_SS) - ainfo->sqinfo[i].ss = (char *) MallocOrDie ((alen+1) * sizeof(char)); - if (ainfo->sqinfo[i].flags & SQINFO_SA) - ainfo->sqinfo[i].sa = (char *) MallocOrDie ((alen+1) * sizeof(char)); - } - - ainfo->alen = alen; - ainfo->nseq = num; - ainfo->wgt = (float *) MallocOrDie (sizeof(float) * num); - FSet(ainfo->wgt, num, 1.0); - - /*************************************************** - * Second pass across file. Parse header; assemble sequences - ***************************************************/ - /* We've now made a complete first pass over the file. We know how - * many blocks it contains, we know the number of seqs in the first - * block, and we know every block has the same number of blocks; - * so we can be a bit more cavalier about error-checking as we - * make the second pass. 
- */ - - /* Look for header - */ - headnum = 0; - for (;;) - { - if (fgets(buffer, LINEBUFLEN, fp) == NULL) - Die("Parse error in ReadSELEX()"); - strcpy(bufcpy, buffer); - if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) continue; /* skip blank lines */ - - if (strcmp(nptr, "#=AU") == 0 && (sptr = strtok(NULL, "\n")) != NULL) - ainfo->au = Strdup(sptr); - else if (strcmp(nptr, "#=ID") == 0 && (sptr = strtok(NULL, "\n")) != NULL) - ainfo->name = Strdup(sptr); - else if (strcmp(nptr, "#=AC") == 0 && (sptr = strtok(NULL, "\n")) != NULL) - ainfo->acc = Strdup(sptr); - else if (strcmp(nptr, "#=DE") == 0 && (sptr = strtok(NULL, "\n")) != NULL) - ainfo->desc = Strdup(sptr); - else if (strcmp(nptr, "#=GA") == 0) - { - if ((sptr = strtok(NULL, WHITESPACE)) == NULL) - Die("Parse error in #=GA line in ReadSELEX()"); - ainfo->ga1 = atof(sptr); - - if ((sptr = strtok(NULL, WHITESPACE)) == NULL) - Die("Parse error in #=GA line in ReadSELEX()"); - ainfo->ga2 = atof(sptr); - - ainfo->flags |= AINFO_GA; - } - else if (strcmp(nptr, "#=TC") == 0) - { - if ((sptr = strtok(NULL, WHITESPACE)) == NULL) - Die("Parse error in #=TC line in ReadSELEX()"); - ainfo->tc1 = atof(sptr); - - if ((sptr = strtok(NULL, WHITESPACE)) == NULL) - Die("Parse error in #=TC line in ReadSELEX()"); - ainfo->tc2 = atof(sptr); - - ainfo->flags |= AINFO_TC; - } - else if (strcmp(nptr, "#=NC") == 0) - { - if ((sptr = strtok(NULL, WHITESPACE)) == NULL) - Die("Parse error in #=NC line in ReadSELEX()"); - ainfo->nc1 = atof(sptr); - - if ((sptr = strtok(NULL, WHITESPACE)) == NULL) - Die("Parse error in #=NC line in ReadSELEX()"); - ainfo->nc2 = atof(sptr); - - ainfo->flags |= AINFO_NC; - } - else if (strcmp(nptr, "#=SQ") == 0) /* per-sequence header info */ - { - /* first field is the name */ - if ((sptr = strtok(NULL, WHITESPACE)) == NULL) - Die("Parse error in #=SQ line in ReadSELEX()"); - if (strcmp(sptr, ainfo->sqinfo[headnum].name) != 0) warn_names = TRUE; - - /* second field is the weight */ - if ((sptr = 
strtok(NULL, WHITESPACE)) == NULL) - Die("Parse error in #=SQ line in ReadSELEX()"); - if (!IsReal(sptr)) - Die("Parse error in #=SQ line in ReadSELEX(): weight is not a number"); - ainfo->wgt[headnum] = atof(sptr); - - /* third field is database source id */ - if ((sptr = strtok(NULL, WHITESPACE)) == NULL) - Die("Parse error in #=SQ line in ReadSELEX(): incomplete line"); - SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_ID); - - /* fourth field is database accession number */ - if ((sptr = strtok(NULL, WHITESPACE)) == NULL) - Die("Parse error in #=SQ line in ReadSELEX(): incomplete line"); - SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_ACC); - - /* fifth field is start..stop::olen */ - if ((sptr = strtok(NULL, ".:")) == NULL) - Die("Parse error in #=SQ line in ReadSELEX(): incomplete line"); - SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_START); - - if ((sptr = strtok(NULL, ".:")) == NULL) - Die("Parse error in #=SQ line in ReadSELEX(): incomplete line"); - SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_STOP); - - if ((sptr = strtok(NULL, ":\t ")) == NULL) - Die("Parse error in #=SQ line in ReadSELEX(): incomplete line"); - SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_OLEN); - - /* rest of line is optional description */ - if ((sptr = strtok(NULL, "\n")) != NULL) - SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_DESC); - - headnum++; - } - else if (strcmp(nptr, "#=CS") == 0) break; - else if (strcmp(nptr, "#=RF") == 0) break; - else if (strchr(commentsyms, *nptr) == NULL) break; /* non-comment, non-header */ - } - - - currlen = 0; - for (currblock = 0 ; currblock < blocknum; currblock++) - { - /* parse the block */ - seqidx = 0; - while (nptr != NULL) - { - /* Consensus structure */ - if (strcmp(nptr, "#=CS") == 0) - { - if (! 
copy_alignment_line(ainfo->cs, currlen, strlen(nptr)-1, - buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.')) - Die("Parse error in #=CS line in ReadSELEX()"); - } - - /* Reference coordinates */ - else if (strcmp(nptr, "#=RF") == 0) - { - if (! copy_alignment_line(ainfo->rf, currlen, strlen(nptr)-1, - buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.')) - Die("Parse error in #=RF line in ReadSELEX()"); - } - /* Individual secondary structure */ - else if (strcmp(nptr, "#=SS") == 0) - { - if (! copy_alignment_line(ainfo->sqinfo[seqidx-1].ss, currlen, strlen(nptr)-1, - buffer, blocks[currblock].lcol, - blocks[currblock].rcol, (char) '.')) - Die("Parse error in #=SS line in ReadSELEX()"); - } - - /* Side chain % surface accessibility code */ - else if (strcmp(nptr, "#=SA") == 0) - { - if (! copy_alignment_line(ainfo->sqinfo[seqidx-1].sa, currlen, strlen(nptr)-1, - buffer, blocks[currblock].lcol, - blocks[currblock].rcol, (char) '.')) - Die("Parse error in #=SA line in ReadSELEX()"); - } - /* Aligned sequence; avoid unparsed machine comments */ - else if (strncmp(nptr, "#=", 2) != 0) - { - if (! 
copy_alignment_line(aseqs[seqidx], currlen, strlen(nptr)-1, - buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.')) - Die("Parse error in alignment line in ReadSELEX()"); - seqidx++; - } - - /* get next line */ - for (;;) - { - nptr = NULL; - if (fgets(buffer, LINEBUFLEN, fp) == NULL) break; /* EOF */ - strcpy(bufcpy, buffer); - if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) break; /* blank */ - if (strncmp(buffer, "#=", 2) == 0) break; /* machine comment */ - if (strchr(commentsyms, *nptr) == NULL) break; /* data */ - } - } /* end of a block */ - - currlen += blocks[currblock].rcol - blocks[currblock].lcol + 1; - - /* get line 1 of next block */ - for (;;) - { - if (fgets(buffer, LINEBUFLEN, fp) == NULL) break; /* no data */ - strcpy(bufcpy, buffer); - if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) continue; /* blank */ - if (strncmp(buffer, "#=", 2) == 0) break; /* machine comment */ - if (strchr(commentsyms, *nptr) == NULL) break; /* non-comment */ - } - } /* end of the file */ - - /* Lengths in sqinfo are for raw sequence (ungapped), - * and SS, SA are 0..rlen-1 not 0..alen-1. - * Only the seqs with structures come out of here with lengths set. - */ - for (seqidx = 0; seqidx < num; seqidx++) - { - int apos, rpos; - /* secondary structures */ - if (ainfo->sqinfo[seqidx].flags & SQINFO_SS) - { - for (apos = rpos = 0; apos < alen; apos++) - if (! isgap(aseqs[seqidx][apos])) - { - ainfo->sqinfo[seqidx].ss[rpos] = ainfo->sqinfo[seqidx].ss[apos]; - rpos++; - } - ainfo->sqinfo[seqidx].ss[rpos] = '\0'; - } - /* Surface accessibility */ - if (ainfo->sqinfo[seqidx].flags & SQINFO_SA) - { - for (apos = rpos = 0; apos < alen; apos++) - if (! 
isgap(aseqs[seqidx][apos])) - { - ainfo->sqinfo[seqidx].sa[rpos] = ainfo->sqinfo[seqidx].sa[apos]; - rpos++; - } - ainfo->sqinfo[seqidx].sa[rpos] = '\0'; - } - } - - /* NULL-terminate all the strings */ - if (ainfo->rf != NULL) ainfo->rf[alen] = '\0'; - if (ainfo->cs != NULL) ainfo->cs[alen] = '\0'; - for (seqidx = 0; seqidx < num; seqidx++) - aseqs[seqidx][alen] = '\0'; - - /* find raw sequence lengths for sqinfo */ - for (seqidx = 0; seqidx < num; seqidx++) - { - count = 0; - for (sptr = aseqs[seqidx]; *sptr != '\0'; sptr++) - if (!isgap(*sptr)) count++; - ainfo->sqinfo[seqidx].len = count; - ainfo->sqinfo[seqidx].flags |= SQINFO_LEN; - } - - - /*************************************************** - * Garbage collection and return - ***************************************************/ - free(blocks); - if (warn_names) - Warn("sequences may be in different orders in blocks of %s?", afp->fname); - - /* Convert back to MSA structure. (Wasteful kludge.) - */ - msa = MSAFromAINFO(aseqs, ainfo); - MSAVerifyParse(msa); - FreeAlignment(aseqs, ainfo); - return msa; -} - - -/* Function: WriteSELEX() - * Date: SRE, Mon Jun 14 13:13:14 1999 [St. Louis] - * - * Purpose: Write a SELEX file in multiblock format. - * - * Args: fp - file that's open for writing - * msa - multiple sequence alignment object - * - * Returns: (void) - */ -void -WriteSELEX(FILE *fp, MSA *msa) -{ - actually_write_selex(fp, msa, 50); /* 50 char per block */ -} - -/* Function: WriteSELEXOneBlock() - * Date: SRE, Mon Jun 14 13:14:56 1999 [St. Louis] - * - * Purpose: Write a SELEX alignment file in Pfam's single-block - * format style. A wrapper for actually_write_selex(). - * - * Args: fp - file that's open for writing - * msa- alignment to write - * - * Returns: (void) - */ -void -WriteSELEXOneBlock(FILE *fp, MSA *msa) -{ - actually_write_selex(fp, msa, msa->alen); /* one big block */ -} - - -/* Function: actually_write_selex() - * Date: SRE, Mon Jun 14 12:54:46 1999 [St. 
Louis] - * - * Purpose: Write an alignment in SELEX format to an open - * file. This is the function that actually does - * the work. The API's WriteSELEX() and - * WriteSELEXOneBlock() are wrappers. - * - * Args: fp - file that's open for writing - * msa - alignment to write - * cpl - characters to write per line in alignment block - * - * Returns: (void) - */ -static void -actually_write_selex(FILE *fp, MSA *msa, int cpl) -{ - int i; - int len = 0; - int namewidth; - char *buf; - int currpos; - - buf = malloc(sizeof(char) * (cpl+101)); /* 100 chars allowed for name, etc. */ - - /* Figure out how much space we need for name + markup - * to keep the alignment in register, for easier human viewing -- - * even though Stockholm format doesn't care about visual - * alignment. - */ - namewidth = 0; - for (i = 0; i < msa->nseq; i++) - if ((len = strlen(msa->sqname[i])) > namewidth) - namewidth = len; - if (namewidth < 6) namewidth = 6; /* minimum space for markup tags */ - - /* Free text comments - */ - for (i = 0; i < msa->ncomment; i++) - fprintf(fp, "# %s\n", msa->comment[i]); - if (msa->ncomment > 0) fprintf(fp, "\n"); - - /* Per-file annotation - */ - if (msa->name != NULL) fprintf(fp, "#=ID %s\n", msa->name); - if (msa->acc != NULL) fprintf(fp, "#=AC %s\n", msa->acc); - if (msa->desc != NULL) fprintf(fp, "#=DE %s\n", msa->desc); - if (msa->au != NULL) fprintf(fp, "#=AU %s\n", msa->au); - if (msa->flags & MSA_SET_GA) fprintf(fp, "#=GA %.1f %.1f\n", msa->ga1, msa->ga2); - if (msa->flags & MSA_SET_NC) fprintf(fp, "#=NC %.1f %.1f\n", msa->nc1, msa->nc2); - if (msa->flags & MSA_SET_TC) fprintf(fp, "#=TC %.1f %.1f\n", msa->tc1, msa->tc2); - - /* Per-sequence annotation - */ - for (i = 0; i < msa->nseq; i++) - fprintf(fp, "#=SQ %-*.*s %6.4f %s %s %d..%d::%d %s\n", - namewidth, namewidth, msa->sqname[i], - msa->wgt[i], - "-", /* MSA has no ID field */ - (msa->sqacc != NULL && msa->sqacc[i] != NULL) ? 
msa->sqacc[i] : "-", - 0, 0, 0, /* MSA has no start, stop, olen field */ - (msa->sqdesc != NULL && msa->sqdesc[i] != NULL) ? msa->sqdesc[i] : "-"); - fprintf(fp, "\n"); - - /* Alignment section: - */ - for (currpos = 0; currpos < msa->alen; currpos += cpl) - { - if (currpos > 0) fprintf(fp, "\n"); - - if (msa->ss_cons != NULL) { - strncpy(buf, msa->ss_cons + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "%-*.*s %s\n", namewidth, namewidth, "#=CS", buf); - } - if (msa->rf != NULL) { - strncpy(buf, msa->rf + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "%-*.*s %s\n", namewidth, namewidth, "#=RF", buf); - } - for (i = 0; i < msa->nseq; i++) - { - strncpy(buf, msa->aseq[i] + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "%-*.*s %s\n", namewidth, namewidth, msa->sqname[i], buf); - - if (msa->ss != NULL && msa->ss[i] != NULL) { - strncpy(buf, msa->ss[i] + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "%-*.*s %s\n", namewidth, namewidth, "#=SS", buf); - } - if (msa->sa != NULL && msa->sa[i] != NULL) { - strncpy(buf, msa->sa[i] + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "%-*.*s %s\n", namewidth, namewidth, "#=SA", buf); - } - } - } - free(buf); -} - - -/* Function: copy_alignment_line() - * - * Purpose: Given a line from an alignment file, and bounds lcol,rcol - * on what part of it may be sequence, save the alignment into - * aseq starting at position apos. - * - * name_rcol is set to the rightmost column this aseqs's name - * occupies; if name_rcol >= lcol, we have a special case in - * which the name intrudes into the sequence zone. - */ -static int -copy_alignment_line(char *aseq, int apos, int name_rcol, - char *buffer, int lcol, int rcol, char gapsym) -{ - char *s1, *s2; - int i; - - s1 = aseq + apos; - s2 = buffer; /* be careful that buffer doesn't end before lcol! */ - for (i = 0; i < lcol; i++) - if (*s2) s2++; - - for (i = lcol; i <= rcol; i++) - { - if (*s2 == '\t') { - Warn("TAB characters will corrupt a SELEX alignment! 
Please remove them first."); - return 0; - } - if (name_rcol >= i) /* name intrusion special case: pad left w/ gaps */ - *s1 = gapsym; - /* short buffer special case: pad right w/ gaps */ - else if (*s2 == '\0' || *s2 == '\n') - *s1 = gapsym; - - else if (*s2 == ' ') /* new: disallow spaces as gap symbols */ - *s1 = gapsym; - - else /* normal case: copy buffer into aseq */ - *s1 = *s2; - - s1++; - if (*s2) s2++; - } - return 1; -} - - - - - -/* Function: DealignAseqs() - * - * Given an array of (num) aligned sequences aseqs, - * strip the gaps. Store the raw sequences in a new allocated array. - * - * Caller is responsible for free'ing the memory allocated to - * rseqs. - * - * Returns 1 on success. Returns 0 and sets squid_errno on - * failure. - */ -int -DealignAseqs(char **aseqs, int num, char ***ret_rseqs) -{ - char **rseqs; /* de-aligned sequence array */ - int idx; /* counter for sequences */ - int depos; /* position counter for dealigned seq*/ - int apos; /* position counter for aligned seq */ - int seqlen; /* length of aligned seq */ - - /* alloc space */ - rseqs = (char **) MallocOrDie (num * sizeof(char *)); - /* main loop */ - for (idx = 0; idx < num; idx++) - { - seqlen = strlen(aseqs[idx]); - /* alloc space */ - rseqs[idx] = (char *) MallocOrDie ((seqlen + 1) * sizeof(char)); - - /* strip gaps */ - depos = 0; - for (apos = 0; aseqs[idx][apos] != '\0'; apos++) - if (!isgap(aseqs[idx][apos])) - { - rseqs[idx][depos] = aseqs[idx][apos]; - depos++; - } - rseqs[idx][depos] = '\0'; - } - *ret_rseqs = rseqs; - return 1; -} - - -/* Function: IsSELEXFormat() - * - * Return TRUE if filename may be in SELEX format. - * - * Accuracy is sacrificed for speed; a TRUE return does - * *not* guarantee that the file will pass the stricter - * error-checking of ReadSELEX(). All it checks is that - * the first 500 non-comment lines of a file are - * blank, or if there's a second "word" on the line - * it looks like sequence (i.e., it's not kOtherSeq). 
- * - * Returns TRUE or FALSE. - */ -int -IsSELEXFormat(char *filename) -{ - FILE *fp; /* ptr to open sequence file */ - char buffer[LINEBUFLEN]; - char *sptr; /* ptr to first word */ - int linenum; - - - if ((fp = fopen(filename, "r")) == NULL) - { squid_errno = SQERR_NOFILE; return 0; } - - linenum = 0; - while (linenum < 500 && - fgets(buffer, LINEBUFLEN, fp) != NULL) - { - linenum++; - /* dead giveaways for extended SELEX */ - if (strncmp(buffer, "#=AU", 4) == 0) goto DONE; - else if (strncmp(buffer, "#=ID", 4) == 0) goto DONE; - else if (strncmp(buffer, "#=AC", 4) == 0) goto DONE; - else if (strncmp(buffer, "#=DE", 4) == 0) goto DONE; - else if (strncmp(buffer, "#=GA", 4) == 0) goto DONE; - else if (strncmp(buffer, "#=TC", 4) == 0) goto DONE; - else if (strncmp(buffer, "#=NC", 4) == 0) goto DONE; - else if (strncmp(buffer, "#=SQ", 4) == 0) goto DONE; - else if (strncmp(buffer, "#=SS", 4) == 0) goto DONE; - else if (strncmp(buffer, "#=CS", 4) == 0) goto DONE; - else if (strncmp(buffer, "#=RF", 4) == 0) goto DONE; - - /* a comment? */ - if (strchr(commentsyms, *buffer) != NULL) continue; - - /* a blank line? 
*/ - if ((sptr = strtok(buffer, WHITESPACE)) == NULL) continue; - - /* a one-word line (name only) - is possible, though rare */ - if ((sptr = strtok(NULL, "\n")) == NULL) continue; - - if (Seqtype(sptr) == kOtherSeq) {fclose(fp); return 0;} - } - - DONE: - fclose(fp); - return 1; -} - - - - - - - - diff --git a/forester/archive/RIO/others/hmmer/squid/seqencode.c b/forester/archive/RIO/others/hmmer/squid/seqencode.c deleted file mode 100644 index 6cdc265..0000000 --- a/forester/archive/RIO/others/hmmer/squid/seqencode.c +++ /dev/null @@ -1,177 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* seqencode.c - * - * Routines for creating and manipulating encoded sequence strings. 
- * RCS $Id: seqencode.c,v 1.1.1.1 2005/03/22 08:34:29 cmzmasek Exp $ - */ -#include -#include -#include -#include "squid.h" - - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - /* seqcmp() - returns 0 if s1 == s2 - mismatch number otherwise */ -int -seqcmp(char *s1, char *s2, int allow) -{ - int mmat = 0; - - while ((*s1 != NTEND) && (*s2 != NTEND) && (mmat <= allow)) - { - if (!(ntmatch(*s1, *s2))) - mmat++;; - s1++; - s2++; - } - while ((*s1++ != NTEND) && (mmat <= allow)) - mmat++; - return(mmat); -} - /* seqncmp() - same as seqcmp but it looks at, - at most, n positions */ -int -seqncmp(char *s1, char *s2, int n, int allow) -{ - int mmat = 0; - - while ((*s2 != NTEND) && - (n-- != 0)) - { - if ((!(ntmatch(*s1, *s2))) && - (++mmat > allow)) - return(mmat); - s1++; - s2++; - } - while ((n-- != 0) && (*s1++ != NTEND) && (mmat <= allow)) - mmat++; - return (mmat); -} - - /* seqencode() - given a character text string str (A,C,G,T), - convert to an encoded seq string; - return 1 for success, 0 if fail */ -int -seqencode(char *codeseq, /* pre-allocated space for answer */ - char *str) /* character string to convert */ -{ - char *ptr; - int idx; - - ptr = codeseq; - while (*str != '\0') - { - if (islower((int) (*str))) *str = (char) toupper((int) (*str)); - for (idx = 0; *str != iupac[idx].sym && idx <= IUPACSYMNUM; idx++) - ; - if (idx > IUPACSYMNUM) - { - *ptr = (char) NTEND; - return 0; - } - else - *ptr = iupac[idx].code; - ptr++; - str++; - } - *ptr = NTEND; - return 1; -} - - -int -coded_revcomp(char *comp, char *seq) -{ - long bases; - char *bckp, *fwdp; - int idx; - long pos; - - bases = strlen(seq); - - fwdp = comp; - bckp = seq + bases -1; - for (pos = 0; pos < bases; pos++) - { - for (idx = 0; *bckp != iupac[idx].code && idx < IUPACSYMNUM; idx++); - if (idx > IUPACSYMNUM) - { - *fwdp = NTEND; - return 0; - } - else - *fwdp = iupac[idx].comp; - fwdp++; - bckp--; - } - *fwdp = NTEND; - return(1); -} - -int -seqdecode(char *str, char *codeseq) -{ - int idx; 
- int pos; - - pos = 0; - while (*codeseq != NTEND) - { - for (idx = 0; *codeseq != iupac[idx].code && idx < IUPACSYMNUM; idx++) - ; - if (idx > IUPACSYMNUM) - { - str[pos] = 'X'; - return 0; - } - else - str[pos] = iupac[idx].sym; - codeseq++; - pos++; - } - str[pos] = '\0'; - return 1; -} - -int -seqndecode( - char *str, /* pre-allocated string to write into */ - char *codeseq, /* sequence to decode */ - int n) /* how many bases to decode */ -{ - int idx; - int pos = 0; - - while (--n >= 0) - { - for (idx = 0; *codeseq != iupac[idx].code && idx < IUPACSYMNUM; idx++); - if (idx > IUPACSYMNUM) - { - str[pos] = 'X'; - return 0; - } - else - str[pos] = iupac[idx].sym; - codeseq++; - pos++; - } - str[pos] = '\0'; - return 1; -} - diff --git a/forester/archive/RIO/others/hmmer/squid/seqsplit_main.c b/forester/archive/RIO/others/hmmer/squid/seqsplit_main.c deleted file mode 100644 index a38f6fd..0000000 --- a/forester/archive/RIO/others/hmmer/squid/seqsplit_main.c +++ /dev/null @@ -1,163 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - - -/* seqsplit_main.c - * SRE, Mon Sep 25 11:43:58 2000 - * - * Split sequences into smaller chunks of defined size and overlap; - * output a FASTA file. - * - * Limitations: - * still working in 32 bits -- no sequence can be more than 2 GB - * in size. 
- * CVS $Id: seqsplit_main.c,v 1.1.1.1 2005/03/22 08:34:26 cmzmasek Exp $ - */ - -#include -#include -#include "squid.h" -#include "msa.h" - -static char banner[] = "seqsplit - split seqs into chunks of defined size and overlap"; - -static char usage[] = "\ -Usage: seqsplit [-options] \n\ - Available options:\n\ - -h : help; display usage and version\n\ - -o : output the new FASTA file to \n\ -"; - -static char experts[] = "\ - --informat : specify sequence file format \n\ - --length : set max length of each unique seq frag to \n\ - --overlap : set overlap length to (total frag size = length+overlap)\n\ -"; - -struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-o", TRUE, sqdARG_STRING }, - { "--informat", FALSE, sqdARG_STRING }, - { "--length", FALSE, sqdARG_INT }, - { "--overlap", FALSE, sqdARG_INT }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - - -int -main(int argc, char **argv) -{ - char *seqfile; /* name of sequence file */ - char *outfile; /* name of output file */ - SQFILE *dbfp; /* open sequence file */ - FILE *ofp; /* open output file */ - int fmt; /* format of seqfile */ - char *seq; /* sequence */ - SQINFO sqinfo; /* extra info about sequence */ - char *seqfrag; /* space for a seq fragment */ - int fraglength; /* length of unique seq per frag */ - int overlap; /* length of overlap. 
frags are fraglength+overlap*/ - char seqname[256]; /* renamed fragment, w/ coord info */ - int num; /* number of this fragment */ - int pos; /* position in a sequence */ - int len; /* length of a fragment */ - char *desc; - - int nseqs; /* total number of sequences */ - int nsplit; /* number of seqs that get split */ - int nnewfrags; /* total number of new fragments */ - - char *optname; - char *optarg; - int optind; - - /*********************************************** - * Parse command line - ***********************************************/ - - fmt = SQFILE_UNKNOWN; /* default: autodetect */ - fraglength = 100000; - overlap = 1000; - outfile = NULL; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "-o") == 0) outfile = optarg; - else if (strcmp(optname, "--length") == 0) fraglength = atoi(optarg); - else if (strcmp(optname, "--overlap") == 0) overlap = atoi(optarg); - else if (strcmp(optname, "--informat") == 0) { - fmt = String2SeqfileFormat(optarg); - if (fmt == SQFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - - if (argc - optind != 1) Die("%s\n", usage); - seqfile = argv[argc-1]; - - seqfrag = MallocOrDie(sizeof(char) * (fraglength+overlap)); - seqfrag[fraglength+overlap] = '\0'; - - /*********************************************** - * Read the file. 
- ***********************************************/ - - if (outfile == NULL) ofp = stdout; - else { - if ((ofp = fopen(outfile, "w")) == NULL) - Die("Failed to open output sequence file %s for writing", outfile); - } - - if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL) - Die("Failed to open sequence file %s for reading", seqfile); - - nseqs = nsplit = nnewfrags = 0; - while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo)) - { - nseqs++; - if (sqinfo.flags & SQINFO_DESC) desc = sqinfo.desc; - else desc = NULL; - - if (sqinfo.len <= fraglength+overlap) { - WriteSimpleFASTA(ofp, seq, sqinfo.name, desc); - continue; - } - - num = 1; - nsplit++; - for (pos = 0; pos < sqinfo.len; pos += fraglength) - { - if (sqinfo.len - pos <= overlap) continue; - strncpy(seqfrag, seq+pos, fraglength+overlap); - len = strlen(seqfrag); - sprintf(seqname, "%s/frag%d/%d-%d", - sqinfo.name, num, pos+1, pos+len); - WriteSimpleFASTA(ofp, seqfrag, seqname, desc); - nnewfrags++; - num ++; - } - FreeSequence(seq, &sqinfo); - } - SeqfileClose(dbfp); - if (outfile != NULL) fclose(ofp); - - printf("Total # of seqs: %d\n", nseqs); - printf("Affected by splitting: %d\n", nsplit); - printf("New # of seqs: %d\n", nseqs-nsplit + nnewfrags); - - return 0; -} diff --git a/forester/archive/RIO/others/hmmer/squid/seqstat_main.c b/forester/archive/RIO/others/hmmer/squid/seqstat_main.c deleted file mode 100644 index 01fe620..0000000 --- a/forester/archive/RIO/others/hmmer/squid/seqstat_main.c +++ /dev/null @@ -1,229 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- *****************************************************************/ - -/* seqstat_main.c - * Wed Aug 10 15:47:14 1994 - * - * Look at a sequence file, determine some simple statistics. - * CVS $Id: seqstat_main.c,v 1.1.1.1 2005/03/22 08:34:29 cmzmasek Exp $ - */ - -#include -#include -#include -#include -#include "squid.h" -#include "msa.h" - -static char banner[] = "seqstat - show some simple statistics on a sequence file"; - -static char usage[] = "\ -Usage: seqstat [-options] \n\ - Available options:\n\ - -a : report per-sequence info, not just a summary\n\ - -h : help; display usage and version\n\ -"; - -static char experts[] = "\ - --gccomp : with -a, include GC composition in report (DNA/RNA only)\n\ - --informat : specify sequence file format \n\ - --quiet : suppress verbose header (used in regression testing)\n\ -"; - -struct opt_s OPTIONS[] = { - { "-a", TRUE, sqdARG_NONE }, - { "-h", TRUE, sqdARG_NONE }, - { "--gccomp", FALSE, sqdARG_NONE }, - { "--informat", FALSE, sqdARG_STRING }, - { "--quiet", FALSE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -static float gc_composition(char *seq); - -int -main(int argc, char **argv) -{ - char *seqfile; /* name of sequence file */ - SQFILE *dbfp; /* open sequence file */ - int fmt; /* format of seqfile */ - char *seq; /* sequence */ - SQINFO sqinfo; /* extra info about sequence */ - int nseqs; - long long small; /* smallest length */ - long long large; /* largest length */ - long long total; /* total length */ - int type; /* kAmino, kDNA, kRNA, or kOtherSeq */ - - int allreport; /* TRUE to do a short table for each sequence */ - int be_quiet; /* TRUE to suppress header */ - int do_gccomp; /* TRUE to include GC composition in per-seq report */ - float gc; /* fractional gc composition, 0..1 */ - - char *optname; - char *optarg; - int optind; - - /*********************************************** - * Parse command line - ***********************************************/ - - fmt = 
SQFILE_UNKNOWN; /* default: autodetect format */ - allreport = FALSE; /* default: file summary only */ - be_quiet = FALSE; /* show header info by default */ - type = kOtherSeq; /* just to silence gcc uninit warning */ - do_gccomp = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "-a") == 0) allreport = TRUE; - else if (strcmp(optname, "--quiet") == 0) be_quiet = TRUE; - else if (strcmp(optname, "--gccomp") == 0) do_gccomp = TRUE; - - else if (strcmp(optname, "--informat") == 0) { - fmt = String2SeqfileFormat(optarg); - if (fmt == SQFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - - if (argc - optind != 1) Die("%s\n", usage); - seqfile = argv[argc-1]; - - if (! be_quiet) Banner(stdout, banner); - - /*********************************************** - * Read the file. - ***********************************************/ - - if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL) - Die("Failed to open sequence file %s for reading", seqfile); - - if (allreport) { - printf(" %-15s %-5s %s%s\n", " NAME", "LEN", - do_gccomp? " f_GC " : "", - "DESCRIPTION"); - printf(" --------------- ----- %s-----------\n", - do_gccomp ? "----- " : ""); - } - - nseqs = 0; - small = -1; - large = -1; - total = 0L; - while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo)) - { - if (nseqs == 0) type = Seqtype(seq); - if (do_gccomp) gc = gc_composition(seq); - - if (allreport) { - if (do_gccomp) { - printf("* %-15s %5d %.3f %-50.50s\n", sqinfo.name, sqinfo.len, - gc, - sqinfo.flags & SQINFO_DESC ? sqinfo.desc : ""); - } else { - printf("* %-15s %5d %-50.50s\n", sqinfo.name, sqinfo.len, - sqinfo.flags & SQINFO_DESC ? 
sqinfo.desc : ""); - } - } - - if (small == -1 || sqinfo.len < small) small = (long long) sqinfo.len; - if (large == -1 || sqinfo.len > large) large = (long long) sqinfo.len; - total += (long long) sqinfo.len; - nseqs++; - FreeSequence(seq, &sqinfo); - } - if (allreport) puts(""); - - printf("Format: %s\n", SeqfileFormat2String(dbfp->format)); - printf("Type (of 1st seq): "); - switch (type) - { - case kDNA: puts("DNA"); break; - case kRNA: puts("RNA"); break; - case kAmino: puts("Protein"); break; - case kOtherSeq: puts("Unknown"); break; - default: Die("oops."); - } - printf("Number of sequences: %d\n", nseqs); - printf("Total # residues: %lld\n", total); - printf("Smallest: %lld\n", small); - printf("Largest: %lld\n", large); - printf("Average length: %.1f\n", (float) total / (float) nseqs); - - SeqfileClose(dbfp); - - return 0; -} - - -/* Function: gc_composition() - * Date: SRE, Mon Apr 23 10:01:48 2001 [St. Louis] - * - * Purpose: Calculate the fractional GC composition of - * an input RNA or DNA sequence. Deals appropriately - * with IUPAC degeneracy. Case-insensitive. - * Ignores gap symbols. Other unexpected characters - * make it die with an error (protein, for instance). 
- * - * Args: seq - the DNA or RNA sequence - * - * Returns: fractional GC composition, 0-1 - */ -static float -gc_composition(char *seq) -{ - int c; - float total; - float gc; - - gc = total = 0.; - for (; *seq != '\0'; seq++) - { - if (isgap(c)) continue; - - c = toupper((int) *seq); - total += 1.0; - - switch (c) { - case 'C': - case 'G': - case 'S': gc += 1.0; break; - - case 'A': - case 'T': - case 'U': - case 'W': gc += 0.0; break; - - case 'N': - case 'R': - case 'Y': - case 'M': - case 'K': gc += 0.5; break; - - case 'H': - case 'D': gc += 0.3333; break; - - case 'B': - case 'V': gc += 0.6667; break; - - default: - Die("unrecognized nucleic acid character %c in sequence", c); - } - } - return (gc/total); -} diff --git a/forester/archive/RIO/others/hmmer/squid/sfetch_main.c b/forester/archive/RIO/others/hmmer/squid/sfetch_main.c deleted file mode 100644 index 8cb6aa4..0000000 --- a/forester/archive/RIO/others/hmmer/squid/sfetch_main.c +++ /dev/null @@ -1,444 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- *****************************************************************/ - -/* sfetch_main.c, Fri Dec 25 14:22:17 1992, SRE - * - * sfetch -- a program to extract subsequences from a sequence database - * Renamed from "getseq" SRE, Tue Jan 19 10:47:42 1999 (GCG clash) - * - * CVS $Id: sfetch_main.c,v 1.1.1.1 2005/03/22 08:34:20 cmzmasek Exp $ - */ - -#include -#include -#include "squid.h" -#include "msa.h" -#include "ssi.h" - -static char banner[] = "sfetch - retrieve a specified sequence from a file"; - -static char usage[] = "\ -Usage: sfetch [-options] \n\ - or: sfetch [-options] .\n\ - (The second version fetches the first seq in the file.)\n\ - Get a sequence from a database.\n\ - Available options:\n\ - -a : name is an accession number, not a key\n\ - -d : get sequence from \n\ - -D : instead, get sequence from main database\n\ - -h : help; print version and usage info\n\ - -r : rename the fragment \n\ - -f : from which residue (1..N)\n\ - -t : to which residue (1..N)\n\ - -o : direct output to \n\ - -F : use output format of ; see below for\n\ - list. 
Default is original format of database.\n\ -\n\ - Available output formats include:\n\ - fasta\n\ - genbank\n\ - embl\n\ - gcg\n\ - pir\n\ - raw\n\n\ - Available databases are: (if $env variables are set correctly)\n\ - -Dsw $SWDIR SwissProt\n\ - -Dpir $PIRDIR PIR\n\ - -Dem $EMBLDIR EMBL\n\ - -Dgb $GBDIR GenBank\n\ - -Dwp $WORMDIR WormPep\n\ - -Dowl $OWLDIR OWL\n"; - -static char experts[] = "\ - --informat : specify input sequence file format \n\ -"; - -struct opt_s OPTIONS[] = { - { "-a", TRUE, sqdARG_NONE }, - { "-d", TRUE, sqdARG_STRING }, - { "-f", TRUE, sqdARG_INT }, - { "-h", TRUE, sqdARG_NONE }, - { "-o", TRUE, sqdARG_STRING }, - { "-r", TRUE, sqdARG_STRING }, - { "-t", TRUE, sqdARG_INT }, - { "-D", TRUE, sqdARG_STRING }, - { "-F", TRUE, sqdARG_STRING }, - { "--informat", FALSE, sqdARG_STRING }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -/* dbenv maps command line database selection to an environment - * variable, from which the database directory is obtained. 
- */ -struct dbenv_s { - char *dbname; /* name of database, as used on command line */ - char *ssiname; /* name of GSI index file to look for */ - char *envname; /* environment var to get directory path from*/ - char *entryend; /* string signifying end of entry */ - int addend; /* TRUE if entryend line is part of entry */ -} dbenv[] = -{ - { "sw", "swiss.ssi", "SWDIR", "//", TRUE}, - { "pir", "pir.ssi", "PIRDIR", "///", TRUE}, - { "em", "embl.ssi", "EMBLDIR", "//", TRUE}, - { "gb", "genbank.ssi","GBDIR", "//", TRUE}, - { "wp", "wormpep.ssi","WORMDIR", ">", FALSE}, - { "owl", "owl.ssi", "OWLDIR", ">", FALSE}, /* use FASTA OWL version */ -}; -#define NUMDBS (sizeof(dbenv) / sizeof(struct dbenv_s)) - -int -main(int argc, char **argv) -{ - char *dbname; /* master database to search */ - char *seqfile; /* name of sequence file to read */ - char *ssifile; /* name of SSI index file (if one exists) */ - SQFILE *seqfp; /* pointer to open sequence file */ - char *getname; /* name of sequence to get from */ - int from; /* starting residue, 1..N */ - int to; /* ending residue, 1..N */ - char *outfile; /* name of file to put output to */ - FILE *outfp; /* file pointer to put output to */ - int format; /* format of seqfile */ - int outfmt; /* output format */ - char *seq; /* current working sequence */ - SQINFO sqinfo; - char *frag; /* extracted subsequence */ - int source_start; /* start of seq on original source 1..N */ - int source_stop; /* end of seq on original source 1..N */ - int source_orient; /* sign of parent: -1 revcomp, +1 normal*/ - char *ss; /* secondary structure representation */ - - SSIFILE *ssi; /* open SSI index file */ - SSIOFFSET ssi_offset; /* disk offset for locating sequence */ - int used_ssi; /* TRUE if SSI file was used (don't scan) */ - int status; /* status returned by an SSI call */ - - char *rename; /* new name to give fragment */ - int reverse_complement; /* do we have to reverse complement? 
*/ - int getall; - int getfirst; /* TRUE to extract from the first seq, w/o looking at name */ - char *outformat; /* output format string */ - int by_accession; /* TRUE if name is accession number not key */ - - int dbidx; - - char *optname; - char *optarg; - int optind; - - /*********************************************** - * Parse the command line - ***********************************************/ - - /* initializations and defaults */ - format = SQFILE_UNKNOWN; /* autodetect default, overridden by --informat or SSI files */ - reverse_complement = 0; - getall = TRUE; - getfirst= FALSE; - dbname = NULL; - dbidx = -1; - seqfile = NULL; - from = -1; - to = -1; /* flag that says do the whole thing */ - outfile = NULL; - getname = NULL; - rename = NULL; - outformat = NULL; - by_accession = FALSE; - used_ssi = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "-a") == 0) { by_accession = TRUE; } - else if (strcmp(optname, "-d") == 0) { seqfile = optarg; } - else if (strcmp(optname, "-f") == 0) { - from = atoi(optarg); getall = FALSE; - } - else if (strcmp(optname, "-t") == 0) { - to = atoi(optarg); getall = FALSE; - } - else if (strcmp(optname, "-r") == 0) { rename = optarg; } - else if (strcmp(optname, "-o") == 0) { outfile = optarg; } - else if (strcmp(optname, "-D") == 0) { dbname = optarg; } - else if (strcmp(optname, "-F") == 0) { outformat = optarg; } - else if (strcmp(optname, "--informat") == 0) { - format = String2SeqfileFormat(optarg); - if (format == SQFILE_UNKNOWN) - Die("unrecognized input sequence file format \"%s\"", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - - if (argc - optind != 1) - Die("Incorrect number of command line arguments.\n%s\n", usage); - - getname = argv[optind]; - if (strcmp(getname, ".") == 0) getfirst = TRUE; - - if (getfirst && seqfile == NULL) - Die("You need to 
specify -d to retrieve a first sequence.\n%s", - usage); - - /*********************************************** - * Get name of file to look through, and disk offset, - * using SSI file if one exists. Three possibilities: - * 1) Look in main DB, which has SSI index in the directory - * 2) Look in a file, which has associated SSI index - * 3) Look in an unindexed file - ***********************************************/ - - if (dbname != NULL && seqfile != NULL) - Die("Can't fetch from *both* a database %s and a file %s\n%s", - dbname, seqfile, usage); - if (dbname == NULL && seqfile == NULL) - { /* try to guess SwissProt, stupidly, but usually works */ - if (strchr(getname, '_') != NULL) dbname = Strdup("sw"); - else Die("You have to specify either a database or a seqfile\n%s", usage); - } - - if (dbname != NULL) /* Main database. GSI index mandatory. */ - { - char *dbdir; - char *dbfile; - int fh; - /* find which db this is */ - for (dbidx = 0; dbidx < NUMDBS; dbidx++) - if (strcmp(dbenv[dbidx].dbname, dbname) == 0) - break; - if (dbidx == NUMDBS) - Die("No such main database %s\n%s", dbname, usage); - - /* get directory name */ - if ((dbdir = getenv(dbenv[dbidx].envname)) == NULL) - Die("Environment variable %s is not set.\n%s", - dbenv[dbidx].envname, usage); - /* open ssi file */ - ssifile = (char *) MallocOrDie - ((strlen(dbdir) + strlen(dbenv[dbidx].ssiname) + 2) * sizeof(char)); - sprintf(ssifile, "%s/%s", dbdir, dbenv[dbidx].ssiname); - if ((status = SSIOpen(ssifile, &ssi)) != 0) - Die("Failed to open SSI index file %s in directory %s\n%s", - dbenv[dbidx].ssiname, dbdir, usage); - /* get seqfile name, file format, and offset */ - if ((status = SSIGetOffsetByName(ssi, getname, &fh, &ssi_offset)) != 0) - Die("Failed to find key %s in SSI file %s", getname, ssifile); - if ((status = SSIFileInfo(ssi, fh, &dbfile, &format)) != 0) - Die("SSI error: %s", SSIErrorString(status)); - free(ssifile); - /* set up proper seqfile, with path */ - seqfile = (char *) MallocOrDie 
- ((strlen(dbdir) + strlen(dbfile) + 2) * sizeof(char)); - sprintf(seqfile, "%s/%s", dbdir, dbfile); - used_ssi = TRUE; - SSIClose(ssi); - } - else if (! getfirst) /* Sequence file. SSI index optional. */ - { - char *dbfile; - int fh; - - ssifile = (char *) MallocOrDie ((strlen(seqfile) + 5) * sizeof(char)); - sprintf(ssifile, "%s.ssi", seqfile); - if ((status = SSIOpen(ssifile, &ssi)) == 0) - { - SQD_DPRINTF1(("Opened SSI index %s...\n", ssifile)); - if ((status = SSIGetOffsetByName(ssi, getname, &fh, &ssi_offset)) != 0) - Die("Failed to find key %s in SSI file %s", getname, ssifile); - if ((status = SSIFileInfo(ssi, fh, &dbfile, &format)) != 0) - Die("SSI error: %s", SSIErrorString(status)); - SSIClose(ssi); - used_ssi = TRUE; - } - free(ssifile); - } - - /*********************************************** - * Open database file - ***********************************************/ - - if ((seqfp = SeqfileOpen(seqfile, format, NULL)) == NULL) - Die("Failed to open sequence database file %s\n%s\n", seqfile, usage); - if (used_ssi) - SeqfilePosition(seqfp, &ssi_offset); - - /*********************************************** - * Open output file - ***********************************************/ - - /* Determine output format. Default: use same as input. Override: -F option. 
- */ - outfmt = seqfp->format; - if (outformat != NULL) - { - outfmt = String2SeqfileFormat(outformat); - if (outfmt == SQFILE_UNKNOWN) - Die("Unknown output format %s\n%s", outformat, usage); - if (IsAlignmentFormat(outfmt)) - Die("Can't output a single sequence in an alignment format (%s)\n", outformat); - } - /* open output file for writing; - use stdout by default */ - if (outfile == NULL) outfp = stdout; - else if ((outfp = fopen(outfile, "w")) == NULL) - Die("cannot open %s for output\n", outfile); - - - /*********************************************** - * Main loop - ***********************************************/ - - /* If this is a simple fetch of the complete sequence - * in native format, and we've been positioned in the file - * by an SSI index file, we can just read right from the file, - * partially bypassing the ReadSeq() API, and probably - * putting our fingers a little too deep into the seqfp object. - */ - if (getall && used_ssi && outfmt == format && dbname != NULL) - { - char *buf = NULL; - int buflen = 0; - int endlen; - - if (dbidx == -1) Die("That's weird. No database index available."); - endlen = strlen(dbenv[dbidx].entryend); - fputs(seqfp->buf, outfp); /* always do first line */ - /* fputs("\n", outfp); */ /* buf has its /n */ - while (sre_fgets(&buf, &buflen, seqfp->f) != NULL) - { - if (strncmp(buf, dbenv[dbidx].entryend, endlen) == 0) - { - if (dbenv[dbidx].addend) fputs(buf, outfp); - break; - } - fputs(buf, outfp); - } - if (buf != NULL) free(buf); - } - else /* else, the hard way with ReadSeq */ - { - seq = NULL; - frag = NULL; - - while (ReadSeq(seqfp, format, &seq, &sqinfo)) - { - if (used_ssi) /* GSI file puts us right on our seq. */ - break; - else if (getfirst) /* Use the first seq in the file. 
*/ - break; - else if (by_accession && - (sqinfo.flags & SQINFO_ACC) && - strcmp(sqinfo.acc, getname) == 0) - break; - else if (strcmp(sqinfo.name, getname) == 0) - break; - - FreeSequence(seq, &sqinfo); - seq = NULL; - } - - if (seq == NULL) - Die("failed to extract the subsequence %s\n%s", getname, usage); - - if (getall) - { - from = 1; - to = sqinfo.len; - } - else if (from == -1) from = 1; - else if (to == -1) to = sqinfo.len; - - if (to > sqinfo.len || from > sqinfo.len) - Warn("Extracting beyond the length of the sequence"); - if (to < 1 || from < 1) - Warn("Extracting beyond the beginning of the sequence"); - - /* check for reverse complement */ - if (to != -1 && from > to) - { - int swapfoo; /* temp variable for swapping coords */ - - reverse_complement = TRUE; - swapfoo = from; from = to; to = swapfoo; - } - if (to > sqinfo.len) to = sqinfo.len; - if (from < 1) from = 1; - - if ((frag = (char *) calloc (to-from+2, sizeof(char))) == NULL) - Die("memory error\n"); - - if (strncpy(frag, seq+from-1, to-from+1) == NULL) - Die("strncpy() failed\n"); - - if (sqinfo.flags & SQINFO_SS) - { - if ((ss = (char *) calloc (to-from+2, sizeof(char))) == NULL) - Die("memory error\n"); - if (strncpy(ss, sqinfo.ss+from-1, to-from+1) == NULL) - Die("strncpy() failed\n"); - free(sqinfo.ss); - sqinfo.ss = ss; - } - - if (reverse_complement) - { - char *revfrag; /* temp variable for reverse complement */ - int swapfoo; /* temp variable for swapping coords back */ - - if ((revfrag = calloc ( to-from+2, sizeof(char))) == NULL) - Die("memory failure\n"); - revcomp(revfrag, frag); - free(frag); - frag = revfrag; - swapfoo = from; from = to; to = swapfoo; - - /* reverse complement nullifies secondary structure */ - if (sqinfo.flags & SQINFO_SS) - { free(sqinfo.ss); sqinfo.flags &= ~SQINFO_SS; } - } - - if (! (sqinfo.flags & SQINFO_ID)) - SetSeqinfoString(&sqinfo, sqinfo.name, SQINFO_ID); - - if (! 
(sqinfo.flags & SQINFO_OLEN)) - { sqinfo.olen = sqinfo.len; sqinfo.flags |= SQINFO_OLEN; } - - sqinfo.len = (to > from) ? to-from+1 : from-to+1; - sqinfo.flags |= SQINFO_LEN; - - if (rename != NULL) - SetSeqinfoString(&sqinfo, rename, SQINFO_NAME); - - source_start = (sqinfo.flags & SQINFO_START) ? sqinfo.start : 1; - source_stop = (sqinfo.flags & SQINFO_STOP) ? sqinfo.stop : sqinfo.len; - source_orient= (source_stop > source_start) ? 1 : -1; - - sqinfo.start = source_start + (from- 1) * source_orient; - sqinfo.stop = source_start + (to - 1) * source_orient; - sqinfo.flags |= SQINFO_START | SQINFO_STOP; - - WriteSeq(outfp, outfmt, frag, &sqinfo); - free(frag); - FreeSequence(seq, &sqinfo); - } - - if (outfile != NULL) - printf("Fragment written to file %s\n", outfile); - - SeqfileClose(seqfp); - fclose(outfp); - return(0); -} diff --git a/forester/archive/RIO/others/hmmer/squid/shuffle.c b/forester/archive/RIO/others/hmmer/squid/shuffle.c deleted file mode 100644 index d923a2a..0000000 --- a/forester/archive/RIO/others/hmmer/squid/shuffle.c +++ /dev/null @@ -1,550 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* shuffle.c - * - * Routines for randomizing sequences. - * - * All routines are alphabet-independent (DNA, protein, RNA, whatever); - * they assume that input strings are purely alphabetical [a-zA-Z], and - * will return strings in all upper case [A-Z]. - * - * All return 1 on success, and 0 on failure; 0 status invariably - * means the input string was not alphabetical. - * - * StrShuffle() - shuffled string, preserve mono-symbol composition. 
- * StrDPShuffle() - shuffled string, preserve mono- and di-symbol composition. - * - * StrMarkov0() - random string, same zeroth order Markov properties. - * StrMarkov1() - random string, same first order Markov properties. - * - * StrReverse() - simple reversal of string - * StrRegionalShuffle() - mono-symbol shuffled string in regional windows - * - * There are also similar routines for shuffling alignments: - * - * AlignmentShuffle() - alignment version of StrShuffle(). - * AlignmentBootstrap() - sample with replacement; a bootstrap dataset. - * - * CVS $Id: shuffle.c,v 1.1.1.1 2005/03/22 08:34:24 cmzmasek Exp $ - */ - -#include -#include - -#include "squid.h" - -/* Function: StrShuffle() - * - * Purpose: Returns a shuffled version of s2, in s1. - * (s1 and s2 can be identical, to shuffle in place.) - * - * Args: s1 - allocated space for shuffled string. - * s2 - string to shuffle. - * - * Return: 1 on success. - */ -int -StrShuffle(char *s1, char *s2) -{ - int len; - int pos; - char c; - - if (s1 != s2) strcpy(s1, s2); - for (len = strlen(s1); len > 1; len--) - { - pos = CHOOSE(len); - c = s1[pos]; - s1[pos] = s1[len-1]; - s1[len-1] = c; - } - return 1; -} - -/* Function: StrDPShuffle() - * Date: SRE, Fri Oct 29 09:15:17 1999 [St. Louis] - * - * Purpose: Returns a shuffled version of s2, in s1. - * (s1 and s2 may be identical; i.e. a string - * may be shuffled in place.) The shuffle is a - * "doublet-preserving" (DP) shuffle. Both - * mono- and di-symbol composition are preserved. - * - * Done by searching for a random Eulerian - * walk on a directed multigraph. - * Reference: S.F. Altschul and B.W. Erickson, Mol. Biol. - * Evol. 2:526-538, 1985. Quoted bits in my comments - * are from Altschul's outline of the algorithm. - * - * Args: s1 - RETURN: the string after it's been shuffled - * (space for s1 allocated by caller) - * s2 - the string to be shuffled - * - * Returns: 0 if string can't be shuffled (it's not all [a-zA-z] - * alphabetic. - * 1 on success. 
- */ -int -StrDPShuffle(char *s1, char *s2) -{ - int len; - int pos; /* a position in s1 or s2 */ - int x,y; /* indices of two characters */ - char **E; /* edge lists: E[0] is the edge list from vertex A */ - int *nE; /* lengths of edge lists */ - int *iE; /* positions in edge lists */ - int n; /* tmp: remaining length of an edge list to be shuffled */ - char sf; /* last character in s2 */ - char Z[26]; /* connectivity in last edge graph Z */ - int keep_connecting; /* flag used in Z connectivity algorithm */ - int is_eulerian; /* flag used for when we've got a good Z */ - - /* First, verify that the string is entirely alphabetic. - */ - len = strlen(s2); - for (pos = 0; pos < len; pos++) - if (! isalpha(s2[pos])) return 0; - - /* "(1) Construct the doublet graph G and edge ordering E - * corresponding to S." - * - * Note that these also imply the graph G; and note, - * for any list x with nE[x] = 0, vertex x is not part - * of G. - */ - E = MallocOrDie(sizeof(char *) * 26); - nE = MallocOrDie(sizeof(int) * 26); - for (x = 0; x < 26; x++) - { - E[x] = MallocOrDie(sizeof(char) * (len-1)); - nE[x] = 0; - } - - x = toupper(s2[0]) - 'A'; - for (pos = 1; pos < len; pos++) - { - y = toupper(s2[pos]) - 'A'; - E[x][nE[x]] = y; - nE[x]++; - x = y; - } - - /* Now we have to find a random Eulerian edge ordering. - */ - sf = toupper(s2[len-1]) - 'A'; - is_eulerian = 0; - while (! is_eulerian) - { - /* "(2) For each vertex s in G except s_f, randomly select - * one edge from the s edge list of E(S) to be the - * last edge of the s list in a new edge ordering." - * - * select random edges and move them to the end of each - * edge list. - */ - for (x = 0; x < 26; x++) - { - if (nE[x] == 0 || x == sf) continue; - - pos = CHOOSE(nE[x]); - y = E[x][pos]; - E[x][pos] = E[x][nE[x]-1]; - E[x][nE[x]-1] = y; - } - - /* "(3) From this last set of edges, construct the last-edge - * graph Z and determine whether or not all of its - * vertices are connected to s_f." 
- * - * a probably stupid algorithm for looking at the - * connectivity in Z: iteratively sweep through the - * edges in Z, and build up an array (confusing called Z[x]) - * whose elements are 1 if x is connected to sf, else 0. - */ - for (x = 0; x < 26; x++) Z[x] = 0; - Z[(int) sf] = keep_connecting = 1; - - while (keep_connecting) { - keep_connecting = 0; - for (x = 0; x < 26; x++) - { - y = E[x][nE[x]-1]; /* xy is an edge in Z */ - if (Z[x] == 0 && Z[y] == 1) /* x is connected to sf in Z */ - { - Z[x] = 1; - keep_connecting = 1; - } - } - } - - /* if any vertex in Z is tagged with a 0, it's - * not connected to sf, and we won't have a Eulerian - * walk. - */ - is_eulerian = 1; - for (x = 0; x < 26; x++) - { - if (nE[x] == 0 || x == sf) continue; - if (Z[x] == 0) { - is_eulerian = 0; - break; - } - } - - /* "(4) If any vertex is not connected in Z to s_f, the - * new edge ordering will not be Eulerian, so return to - * (2). If all vertices are connected in Z to s_f, - * the new edge ordering will be Eulerian, so - * continue to (5)." - * - * e.g. note infinite loop while is_eulerian is FALSE. - */ - } - - /* "(5) For each vertex s in G, randomly permute the remaining - * edges of the s edge list of E(S) to generate the s - * edge list of the new edge ordering E(S')." - * - * Essentially a StrShuffle() on the remaining nE[x]-1 elements - * of each edge list; unfortunately our edge lists are arrays, - * not strings, so we can't just call out to StrShuffle(). - */ - for (x = 0; x < 26; x++) - for (n = nE[x] - 1; n > 1; n--) - { - pos = CHOOSE(n); - y = E[x][pos]; - E[x][pos] = E[x][n-1]; - E[x][n-1] = y; - } - - /* "(6) Construct sequence S', a random DP permutation of - * S, from E(S') as follows. Start at the s_1 edge list. - * At each s_i edge list, add s_i to S', delete the - * first edge s_i,s_j of the edge list, and move to - * the s_j edge list. Continue this process until - * all edge lists are exhausted." 
- */ - iE = MallocOrDie(sizeof(int) * 26); - for (x = 0; x < 26; x++) iE[x] = 0; - - pos = 0; - x = toupper(s2[0]) - 'A'; - while (1) - { - s1[pos++] = 'A' + x; /* add s_i to S' */ - - y = E[x][iE[x]]; - iE[x]++; /* "delete" s_i,s_j from edge list */ - - x = y; /* move to s_j edge list. */ - - if (iE[x] == nE[x]) - break; /* the edge list is exhausted. */ - } - s1[pos++] = 'A' + sf; - s1[pos] = '\0'; - - /* Reality checks. - */ - if (x != sf) Die("hey, you didn't end on s_f."); - if (pos != len) Die("hey, pos (%d) != len (%d).", pos, len); - - /* Free and return. - */ - Free2DArray((void **) E, 26); - free(nE); - free(iE); - return 1; -} - - -/* Function: StrMarkov0() - * Date: SRE, Fri Oct 29 11:08:31 1999 [St. Louis] - * - * Purpose: Returns a random string s1 with the same - * length and zero-th order Markov properties - * as s2. - * - * s1 and s2 may be identical, to randomize s2 - * in place. - * - * Args: s1 - allocated space for random string - * s2 - string to base s1's properties on. - * - * Returns: 1 on success; 0 if s2 doesn't look alphabetical. - */ -int -StrMarkov0(char *s1, char *s2) -{ - int len; - int pos; - float p[26]; /* symbol probabilities */ - - /* First, verify that the string is entirely alphabetic. - */ - len = strlen(s2); - for (pos = 0; pos < len; pos++) - if (! isalpha(s2[pos])) return 0; - - /* Collect zeroth order counts and convert to frequencies. - */ - FSet(p, 26, 0.); - for (pos = 0; pos < len; pos++) - p[(int)(toupper(s2[pos]) - 'A')] += 1.0; - FNorm(p, 26); - - /* Generate a random string using those p's. - */ - for (pos = 0; pos < len; pos++) - s1[pos] = FChoose(p, 26) + 'A'; - s1[pos] = '\0'; - - return 1; -} - - -/* Function: StrMarkov1() - * Date: SRE, Fri Oct 29 11:22:20 1999 [St. Louis] - * - * Purpose: Returns a random string s1 with the same - * length and first order Markov properties - * as s2. - * - * s1 and s2 may be identical, to randomize s2 - * in place. 
- * - * Args: s1 - allocated space for random string - * s2 - string to base s1's properties on. - * - * Returns: 1 on success; 0 if s2 doesn't look alphabetical. - */ -int -StrMarkov1(char *s1, char *s2) -{ - int len; - int pos; - int x,y; - int i; /* initial symbol */ - float p[26][26]; /* symbol probabilities */ - - /* First, verify that the string is entirely alphabetic. - */ - len = strlen(s2); - for (pos = 0; pos < len; pos++) - if (! isalpha(s2[pos])) return 0; - - /* Collect first order counts and convert to frequencies. - */ - for (x = 0; x < 26; x++) FSet(p[x], 26, 0.); - - i = x = toupper(s2[0]) - 'A'; - for (pos = 1; pos < len; pos++) - { - y = toupper(s2[pos]) - 'A'; - p[x][y] += 1.0; - x = y; - } - for (x = 0; x < 26; x++) - FNorm(p[x], 26); - - /* Generate a random string using those p's. - */ - x = i; - s1[0] = x + 'A'; - for (pos = 1; pos < len; pos++) - { - y = FChoose(p[x], 26); - s1[pos] = y + 'A'; - x = y; - } - s1[pos] = '\0'; - - return 1; -} - - - -/* Function: StrReverse() - * Date: SRE, Thu Nov 20 10:54:52 1997 [St. Louis] - * - * Purpose: Returns a reversed version of s2, in s1. - * (s1 and s2 can be identical, to reverse in place) - * - * Args: s1 - allocated space for reversed string. - * s2 - string to reverse. - * - * Return: 1. - */ -int -StrReverse(char *s1, char *s2) -{ - int len; - int pos; - char c; - - if (s1 != s2) strcpy(s1, s2); - len = strlen(s1); - for (pos = 0; pos < len/2; pos++) - { /* swap ends */ - c = s1[len-pos-1]; - s1[len-pos-1] = s1[pos]; - s1[pos] = c; - } - return 1; -} - -/* Function: StrRegionalShuffle() - * Date: SRE, Thu Nov 20 11:02:34 1997 [St. Louis] - * - * Purpose: Returns a regionally shuffled version of s2, in s1. - * (s1 and s2 can be identical to regionally - * shuffle in place.) See [Pearson88]. - * - * Args: s1 - allocated space for regionally shuffled string. - * s2 - string to regionally shuffle - * w - window size (typically 10 or 20) - * - * Return: 1. 
- */ -int -StrRegionalShuffle(char *s1, char *s2, int w) -{ - int len; - char c; - int pos; - int i, j; - - if (s1 != s2) strcpy(s1, s2); - len = strlen(s1); - - for (i = 0; i < len; i += w) - for (j = MIN(len-1, i+w-1); j > i; j--) - { - pos = i + CHOOSE(j-i); - c = s1[pos]; - s1[pos] = s1[j]; - s1[j] = c; - } - return 1; -} - - -/* Function: AlignmentShuffle() - * Date: SRE, Sun Apr 22 18:37:15 2001 [St. Louis] - * - * Purpose: Returns a shuffled version of ali2, in ali1. - * (ali1 and ali2 can be identical, to shuffle - * in place.) The alignment columns are shuffled, - * preserving % identity within the columns. - * - * Args: ali1 - allocated space for shuffled alignment - * [0..nseq-1][0..alen-1] - * ali2 - alignment to be shuffled - * nseq - number of sequences in the alignment - * alen - length of alignment, in columns. - * - * Returns: int - */ -int -AlignmentShuffle(char **ali1, char **ali2, int nseq, int alen) -{ - int i; - int pos; - char c; - - if (ali1 != ali2) - { - for (i = 0; i < nseq; i++) strcpy(ali1[i], ali2[i]); - } - - for (i = 0; i < nseq; i++) - ali1[i][alen] = '\0'; - - for (; alen > 1; alen--) - { - pos = CHOOSE(alen); - for (i = 0; i < nseq; i++) - { - c = ali1[i][pos]; - ali1[i][pos] = ali1[i][alen-1]; - ali1[i][alen-1] = c; - } - } - - return 1; -} - -/* Function: AlignmentBootstrap() - * Date: SRE, Sun Apr 22 18:49:14 2001 [St. Louis] - * - * Purpose: Returns a bootstrapped alignment sample in ali1, - * constructed from ali2 by sampling columns with - * replacement. - * - * Unlike the other shuffling routines, ali1 and - * ali2 cannot be the same. ali2 is left unchanged. - * ali1 must be a properly allocated space for an - * alignment the same size as ali2. - * - * Args: ali1 - allocated space for bootstrapped alignment - * [0..nseq-1][0..alen-1] - * ali2 - alignment to be bootstrapped - * nseq - number of sequences in the alignment - * alen - length of alignment, in columns. - * - * Returns: 1 on success. 
- */ -int -AlignmentBootstrap(char **ali1, char **ali2, int nseq, int alen) -{ - int pos; - int col; - int i; - - for (pos = 0; pos < alen; pos++) - { - col = CHOOSE(alen); - for (i = 0; i < nseq; i++) - ali1[i][pos] = ali2[i][col]; - } - for (i = 0; i < nseq; i++) - ali1[i][alen] = '\0'; - - return 1; -} - - - - -#ifdef TESTDRIVER -/* - * cc -g -o testdriver -DTESTDRIVER -L. shuffle.c -lsquid -lm - */ -int -main(int argc, char **argv) -{ - char s1[100]; - char s2[100]; - - sre_srandom(42); - strcpy(s2, "GGGGGGGGGGCCCCCCCCCC"); - /* strcpy(s2, "AGACATAAAGTTCCGTACTGCCGGGAT"); - */ - StrDPShuffle(s1, s2); - printf("DPshuffle: %s\n", s1); - StrMarkov0(s1,s2); - printf("Markov 0 : %s\n", s1); - StrMarkov1(s1,s2); - printf("Markov 1 : %s\n", s1); - return 0; -} -#endif diff --git a/forester/archive/RIO/others/hmmer/squid/shuffle_main.c b/forester/archive/RIO/others/hmmer/squid/shuffle_main.c deleted file mode 100644 index 34be923..0000000 --- a/forester/archive/RIO/others/hmmer/squid/shuffle_main.c +++ /dev/null @@ -1,281 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- *****************************************************************/ - -/* main for shuffle - * - * shuffle - generate shuffled sequences - * Mon Feb 26 16:56:08 1996 - * - * CVS $Id: shuffle_main.c,v 1.1.1.1 2005/03/22 08:34:16 cmzmasek Exp $ - */ - -#include -#include -#include -#include "squid.h" - -char banner[] = "shuffle - generated shuffled (or otherwise randomized) sequence"; - -char usage[] = "\ -Usage: shuffle [-options] \n\ - Available options:\n\ - -h : help; print version and usage info\n\ - -n : make samples per input seq (default 1)\n\ - -t : truncate/delete inputs to fixed length \n\ -\n\ - Default: shuffle each input randomly, preserving mono-symbol composition.\n\ - Other choices (exclusive; can't use more than one) :\n\ - -d : shuffle but preserve both mono- and di-symbol composition\n\ - -0 : generate with same 0th order Markov properties as each input\n\ - -1 : generate with same 1st order Markov properties as each input\n\ - -l : make iid sequences of same number and length as inputs\n\ - -r : reverse inputs\n\ - -w : regionally shuffle inputs in window size \n\ - -i : make [-n] iid seqs of length [-t] of type [--dna|--amino];\n\ - when -i is set, no argument is used\n\ -"; - -char experts[] = "\ - --alignment : is an alignment; shuffle the columns\n\ - --amino : synthesize protein sequences [default] (see -i, -l)\n\ - --dna : synthesize DNA sequences (see -i, -l))\n\ - --informat : specify sequence file format \n\ - --nodesc : remove sequence description lines\n\ - --seed : set random number seed to \n\ -"; - -struct opt_s OPTIONS[] = { - { "-0", TRUE, sqdARG_NONE }, /* 0th order Markov */ - { "-1", TRUE, sqdARG_NONE }, /* 1st order Markov */ - { "-d", TRUE, sqdARG_NONE }, /* digram shuffle */ - { "-h", TRUE, sqdARG_NONE }, /* help */ - { "-i", TRUE, sqdARG_NONE }, /* make iid seq of set length */ - { "-l", TRUE, sqdARG_NONE }, /* make iid seq of same length */ - { "-n", TRUE, sqdARG_INT }, /* number of shuffles per input seq */ - { "-r", 
TRUE, sqdARG_NONE }, /* reverse seq rather than shuffle */ - { "-t", TRUE, sqdARG_INT }, /* truncation of inputs to fixed len */ - { "-w", TRUE, sqdARG_INT }, /* do regional shuffling */ - { "--alignment",FALSE, sqdARG_NONE }, /* input is alignment; shuff cols */ - { "--amino", FALSE, sqdARG_NONE }, /* make iid protein seqs [default]*/ - { "--dna", FALSE, sqdARG_NONE }, /* make iid DNA seqs */ - { "--informat", FALSE, sqdARG_STRING }, /* remove desc lines */ - { "--nodesc", FALSE, sqdARG_NONE }, /* remove desc lines */ - { "--seed", FALSE, sqdARG_INT }, /* set the random number seed */ -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -static void shuffle_alignment_file(char *afile, int fmt); - -int -main(int argc, char **argv) -{ - char *seqfile; /* name of sequence file */ - SQFILE *dbfp; /* open sequence file */ - int fmt; /* format of seqfile */ - char *seq; /* sequence */ - char sqname[32]; /* name of an iid sequence */ - SQINFO sqinfo; /* additional sequence info */ - char *shuff; /* shuffled sequence */ - int num; /* number to generate */ - int seed; /* random number generator seed */ - int i; - int w; /* window size for regional shuffle (or 0) */ - int truncation; /* fixed length for truncation option (or 0) */ - int no_desc; /* TRUE to remove description lines */ - enum { /* shuffling strategy */ - DO_SHUFFLE, DO_DPSHUFFLE, DO_MARKOV0, DO_MARKOV1, DO_REVERSE, DO_REGIONAL, - DO_IID_SAMELEN, DO_IID_FIXEDLEN} strategy; - int do_dna; /* TRUE to make DNA iid seqs, not protein */ - int do_alignment; /* TRUE to shuffle alignment columns */ - - char *optname; /* option name */ - char *optarg; /* option argument (or NULL) */ - int optind; /* index of next argv[] */ - - - /*********************************************** - * Parse command line - ***********************************************/ - - fmt = SQFILE_UNKNOWN; /* autodetect file format by default */ - num = 0; - seed = (int) time ((time_t *) NULL); - w = 0; - truncation = 0; - strategy = 
DO_SHUFFLE; - no_desc = FALSE; - do_dna = FALSE; - do_alignment = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "-0") == 0) strategy = DO_MARKOV0; - else if (strcmp(optname, "-1") == 0) strategy = DO_MARKOV1; - else if (strcmp(optname, "-d") == 0) strategy = DO_DPSHUFFLE; - else if (strcmp(optname, "-n") == 0) num = atoi(optarg); - else if (strcmp(optname, "-w") == 0) {strategy = DO_REGIONAL; w = atoi(optarg); } - else if (strcmp(optname, "-i") == 0) strategy = DO_IID_FIXEDLEN; - else if (strcmp(optname, "-l") == 0) strategy = DO_IID_SAMELEN; - else if (strcmp(optname, "-r") == 0) strategy = DO_REVERSE; - else if (strcmp(optname, "-t") == 0) truncation = atoi(optarg); - - else if (strcmp(optname, "--alignment")== 0) do_alignment = TRUE; - else if (strcmp(optname, "--amino") == 0) do_dna = FALSE; - else if (strcmp(optname, "--dna") == 0) do_dna = TRUE; - else if (strcmp(optname, "--nodesc") == 0) no_desc = TRUE; - else if (strcmp(optname, "--seed") == 0) seed = atoi(optarg); - else if (strcmp(optname, "--informat") == 0) { - fmt = String2SeqfileFormat(optarg); - if (fmt == SQFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - - /***************************************************************** - * Special case, 1: IID sequence generation. - * -i option is special, because it synthesizes, rather than - * shuffles. Doesn't take a seqfile argument; - * requires -n, -t; and doesn't use the same code logic as the - * other shuffling strategies. Note that we misuse/overload the - * -t "truncation length" option to set our fixed length for - * generating iid sequence. - *****************************************************************/ - - if (strategy == DO_IID_FIXEDLEN) { - if (num == 0 || truncation == 0) - Die("-i (i.i.d. 
sequence generation) requires -n,-t to be set\n%s\n", - usage); - if (argc-optind != 0) - Die("-i (i.i.d. sequence generation) takes no seqfile argument\n%s\n", - usage); - sre_srandom(seed); - for (i = 0; i < num; i++) - { - if (do_dna) - shuff = RandomSequence(DNA_ALPHABET, dnafq, 4, truncation); - else - shuff = RandomSequence(AMINO_ALPHABET, aafq, 20, truncation); - - /* pedantic note: sqname has room for 31 char + \0, so - * there's room for 24 digits - a 32-bit integer can only run up - * to 10 digits, and a 64-bit integer to 20, so we don't worry - * about the following sprintf() overrunning its bounds. - */ - sprintf(sqname, "randseq%d", i); - WriteSimpleFASTA(stdout, shuff, sqname, NULL); - free(shuff); - } - return 0; - } - - /***************************************************************** - * Check command line - *****************************************************************/ - - if (argc - optind != 1) - Die("Incorrect number of command line arguments\n%s\n", usage); - seqfile = argv[optind]; - if (num == 0) num = 1; /* set default shuffle number per sequence */ - sre_srandom(seed); - - /***************************************************************** - * Special case, 2: Alignment shuffling - *****************************************************************/ - if (do_alignment) - { - shuffle_alignment_file(seqfile, fmt); - return 0; - } - - /***************************************************************** - * Main logic of the shuffling program: - * expect one seqfile argument - *****************************************************************/ - - if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL) - Die("Failed to open sequence file %s for reading", seqfile); - - while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo)) - { - shuff = (char *) MallocOrDie ((sqinfo.len + 1) * sizeof(char)); - - if (no_desc) strcpy(sqinfo.desc, ""); - - /* If we're truncating seq, do it now. 
- */ - if (truncation > 0) - { - int start; - if (sqinfo.len < truncation) { - free(shuff); - FreeSequence(seq, &sqinfo); - continue; - } - - start = CHOOSE(sqinfo.len - truncation + 1); - strncpy(shuff, seq+start, truncation); - shuff[truncation] = '\0'; - strcpy(seq, shuff); - sqinfo.len = truncation; - } - - for (i = 0; i < num; i++) - { - switch (strategy) { - case DO_SHUFFLE: StrShuffle(shuff, seq); break; - case DO_DPSHUFFLE: StrDPShuffle(shuff, seq); break; - case DO_MARKOV0: StrMarkov0(shuff, seq); break; - case DO_MARKOV1: StrMarkov1(shuff, seq); break; - case DO_REVERSE: StrReverse(shuff, seq); break; - case DO_REGIONAL: StrRegionalShuffle(shuff, seq, w); break; - case DO_IID_SAMELEN: - free(shuff); - shuff = RandomSequence(AMINO_ALPHABET, aafq, 20, sqinfo.len); - break; - default: Die("choked on a bad enum; tragic."); - } - - WriteSeq(stdout, SQFILE_FASTA, shuff, &sqinfo); - } - - if (shuff != NULL) free(shuff); - FreeSequence(seq, &sqinfo); - } - - SeqfileClose(dbfp); - return 0; -} - - -static void -shuffle_alignment_file(char *afile, int fmt) -{ - MSAFILE *afp; - MSA *msa; - - if ((afp = MSAFileOpen(afile, fmt, NULL)) == NULL) - Die("Alignment file %s could not be opened for reading", afile); - while ((msa = MSAFileRead(afp)) != NULL) - { - /* shuffle in place */ - AlignmentShuffle(msa->aseq, msa->aseq, msa->nseq, msa->alen); - /* write in same format we read in */ - MSAFileWrite(stdout, msa, afp->format, FALSE); - MSAFree(msa); - } - MSAFileClose(afp); -} diff --git a/forester/archive/RIO/others/hmmer/squid/sindex_main.c b/forester/archive/RIO/others/hmmer/squid/sindex_main.c deleted file mode 100644 index ad2faca..0000000 --- a/forester/archive/RIO/others/hmmer/squid/sindex_main.c +++ /dev/null @@ -1,185 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source 
code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* sindex_main.c, SRE, Fri Feb 16 08:38:39 2001 [St. Louis] - * - * sindex -- create SSI index of sequence file(s) for sfetch - * - * CVS $Id: sindex_main.c,v 1.1.1.1 2005/03/22 08:34:31 cmzmasek Exp $ - */ - -#include -#include "squid.h" -#include "msa.h" -#include "ssi.h" - -static char banner[] = "sindex - create SSI index of sequence file(s) for sfetch"; - -static char usage[] = "\ -Usage: sindex [-options] ...\n\ - Available options:\n\ - -h : help; print version and usage info.\n\ - -o : output the SSI index to file named \n\ -"; - -static char experts[] = "\ - --informat : specify input sequence file format \n\ -"; - -struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-o", TRUE, sqdARG_STRING }, - { "--64", FALSE< sqdARG_NONE }, - { "--informat", FALSE, sqdARG_STRING }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *file; /* name of a sequence file */ - SQFILE *sfp; /* open sequence file */ - int format; /* forced sequence file format, if any */ - int mode; /* SSI_OFFSET_I32 or SSI_OFFSET_I64 */ - int idx; /* counter over files */ - int status; /* return status from an SSI call */ - SSIINDEX *ssi; /* the index we're creating */ - char *ssifile; /* file name for the SSI index */ - int fh; /* handle on current file */ - char *seq; /* a sequence read from the file */ - SQINFO sqinfo; /* info on the sequence */ - - char *optname; - char *optarg; - int optind; - - /*********************************************** - * Parse the command line - ***********************************************/ - - /* initializations and defaults */ - format = SQFILE_UNKNOWN; /* autodetecting format is the default */ - mode = SSI_OFFSET_I32; /* default = 32 bit mode */ - ssifile = NULL; /* default: set SSI file name 
as .ssi */ - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "--64") == 0) mode = SSI_OFFSET_I64; - else if (strcmp(optname, "-o") == 0) ssifile = sre_strdup(optarg, -1); - else if (strcmp(optname, "--informat") == 0) { - format = String2SeqfileFormat(optarg); - if (format == SQFILE_UNKNOWN) - Die("unrecognized input sequence file format \"%s\"", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - - if (argc - optind < 1) - Die("Incorrect number of command line arguments.\n%s\n", usage); - - - /***************************************************************** - * Get set up... - *****************************************************************/ - - /* Determine whether we'll index in 32-bit or 64-bit mode. - * 32-bit is default, but 64-bit trumps; if any file needs 64-bit, - * we index them all that way. - */ - for (idx = optind; idx < argc; idx++) - { - file = argv[idx]; - if ((status = SSIRecommendMode(file)) == -1) - Die("Couldn't stat %s - file doesn't exist, or is too big", file); - if (status == SSI_OFFSET_I64) mode = SSI_OFFSET_I64; - } - - if (ssifile == NULL) { - ssifile = sre_strdup(file, -1); - sre_strcat(&ssifile, -1, ".ssi", -1); - } - - if ((ssi = SSICreateIndex(mode)) == NULL) - Die("Couldn't allocate/initialize the new SSI index\n"); - - /***************************************************************** - * Go through the files one at a time and compile index. - *****************************************************************/ - - for (idx = optind; idx < argc; idx++) - { - file = argv[idx]; - printf("Working on file %s... 
\t", file); - fflush(stdout); - - if ((sfp = SeqfileOpenForIndexing(file, format, NULL, mode)) == NULL) - Die("Failed to open sequence file %s for reading", file); - - if ((status = SSIAddFileToIndex(ssi, file, sfp->format, &fh)) != 0) - Die("SSI error: %s\n", SSIErrorString(status)); - - while (ReadSeq(sfp, sfp->format, &seq, &sqinfo)) { - if ((status = SSIAddPrimaryKeyToIndex(ssi, sqinfo.name, fh, - &(sfp->r_off), &(sfp->d_off), - sqinfo.len)) != 0) - Die("SSI error: %s\n", SSIErrorString(status)); - -#if DEBUGLEVEL >= 2 - if (mode == SSI_OFFSET_I32) - SQD_DPRINTF2(("Added primary key %s: r_off=%d, d_off=%d len=%d\n", - sqinfo.name, sfp->r_off.off.i32, - sfp->d_off.off.i32, sqinfo.len)); - else - SQD_DPRINTF2(("Added primary key %s: r_off=%lld, d_off=%lld len=%d\n", - sqinfo.name, sfp->r_off.off.i64, sfp->d_off.off.i64, - sqinfo.len)); -#endif - - if (sqinfo.flags & SQINFO_ACC) { - if ((status = SSIAddSecondaryKeyToIndex(ssi, sqinfo.acc, sqinfo.name)) != 0) - Die("SSI error: %s\n", SSIErrorString(status)); - } - - FreeSequence(seq, &sqinfo); - } - if (sfp->bpl > 0 && sfp->rpl > 0) { - if ((status = SSISetFileForSubseq(ssi, fh, sfp->bpl, sfp->rpl)) != 0) - Die("SSI error: %s\n", SSIErrorString(status)); - printf("FAST_SUBSEQ set...\t"); - } - - SeqfileClose(sfp); - printf("[done]\n"); - } - - printf("Sorting and writing index to SSI file %s...\t", ssifile); - fflush(stdout); - if ((status = SSIWriteIndex(ssifile, ssi)) != 0) - Die("SSIWriteIndex() failed: %s", SSIErrorString(status)); - printf("[done]\n"); - - printf("%s:\n", ssifile); - printf("Mode: %s\n", - mode == SSI_OFFSET_I32 ? 
"32-bit" : "64-bit"); - printf("Files: %d\n", ssi->nfiles); - printf("Primary keys: %d\n", ssi->nprimary); - printf("Secondary keys: %d\n", ssi->nsecondary); - - SSIFreeIndex(ssi); - - free(ssifile); - return 0; -} diff --git a/forester/archive/RIO/others/hmmer/squid/sqerror.c b/forester/archive/RIO/others/hmmer/squid/sqerror.c deleted file mode 100644 index ee6a32e..0000000 --- a/forester/archive/RIO/others/hmmer/squid/sqerror.c +++ /dev/null @@ -1,95 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* sqerror.c - * - * error handling for the squid library - * RCS $Id: sqerror.c,v 1.1.1.1 2005/03/22 08:34:26 cmzmasek Exp $ - */ - - /* a global errno equivalent */ -int squid_errno; - -#include -#include -#include - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -/* Function: Die() - * - * Purpose: Print an error message and die. The arguments - * are formatted exactly like arguments to printf(). - * - * Return: None. Exits the program. - */ -/* VARARGS0 */ -void -Die(char *format, ...) -{ - va_list argp; - /* format the error mesg */ - fprintf(stderr, "\nFATAL: "); - va_start(argp, format); - vfprintf(stderr, format, argp); - va_end(argp); - fprintf(stderr, "\n"); - fflush(stderr); - /* exit */ - exit(1); -} - - - -/* Function: Warn() - * - * Purpose: Print an error message and return. The arguments - * are formatted exactly like arguments to printf(). - * - * Return: (void) - */ -/* VARARGS0 */ -void -Warn(char *format, ...) 
-{ - va_list argp; - /* format the error mesg */ - fprintf(stderr, "WARNING: "); - va_start(argp, format); - vfprintf(stderr, format, argp); - va_end(argp); - fprintf(stderr, "\n"); - fflush(stderr); -} - -/* Function: Panic() - * - * Purpose: Die from a lethal error that's not my problem, - * but instead a failure of a StdC/POSIX call that - * shouldn't fail. Call perror() to get the - * errno flag, then die. - * - * Usually called by the PANIC macro which adds - * the __FILE__ and __LINE__ information; see - * structs.h. - * - * Inspired by code in Donald Lewine's book, _POSIX - * Programmer's Guide_. - */ -void -Panic(char *file, int line) -{ - (void) fprintf(stderr, "\nPANIC [%s line %d] ", file, line); - (void) perror("Unusual error"); - exit(EXIT_FAILURE); -} - diff --git a/forester/archive/RIO/others/hmmer/squid/sqfuncs.h b/forester/archive/RIO/others/hmmer/squid/sqfuncs.h deleted file mode 100644 index 4b5ef3c..0000000 --- a/forester/archive/RIO/others/hmmer/squid/sqfuncs.h +++ /dev/null @@ -1,293 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -#ifndef SQFUNCSH_INCLUDED -#define SQFUNCSH_INCLUDED -/* sqfuncs.h - * - * Prototypes for squid library functions; - * also makes a good reference list for what the package contains. - * - * Warning: squid is a slowly evolving beast. Some functions are - * obsolete. Some functions are probably just wrong, dating to - * a primordial era before I knew anything about what I was doing. - * Some functions are both obsolete and wrong but still necessary - * to get legacy code to compile. 
- * - * RCS $Id: sqfuncs.h,v 1.1.1.1 2005/03/22 08:34:30 cmzmasek Exp $ - */ - -/* - * from aligneval.c - */ -extern float ComparePairAlignments(char *known1, char *known2, char *calc1, char *calc2); -extern float CompareRefPairAlignments(int *ref, char *known1, char *known2, char *calc1, char *calc2); -extern float CompareMultAlignments(char **kseqs, char **tseqs, int N); -extern float CompareRefMultAlignments(int *ref, char **kseqs, char **tseqs, int N); -extern float PairwiseIdentity(char *s1, char *s2); -extern float AlignmentIdentityBySampling(char **aseq, int L, int N, int nsample); -extern char *MajorityRuleConsensus(char **aseq, int nseq, int alen); - -/* - * from alignio.c - */ -extern void AllocAlignment(int nseq, int alen, char ***ret_aseq, AINFO *ainfo); -extern void InitAinfo(AINFO *ainfo); -extern void FreeAlignment(char **aseqs, AINFO *ainfo); -extern void SAMizeAlignment(char **aseq, int nseq, int alen); -extern void SAMizeAlignmentByGapFrac(char **aseq, int nseq, int alen, float maxgap); -extern int MakeAlignedString(char *aseq, int alen, char *ss, char **ret_s); -extern int MakeDealignedString(char *aseq, int alen, char *ss, char **ret_s); -extern int DealignedLength(char *aseq); -extern int WritePairwiseAlignment(FILE *ofp, char *aseq1, char *name1, int spos1, - char *aseq2, char *name2, int spos2, - int **pam, int indent); -extern int MingapAlignment(char **aseqs, AINFO *ainfo); -extern int RandomAlignment(char **rseqs, SQINFO *sqinfo, int nseq, float pop, float pex, - char ***ret_aseqs, AINFO *ainfo); -extern void AlignmentHomogenousGapsym(char **aseq, int nseq, int alen, char gapsym); - -/* from cluster.c - */ -extern int Cluster(float **mx, int N, enum clust_strategy mode, struct phylo_s **ret_tree); -extern struct phylo_s *AllocPhylo(int N); -extern void FreePhylo(struct phylo_s *tree, int N); -extern void MakeDiffMx(char **aseqs, int num, float ***ret_dmx); -extern void MakeIdentityMx(char **aseqs, int num, float ***ret_imx); -extern void 
PrintNewHampshireTree(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N); -extern void PrintPhylo(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N); - -/* - * from dayhoff.c - */ -extern int ParsePAMFile(FILE *fp, int ***ret_pam, float *ret_scale); -extern void ScalePAM(int **pam, int scale); - - -/* from file.c - */ -extern char *FileDirname(char *filename); -extern char *FileTail(char *file, int noextension); -extern char *FileConcat(char *dir, char *file); -extern char *FileAddSuffix(char *filename, char *sfx); -extern FILE *EnvFileOpen(char *fname, char *env, char **ret_dir); -extern int FileExists(char *filename); - - -/* from getopt.c - */ -extern int Getopt(int argc, char **argv, - struct opt_s *opt, int nopts, char *usage, - int *ret_optind, char **ret_optname, char **ret_optarg); - - -/* from hsregex.c - * Henry Spencer's regex() code - */ -extern int Strparse(char *rexp, char *s, int ntok); -extern void SqdClean(void); -extern sqd_regexp *sqd_regcomp(const char *re); -extern int sqd_regexec(sqd_regexp *rp, const char *s); -extern void sqd_regsub(const sqd_regexp *rp, const char *src, char *dst); -extern void sqd_regerror(char *message); - -/* from interleaved.c - */ -extern int IsInterleavedFormat(int format); -extern int ReadInterleaved(char *seqfile, - int (*skip_header)(FILE *), - int (*parse_header)(FILE *, AINFO *), - int (*is_dataline)(char *, char *), - char ***ret_aseqs, AINFO *ainfo); -extern int ReadAlignment(char *seqfile, int format, char ***ret_aseqs, AINFO *ainfo); - - -/* from revcomp.c - */ -extern char *revcomp(char *comp, char *seq); - -/* - * from selex.c - */ -extern int DealignAseqs(char **aseqs, int num, char ***ret_rseqs); -extern int IsSELEXFormat(char *filename); -extern int TruncateNames(char **names, int N); /* OBSOLETE? 
*/ - -/* - * from seqencode.c - */ -extern int seqcmp(char *s1, char *s2, int allow); -extern int seqncmp(char *s1, char *s2, int n, int allow); -extern int seqencode(char *codeseq,char *str); -extern int coded_revcomp(char *comp, char *seq); -extern int seqdecode(char *str, char *codeseq); -extern int seqndecode(char *str, char *codeseq, int n); - -/* - * from shuffle.c - */ -extern int StrShuffle(char *s1, char *s2); -extern int StrDPShuffle(char *s1, char *s2); -extern int StrMarkov0(char *s1, char *s2); -extern int StrMarkov1(char *s1, char *s2); -extern int StrReverse(char *s1, char *s2); -extern int StrRegionalShuffle(char *s1, char *s2, int w); -extern int AlignmentShuffle(char **ali1, char **ali2, int nseq, int alen); -extern int AlignmentBootstrap(char **ali1, char **ali2, int nseq, int alen); - -/* - * from sqerror.c - */ -extern void Die(char *format, ...); -extern void Warn(char *format, ...); -extern void Panic(char *file, int line); - - -/* - * from sqio.c - */ -extern void FreeSequence(char *seq, SQINFO *sqinfo); -extern int SetSeqinfoString(SQINFO *sqinfo, char *sptr, int flag); -extern void SeqinfoCopy(SQINFO *sq1, SQINFO *sq2); -extern void ToDNA(char *seq); -extern void ToRNA(char *seq); -extern void ToIUPAC(char *seq); -extern int ReadMultipleRseqs(char *seqfile, int fformat, char ***ret_rseqs, - SQINFO **ret_sqinfo, int *ret_num); -extern SQFILE *SeqfileOpen(char *filename, int format, char *env); -extern SQFILE *SeqfileOpenForIndexing(char *filename, int format, char *env, int ssimode); -extern int SeqfileFormat(FILE *fp); -extern void SeqfilePosition(SQFILE *sfp, SSIOFFSET *offset); -extern void SeqfileRewind(SQFILE *sfp); -extern void SeqfileClose(SQFILE *sfp); - -extern int ReadSeq(SQFILE *fp, int format, char **ret_seq, SQINFO *sqinfo); -extern int GCGBinaryToSequence(char *seq, int len); -extern int GCGchecksum(char *seq, int seqlen); -extern int GCGMultchecksum(char **seqs, int nseq); -extern void WriteSimpleFASTA(FILE *fp, char *seq, 
char *name, char *desc); -extern int WriteSeq(FILE *outf, int outfmt, char *seq, SQINFO *sqinfo); -extern int Seqtype(char *seq); -extern int GuessAlignmentSeqtype(char **aseq, int nseq); -extern int String2SeqfileFormat(char *s); -extern char *SeqfileFormat2String(int code); -extern SQINFO *MSAToSqinfo(MSA *msa); - -/* from squidcore.c - */ -extern void Banner(FILE *fp, char *banner); - - -/* from sre_ctype.c - */ -extern int sre_tolower(int c); -extern int sre_toupper(int c); - -/* from sre_math.c - */ -extern float ExponentialRandom(void); -extern float Gaussrandom(float mean, float stddev); -extern int Linefit(float *x, float *y, int N, - float *ret_a, float *ret_b, float *ret_r); -extern void WeightedLinefit(float *x, float *y, float *var, int N, - float *ret_m, float *ret_b); -extern double Gammln(double xx); -extern int DNorm(double *vec, int n); -extern int FNorm(float *vec, int n); -extern void DScale(double *vec, int n, double scale); -extern void FScale(float *vec, int n, float scale); -extern void DSet(double *vec, int n, double value); -extern void FSet(float *vec, int n, float value); -extern double DSum(double *vec, int n); -extern float FSum(float *vec, int n); -extern void DAdd(double *vec1, double *vec2, int n); -extern void FAdd(float *vec1, float *vec2, int n); -extern void DCopy(double *vec1, double *vec2, int n); -extern void FCopy(float *vec1, float *vec2, int n); -extern int DMax(double *vec, int n); -extern int FMax(float *vec, int n); -extern double DDot(double *vec1, double *vec2, int n); -extern float FDot(float *vec1, float *vec2, int n); -extern float **FMX2Alloc(int rows, int cols); -extern void FMX2Free(float **mx); -extern double **DMX2Alloc(int rows, int cols); -extern void DMX2Free(double **mx); -extern void FMX2Multiply(float **A, float **B, float **C, int m, int p, int n); -extern float sre_random(void); -extern void sre_srandom(int seed); -extern int DChoose(double *p, int n); -extern int FChoose(float *p, int n); -extern 
double DLogSum(double *logp, int n); -extern float FLogSum(float *logp, int n); -extern double IncompleteGamma(double a, double x); - -/* from sre_string.c - */ -#ifdef NOSTR -extern char *strstr(char *s, char *subs); -#endif -extern char *Strdup(char *s); -extern void StringChop(char *s); -extern int Strinsert(char *s1, char c, int pos); -extern int Strdelete(char *s1, int pos); -extern void s2lower(char *s); -extern void s2upper(char *s); -extern void *sre_malloc(char *file, int line, size_t size); -extern void *sre_realloc(char *file, int line, void *p, size_t size); -extern void Free2DArray(void **p, int dim1); -extern void Free3DArray(void ***p, int dim1, int dim2); -extern char *RandomSequence(char *alphabet, float *p, int n, int len); -extern char *sre_fgets(char **buf, int *n, FILE *fp); -extern int sre_strcat(char **dest, int ldest, char *src, int lsrc); -extern char *sre_strtok(char **s, char *delim, int *len); -extern char *sre_strdup(char *s, int n); -extern char *sre_strncat(char *s1, char *s2, int n); -extern int IsBlankline(char *s); - -/* from stack.c - */ -extern struct intstack_s *InitIntStack(void); -extern void PushIntStack(struct intstack_s *stack, int data); -extern int PopIntStack(struct intstack_s *stack, int *ret_data); -extern void ReverseIntStack(struct intstack_s *stack); -extern int FreeIntStack( struct intstack_s *stack ); - -/* - * from translate.c - */ -extern char *Translate(char *seq, char **code); - -/* - * from types.c - */ -extern int IsInt(char *s); -extern int IsReal(char *s); -extern void Byteswap(char *swap, int nbytes); -#ifndef USE_HOST_BYTESWAP_FUNCTIONS -extern sqd_uint16 sre_ntoh16(sqd_uint16 netshort); -extern sqd_uint32 sre_ntoh32(sqd_uint32 netlong); -extern sqd_uint16 sre_hton16(sqd_uint16 hostshort); -extern sqd_uint32 sre_hton32(sqd_uint32 hostlong); -#endif /*!USE_HOST_BYTESWAP_FUNCTIONS*/ -extern sqd_uint64 sre_ntoh64(sqd_uint64 net_int64); -extern sqd_uint64 sre_hton64(sqd_uint64 host_int64); - -/* - * from 
weight.c - */ -extern void GSCWeights(char **aseq, int nseq, int alen, float *wgt); -extern void VoronoiWeights(char **aseq, int nseq, int alen, float *wgt); -extern void BlosumWeights(char **aseq, int nseq, int alen, float blosumlevel, float *wgt); -extern void PositionBasedWeights(char **aseq, int nseq, int alen, float *wgt); -extern void FilterAlignment(MSA *msa, float cutoff, MSA **ret_new); -extern void SampleAlignment(MSA *msa, int sample, MSA **ret_new); -extern void SingleLinkCluster(char **aseq, int nseq, int alen, float maxid, - int **ret_c, int *ret_nc); -#endif /* SQFUNCSH_INCLUDED */ diff --git a/forester/archive/RIO/others/hmmer/squid/sqio.c b/forester/archive/RIO/others/hmmer/squid/sqio.c deleted file mode 100644 index 4192f59..0000000 --- a/forester/archive/RIO/others/hmmer/squid/sqio.c +++ /dev/null @@ -1,1901 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* File: sqio.c - * From: ureadseq.c in Don Gilbert's sequence i/o package - * - * Reads and writes nucleic/protein sequence in various - * formats. Data files may have multiple sequences. - * - * Heavily modified from READSEQ package - * Copyright (C) 1990 by D.G. Gilbert - * Biology Dept., Indiana University, Bloomington, IN 47405 - * email: gilbertd@bio.indiana.edu - * Thanks Don! - * - * SRE: Modifications as noted. 
Fri Jul 3 09:44:54 1992 - * Packaged for squid, Thu Oct 1 10:07:11 1992 - * ANSI conversion in full swing, Mon Jul 12 12:22:21 1993 - * - * CVS $Id: sqio.c,v 1.1.1.1 2005/03/22 08:34:29 cmzmasek Exp $ - * - ***************************************************************** - * Basic API for single sequence reading: - * - * SQFILE *sqfp; - * char *seqfile; - * int format; - see squid.h for formats; example: SQFILE_FASTA - * char *seq; - * SQINFO *sqinfo; - * - * if ((sqfp = SeqfileOpen(seqfile, format, "BLASTDB")) == NULL) - * Die("Failed to open sequence database file %s\n%s\n", seqfile, usage); - * while (ReadSeq(sqfp, sqfp->format, &seq, &sqinfo)) { - * do_stuff; - * FreeSequence(seq, &sqinfo); - * } - * SeqfileClose(sqfp); - * - ***************************************************************** - */ - -#include -#include -#include -#include - -#ifndef SEEK_SET -#include -#endif - -#include "squid.h" -#include "msa.h" -#include "ssi.h" - -static void SeqfileGetLine(SQFILE *V); - -#define kStartLength 500 - -static char *aminos = "ABCDEFGHIKLMNPQRSTVWXYZ*"; -static char *primenuc = "ACGTUN"; -static char *protonly = "EFIPQZ"; - -static SQFILE *seqfile_open(char *filename, int format, char *env, int ssimode); - -/* Function: SeqfileOpen() - * - * Purpose : Open a sequence database file and prepare for reading - * sequentially. - * - * Args: filename - name of file to open - * format - format of file - * env - environment variable for path (e.g. BLASTDB) - * ssimode - -1, SSI_OFFSET_I32, or SSI_OFFSET_I64 - * - * Returns opened SQFILE ptr, or NULL on failure. 
- */ -SQFILE * -SeqfileOpen(char *filename, int format, char *env) -{ - return seqfile_open(filename, format, env, -1); -} -SQFILE * -SeqfileOpenForIndexing(char *filename, int format, char *env, int ssimode) -{ - return seqfile_open(filename, format, env, ssimode); -} -static SQFILE * -seqfile_open(char *filename, int format, char *env, int ssimode) -{ - SQFILE *dbfp; - - dbfp = (SQFILE *) MallocOrDie (sizeof(SQFILE)); - - dbfp->ssimode = ssimode; - dbfp->rpl = -1; /* flag meaning "unset" */ - dbfp->lastrpl = 0; - dbfp->maxrpl = 0; - dbfp->bpl = -1; /* flag meaning "unset" */ - dbfp->lastbpl = 0; - dbfp->maxbpl = 0; - - /* Open our file handle. - * Three possibilities: - * 1. normal file open - * 2. filename = "-"; read from stdin - * 3. filename = "*.gz"; read thru pipe from gzip - * If we're reading from stdin or a pipe, we can't reliably - * back up, so we can't do two-pass parsers like the interleaved alignment - * formats. - */ - if (strcmp(filename, "-") == 0) - { - dbfp->f = stdin; - dbfp->do_stdin = TRUE; - dbfp->do_gzip = FALSE; - dbfp->fname = sre_strdup("[STDIN]", -1); - } -#ifndef SRE_STRICT_ANSI - /* popen(), pclose() aren't portable to non-POSIX systems; disable */ - else if (Strparse("^.*\\.gz$", filename, 0)) - { - char cmd[256]; - - /* Note that popen() will return "successfully" - * if file doesn't exist, because gzip works fine - * and prints an error! So we have to check for - * existence of file ourself. - */ - if (! 
FileExists(filename)) - Die("%s: file does not exist", filename); - - if (strlen(filename) + strlen("gzip -dc ") >= 256) - Die("filename > 255 char in SeqfileOpen()"); - sprintf(cmd, "gzip -dc %s", filename); - if ((dbfp->f = popen(cmd, "r")) == NULL) - return NULL; - - dbfp->do_stdin = FALSE; - dbfp->do_gzip = TRUE; - dbfp->fname = sre_strdup(filename, -1); - } -#endif /*SRE_STRICT_ANSI*/ - else - { - if ((dbfp->f = fopen(filename, "r")) == NULL && - (dbfp->f = EnvFileOpen(filename, env, NULL)) == NULL) - return NULL; - - dbfp->do_stdin = FALSE; - dbfp->do_gzip = FALSE; - dbfp->fname = sre_strdup(filename, -1); - } - - - /* Invoke autodetection if we haven't already been told what - * to expect. - */ - if (format == SQFILE_UNKNOWN) - { - if (dbfp->do_stdin == TRUE || dbfp->do_gzip) - Die("Can't autodetect sequence file format from a stdin or gzip pipe"); - format = SeqfileFormat(dbfp->f); - if (format == SQFILE_UNKNOWN) - Die("Can't determine format of sequence file %s", dbfp->fname); - } - - /* The hack for sequential access of an interleaved alignment file: - * read the alignment in, we'll copy sequences out one at a time. - */ - dbfp->msa = NULL; - dbfp->afp = NULL; - dbfp->format = format; - dbfp->linenumber = 0; - dbfp->buf = NULL; - dbfp->buflen = 0; - if (IsAlignmentFormat(format)) - { - /* We'll be reading from the MSA interface. Copy our data - * to the MSA afp's structure. - */ - dbfp->afp = MallocOrDie(sizeof(MSAFILE)); - dbfp->afp->f = dbfp->f; /* just a ptr, don't close */ - dbfp->afp->do_stdin = dbfp->do_stdin; - dbfp->afp->do_gzip = dbfp->do_gzip; - dbfp->afp->fname = dbfp->fname; /* just a ptr, don't free */ - dbfp->afp->format = dbfp->format; /* e.g. format */ - dbfp->afp->linenumber = dbfp->linenumber; /* e.g. 
0 */ - dbfp->afp->buf = NULL; - dbfp->afp->buflen = 0; - - if ((dbfp->msa = MSAFileRead(dbfp->afp)) == NULL) - Die("Failed to read any alignment data from file %s", dbfp->fname); - /* hack: overload/reuse msa->lastidx; indicates - next seq to return upon a ReadSeq() call */ - dbfp->msa->lastidx = 0; - - return dbfp; - } - - /* Load the first line. - */ - SeqfileGetLine(dbfp); - return dbfp; -} - -/* Function: SeqfilePosition() - * - * Purpose: Move to a particular offset in a seqfile. - * Will not work on alignment files. - */ -void -SeqfilePosition(SQFILE *sqfp, SSIOFFSET *offset) -{ - if (sqfp->do_stdin || sqfp->do_gzip || IsAlignmentFormat(sqfp->format)) - Die("SeqfilePosition() failed: in a nonrewindable data file or stream"); - - if (SSISetFilePosition(sqfp->f, offset) != 0) - Die("SSISetFilePosition failed, but that shouldn't happen."); - SeqfileGetLine(sqfp); -} - - -/* Function: SeqfileRewind() - * - * Purpose: Set a sequence file back to the first sequence. - * - * Won't work on alignment files. Although it would - * seem that it could (just set msa->lastidx back to 0), - * that'll fail on "multiple multiple" alignment file formats - * (e.g. Stockholm). - */ -void -SeqfileRewind(SQFILE *sqfp) -{ - if (sqfp->do_stdin || sqfp->do_gzip) - Die("SeqfileRewind() failed: in a nonrewindable data file or stream"); - - rewind(sqfp->f); - SeqfileGetLine(sqfp); -} - -/* Function: SeqfileLineParameters() - * Date: SRE, Thu Feb 15 17:00:41 2001 [St. Louis] - * - * Purpose: After all the sequences have been read from the file, - * but before closing it, retrieve overall bytes-per-line and - * residues-per-line info. If non-zero, these mean that - * the file contains homogeneous sequence line lengths (except - * the last line in each record). - * - * If either of bpl or rpl is determined to be inhomogeneous, - * both are returned as 0. 
- * - * Args: *sqfp - an open but fully read sequence file - * ret_bpl - RETURN: bytes per line, or 0 if inhomogeneous - * ret_rpl - RETURN: residues per line, or 0 if inhomogenous. - * - * Returns: void - */ -void -SeqfileLineParameters(SQFILE *V, int *ret_bpl, int *ret_rpl) -{ - if (V->rpl > 0 && V->maxrpl == V->rpl && - V->bpl > 0 && V->maxbpl == V->bpl) { - *ret_bpl = V->bpl; - *ret_rpl = V->rpl; - } else { - *ret_bpl = 0; - *ret_rpl = 0; - } -} - - -void -SeqfileClose(SQFILE *sqfp) -{ - /* note: don't test for sqfp->msa being NULL. Now that - * we're holding afp open and allowing access to multi-MSA - * databases (e.g. Stockholm format, Pfam), msa ends - * up being NULL when we run out of alignments. - */ - if (sqfp->afp != NULL) { - if (sqfp->msa != NULL) MSAFree(sqfp->msa); - if (sqfp->afp->buf != NULL) free(sqfp->afp->buf); - free(sqfp->afp); - } -#ifndef SRE_STRICT_ANSI /* gunzip functionality only on POSIX systems */ - if (sqfp->do_gzip) pclose(sqfp->f); -#endif - else if (! sqfp->do_stdin) fclose(sqfp->f); - if (sqfp->buf != NULL) free(sqfp->buf); - if (sqfp->fname != NULL) free(sqfp->fname); - free(sqfp); -} - - -/* Function: SeqfileGetLine() - * Date: SRE, Tue Jun 22 09:15:49 1999 [Sanger Centre] - * - * Purpose: read a line from a sequence file into V->buf - * If the fgets() is NULL, sets V->buf[0] to '\0'. - * - * Args: V - * - * Returns: void - */ -static void -SeqfileGetLine(SQFILE *V) -{ - if (V->ssimode >= 0) - if (0 != SSIGetFilePosition(V->f, V->ssimode, &(V->ssioffset))) - Die("SSIGetFilePosition() failed"); - if (sre_fgets(&(V->buf), &(V->buflen), V->f) == NULL) - *(V->buf) = '\0'; - V->linenumber++; -} - - -void -FreeSequence(char *seq, SQINFO *sqinfo) -{ - if (seq != NULL) free(seq); - if (sqinfo->flags & SQINFO_SS) free(sqinfo->ss); - if (sqinfo->flags & SQINFO_SA) free(sqinfo->sa); -} - -int -SetSeqinfoString(SQINFO *sqinfo, char *sptr, int flag) -{ - int len; - int pos; - - /* silently ignore NULL. 
*/ - if (sptr == NULL) return 1; - - while (*sptr == ' ') sptr++; /* ignore leading whitespace */ - for (pos = strlen(sptr)-1; pos >= 0; pos--) - if (! isspace((int) sptr[pos])) break; - sptr[pos+1] = '\0'; /* ignore trailing whitespace */ - - switch (flag) { - case SQINFO_NAME: - if (*sptr != '-') - { - strncpy(sqinfo->name, sptr, SQINFO_NAMELEN-1); - sqinfo->name[SQINFO_NAMELEN-1] = '\0'; - sqinfo->flags |= SQINFO_NAME; - } - break; - - case SQINFO_ID: - if (*sptr != '-') - { - strncpy(sqinfo->id, sptr, SQINFO_NAMELEN-1); - sqinfo->id[SQINFO_NAMELEN-1] = '\0'; - sqinfo->flags |= SQINFO_ID; - } - break; - - case SQINFO_ACC: - if (*sptr != '-') - { - strncpy(sqinfo->acc, sptr, SQINFO_NAMELEN-1); - sqinfo->acc[SQINFO_NAMELEN-1] = '\0'; - sqinfo->flags |= SQINFO_ACC; - } - break; - - case SQINFO_DESC: - if (*sptr != '-') - { - if (sqinfo->flags & SQINFO_DESC) /* append? */ - { - len = strlen(sqinfo->desc); - if (len < SQINFO_DESCLEN-2) /* is there room? */ - { - strncat(sqinfo->desc, " ", SQINFO_DESCLEN-1-len); len++; - strncat(sqinfo->desc, sptr, SQINFO_DESCLEN-1-len); - } - } - else /* else copy */ - strncpy(sqinfo->desc, sptr, SQINFO_DESCLEN-1); - sqinfo->desc[SQINFO_DESCLEN-1] = '\0'; - sqinfo->flags |= SQINFO_DESC; - } - break; - - case SQINFO_START: - if (!IsInt(sptr)) { squid_errno = SQERR_FORMAT; return 0; } - sqinfo->start = atoi(sptr); - if (sqinfo->start != 0) sqinfo->flags |= SQINFO_START; - break; - - case SQINFO_STOP: - if (!IsInt(sptr)) { squid_errno = SQERR_FORMAT; return 0; } - sqinfo->stop = atoi(sptr); - if (sqinfo->stop != 0) sqinfo->flags |= SQINFO_STOP; - break; - - case SQINFO_OLEN: - if (!IsInt(sptr)) { squid_errno = SQERR_FORMAT; return 0; } - sqinfo->olen = atoi(sptr); - if (sqinfo->olen != 0) sqinfo->flags |= SQINFO_OLEN; - break; - - default: - Die("Invalid flag %d to SetSeqinfoString()", flag); - } - return 1; -} - -void -SeqinfoCopy(SQINFO *sq1, SQINFO *sq2) -{ - sq1->flags = sq2->flags; - if (sq2->flags & SQINFO_NAME) strcpy(sq1->name, 
sq2->name); - if (sq2->flags & SQINFO_ID) strcpy(sq1->id, sq2->id); - if (sq2->flags & SQINFO_ACC) strcpy(sq1->acc, sq2->acc); - if (sq2->flags & SQINFO_DESC) strcpy(sq1->desc, sq2->desc); - if (sq2->flags & SQINFO_LEN) sq1->len = sq2->len; - if (sq2->flags & SQINFO_START) sq1->start = sq2->start; - if (sq2->flags & SQINFO_STOP) sq1->stop = sq2->stop; - if (sq2->flags & SQINFO_OLEN) sq1->olen = sq2->olen; - if (sq2->flags & SQINFO_TYPE) sq1->type = sq2->type; - if (sq2->flags & SQINFO_SS) sq1->ss = Strdup(sq2->ss); - if (sq2->flags & SQINFO_SA) sq1->sa = Strdup(sq2->sa); -} - -/* Function: ToDNA() - * - * Purpose: Convert a sequence to DNA. - * U --> T - */ -void -ToDNA(char *seq) -{ - for (; *seq != '\0'; seq++) - { - if (*seq == 'U') *seq = 'T'; - else if (*seq == 'u') *seq = 't'; - } -} - -/* Function: ToRNA() - * - * Purpose: Convert a sequence to RNA. - * T --> U - */ -void -ToRNA(char *seq) -{ - for (; *seq != '\0'; seq++) - { - if (*seq == 'T') *seq = 'U'; - else if (*seq == 't') *seq = 'u'; - } -} - - -/* Function: ToIUPAC() - * - * Purpose: Convert X's, o's, other junk in a nucleic acid sequence to N's, - * to comply with IUPAC code. Does allow gap characters - * though, so we can call ToIUPAC() on aligned seqs. - * - * WU-BLAST's pressdb will - * choke on X's, for instance, necessitating conversion - * of certain genome centers' data. - */ -void -ToIUPAC(char *seq) -{ - for (; *seq != '\0'; seq++) - if (strchr(NUCLEOTIDES, *seq) == NULL && ! isgap(*seq)) *seq = 'N'; -} - - -/* Function: addseq() - * - * Purpose: Add a line of sequence to the growing string in V. - * Skip all nonalphabetic characters in the input string: - * in particular, spaces and digits (coordinates). This - * allows us to generically read sequence data from most - * any format. 
- */ -static void -addseq(char *s, struct ReadSeqVars *V) -{ - char *s0; - char *sq; - int rpl; /* valid residues per line */ - int bpl; /* characters per line */ - - if (V->ssimode == -1) - { /* Normal mode: keeping the seq */ - /* Make sure we have enough room. We know that s is <= buflen, - * so just make sure we've got room for a whole new buflen worth - * of sequence. - */ - if (V->seqlen + V->buflen > V->maxseq) { - V->maxseq += MAX(V->buflen, kStartLength); - V->seq = ReallocOrDie (V->seq, V->maxseq+1); - } - - s0 = s; - sq = V->seq + V->seqlen; - while (*s != 0) { - if (isalpha((int) *s)) { - *sq = *s; - sq++; - } - s++; - } - V->seqlen = sq - V->seq; - } - else /* else: indexing mode, discard the seq */ - { - s0 = s; - rpl = 0; - while (*s != 0) { - if (isalpha((int) *s)) rpl++; - s++; - } - V->seqlen += rpl; - bpl = s - s0; - - /* Keep track of the global rpl, bpl for the file. - * This is overly complicated because we have to - * allow the last line of each record (e.g. the last addseq() call - * on each sequence) to have a different length - and sometimes - * we'll have one-line sequence records, too. Thus we only - * do something with the global V->rpl when we have *passed over* - * a line - we keep the last line's rpl in last_rpl. And because - * a file might consist entirely of single-line records, we keep - * a third guy, maxrpl, that tells us the maximum rpl of any line - * in the file. If we reach the end of file and rpl is still unset, - * we'll set it to maxrpl. If we reach eof and rpl is set, but is - * less than maxrpl, that's a weird case where a last line in some - * record is longer than every other line. 
- */ - if (V->rpl != 0) { /* 0 means we already know rpl is invalid */ - if (V->lastrpl > 0) { /* we're on something that's not the first line */ - if (V->rpl > 0 && V->lastrpl != V->rpl) V->rpl = 0; - else if (V->rpl == -1) V->rpl = V->lastrpl; - } - V->lastrpl = rpl; - if (rpl > V->maxrpl) V->maxrpl = rpl; /* make sure we check max length of final lines */ - } - if (V->bpl != 0) { /* 0 means we already know bpl is invalid */ - if (V->lastbpl > 0) { /* we're on something that's not the first line */ - if (V->bpl > 0 && V->lastbpl != V->bpl) V->bpl = 0; - else if (V->bpl == -1) V->bpl = V->lastbpl; - } - V->lastbpl = bpl; - if (bpl > V->maxbpl) V->maxbpl = bpl; /* make sure we check max length of final lines */ - } - } /* end of indexing mode of addseq(). */ - -} - -static void -readLoop(int addfirst, int (*endTest)(char *,int *), struct ReadSeqVars *V) -{ - int addend = 0; - int done = 0; - - V->seqlen = 0; - V->lastrpl = V->lastbpl = 0; - if (addfirst) { - if (V->ssimode >= 0) V->d_off = V->ssioffset; - addseq(V->buf, V); - } else if (V->ssimode >= 0) - if (0 != SSIGetFilePosition(V->f, V->ssimode, &(V->d_off))) - Die("SSIGetFilePosition() failed"); - - do { - SeqfileGetLine(V); - /* feof() alone is a bug; files not necessarily \n terminated */ - if (*(V->buf) == '\0' && feof(V->f)) - done = TRUE; - done |= (*endTest)(V->buf, &addend); - if (addend || !done) - addseq(V->buf, V); - } while (!done); -} - - -static int -endPIR(char *s, int *addend) -{ - *addend = 0; - if ((strncmp(s, "///", 3) == 0) || - (strncmp(s, "ENTRY", 5) == 0)) - return 1; - else - return 0; -} - -static void -readPIR(struct ReadSeqVars *V) -{ - char *sptr; - /* load first line of entry */ - while (!feof(V->f) && strncmp(V->buf, "ENTRY", 5) != 0) { - SeqfileGetLine(V); - } - if (feof(V->f)) return; - if (V->ssimode >= 0) V->r_off = V->ssioffset; - - if ((sptr = strtok(V->buf + 15, "\n\t ")) != NULL) - { - SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME); - SetSeqinfoString(V->sqinfo, sptr, 
SQINFO_ID); - } - do { - SeqfileGetLine(V); - if (!feof(V->f) && strncmp(V->buf, "TITLE", 5) == 0) - SetSeqinfoString(V->sqinfo, V->buf+15, SQINFO_DESC); - else if (!feof(V->f) && strncmp(V->buf, "ACCESSION", 9) == 0) - { - if ((sptr = strtok(V->buf+15, " \t\n")) != NULL) - SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC); - } - } while (! feof(V->f) && (strncmp(V->buf,"SEQUENCE", 8) != 0)); - SeqfileGetLine(V); /* skip next line, coords */ - - readLoop(0, endPIR, V); - - /* reading a real PIR-CODATA database file, we keep the source coords - */ - V->sqinfo->start = 1; - V->sqinfo->stop = V->seqlen; - V->sqinfo->olen = V->seqlen; - V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN; - - /* get next line - */ - while (!feof(V->f) && strncmp(V->buf, "ENTRY", 5) != 0) { - SeqfileGetLine(V); - } -} - - - -static int -endIG(char *s, int *addend) -{ - *addend = 1; /* 1 or 2 occur in line w/ bases */ - return((strchr(s,'1')!=NULL) || (strchr(s,'2')!=NULL)); -} - -static void -readIG(struct ReadSeqVars *V) -{ - char *nm; - /* position past ';' comments */ - do { - SeqfileGetLine(V); - } while (! (feof(V->f) || ((*V->buf != 0) && (*V->buf != ';')) )); - - if (!feof(V->f)) - { - if ((nm = strtok(V->buf, "\n\t ")) != NULL) - SetSeqinfoString(V->sqinfo, nm, SQINFO_NAME); - - readLoop(0, endIG, V); - } - - while (!(feof(V->f) || ((*V->buf != '\0') && (*V->buf == ';')))) - SeqfileGetLine(V); -} - -static int -endStrider(char *s, int *addend) -{ - *addend = 0; - return (strstr( s, "//") != NULL); -} - -static void -readStrider(struct ReadSeqVars *V) -{ - char *nm; - - while ((!feof(V->f)) && (*V->buf == ';')) - { - if (strncmp(V->buf,"; DNA sequence", 14) == 0) - { - if ((nm = strtok(V->buf+16, ",\n\t ")) != NULL) - SetSeqinfoString(V->sqinfo, nm, SQINFO_NAME); - } - SeqfileGetLine(V); - } - - if (! 
feof(V->f)) - readLoop(1, endStrider, V); - - /* load next line - */ - while ((!feof(V->f)) && (*V->buf != ';')) - SeqfileGetLine(V); -} - - -static int -endGB(char *s, int *addend) -{ - *addend = 0; - return ((strstr(s,"//") != NULL) || (strstr(s,"LOCUS") == s)); -} - -static void -readGenBank(struct ReadSeqVars *V) -{ - char *sptr; - int in_definition; - - while (strncmp(V->buf, "LOCUS", 5) != 0) { - SeqfileGetLine(V); - } - if (V->ssimode >= 0) V->r_off = V->ssioffset; - - if ((sptr = strtok(V->buf+12, "\n\t ")) != NULL) - { - SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME); - SetSeqinfoString(V->sqinfo, sptr, SQINFO_ID); - } - - in_definition = FALSE; - while (! feof(V->f)) - { - SeqfileGetLine(V); - if (! feof(V->f) && strstr(V->buf, "DEFINITION") == V->buf) - { - if ((sptr = strtok(V->buf+12, "\n")) != NULL) - SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC); - in_definition = TRUE; - } - else if (! feof(V->f) && strstr(V->buf, "ACCESSION") == V->buf) - { - if ((sptr = strtok(V->buf+12, "\n\t ")) != NULL) - SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC); - in_definition = FALSE; - } - else if (strncmp(V->buf,"ORIGIN", 6) != 0) - { - if (in_definition) - SetSeqinfoString(V->sqinfo, V->buf, SQINFO_DESC); - } - else - break; - } - - readLoop(0, endGB, V); - - /* reading a real GenBank database file, we keep the source coords - */ - V->sqinfo->start = 1; - V->sqinfo->stop = V->seqlen; - V->sqinfo->olen = V->seqlen; - V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN; - - - while (!(feof(V->f) || ((*V->buf!=0) && (strstr(V->buf,"LOCUS") == V->buf)))) - SeqfileGetLine(V); - /* SRE: V->s now holds "//", so sequential - reads are wedged: fixed Tue Jul 13 1993 */ - while (!feof(V->f) && strstr(V->buf, "LOCUS ") != V->buf) - SeqfileGetLine(V); -} - -static int -endGCGdata(char *s, int *addend) -{ - *addend = 0; - return (*s == '>'); -} - -static void -readGCGdata(struct ReadSeqVars *V) -{ - int binary = FALSE; /* whether data are binary or not */ - int blen = 0; 
/* length of binary sequence */ - - /* first line contains ">>>>" followed by name */ - if (Strparse(">>>>([^ ]+) .+2BIT +Len: ([0-9]+)", V->buf, 2)) - { - binary = TRUE; - SetSeqinfoString(V->sqinfo, sqd_parse[1], SQINFO_NAME); - blen = atoi(sqd_parse[2]); - } - else if (Strparse(">>>>([^ ]+) .+ASCII +Len: [0-9]+", V->buf, 1)) - SetSeqinfoString(V->sqinfo, sqd_parse[1], SQINFO_NAME); - else - Die("bogus GCGdata format? %s", V->buf); - - /* second line contains free text description */ - SeqfileGetLine(V); - SetSeqinfoString(V->sqinfo, V->buf, SQINFO_DESC); - - if (binary) { - /* allocate for blen characters +3... (allow for 3 bytes of slop) */ - if (blen >= V->maxseq) { - V->maxseq = blen; - if ((V->seq = (char *) realloc (V->seq, sizeof(char)*(V->maxseq+4)))==NULL) - Die("malloc failed"); - } - /* read (blen+3)/4 bytes from file */ - if (fread(V->seq, sizeof(char), (blen+3)/4, V->f) < (size_t) ((blen+3)/4)) - Die("fread failed"); - V->seqlen = blen; - /* convert binary code to seq */ - GCGBinaryToSequence(V->seq, blen); - } - else readLoop(0, endGCGdata, V); - - while (!(feof(V->f) || ((*V->buf != 0) && (*V->buf == '>')))) - SeqfileGetLine(V); -} - -static int -endPearson(char *s, int *addend) -{ - *addend = 0; - return(*s == '>'); -} - -static void -readPearson(struct ReadSeqVars *V) -{ - char *sptr; - - if (V->ssimode >= 0) V->r_off = V->ssioffset; - - if (*V->buf != '>') - Die("\ -File %s does not appear to be in FASTA format at line %d.\n\ -You may want to invoke the Babelfish to autodetect your file's format.\n\ -Usually this is done with a -B option.\n", - V->fname, V->linenumber); - - if ((sptr = strtok(V->buf+1, "\n\t ")) != NULL) - SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME); - if ((sptr = strtok(NULL, "\n")) != NULL) - SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC); - - readLoop(0, endPearson, V); - - while (!(feof(V->f) || ((*V->buf != 0) && (*V->buf == '>')))) { - SeqfileGetLine(V); - } -} - - -static int -endEMBL(char *s, int *addend) -{ - *addend 
= 0; - /* Some people (Berlin 5S rRNA database, f'r instance) use - * an extended EMBL format that attaches extra data after - * the sequence -- watch out for that. We use the fact that - * real EMBL sequence lines begin with five spaces. - * - * We can use this as the sole end test because readEMBL() will - * advance to the next ID line before starting to read again. - */ - return (strncmp(s," ",5) != 0); -/* return ((strstr(s,"//") != NULL) || (strstr(s,"ID ") == s)); */ -} - -static void -readEMBL(struct ReadSeqVars *V) -{ - char *sptr; - - /* make sure we have first line */ - while (!feof(V->f) && strncmp(V->buf, "ID ", 4) != 0) { - SeqfileGetLine(V); - } - if (V->ssimode >= 0) V->r_off = V->ssioffset; - - if ((sptr = strtok(V->buf+5, "\n\t ")) != NULL) - { - SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME); - SetSeqinfoString(V->sqinfo, sptr, SQINFO_ID); - } - - do { - SeqfileGetLine(V); - if (!feof(V->f) && strstr(V->buf, "AC ") == V->buf) - { - if ((sptr = strtok(V->buf+5, "; \t\n")) != NULL) - SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC); - } - else if (!feof(V->f) && strstr(V->buf, "DE ") == V->buf) - { - if ((sptr = strtok(V->buf+5, "\n")) != NULL) - SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC); - } - } while (! 
feof(V->f) && strncmp(V->buf,"SQ",2) != 0); - - readLoop(0, endEMBL, V); - - /* Hack for Staden experiment files: convert - to N - */ - if (V->ssimode == -1) /* if we're in ssi mode, we're not keeping the seq */ - for (sptr = V->seq; *sptr != '\0'; sptr++) - if (*sptr == '-') *sptr = 'N'; - - /* reading a real EMBL database file, we keep the source coords - */ - V->sqinfo->start = 1; - V->sqinfo->stop = V->seqlen; - V->sqinfo->olen = V->seqlen; - V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN; - - /* load next record's ID line */ - while (!feof(V->f) && strncmp(V->buf, "ID ", 4) != 0) { - SeqfileGetLine(V); - } - -} - - -static int -endZuker(char *s, int *addend) -{ - *addend = 0; - return( *s == '(' ); -} - -static void -readZuker(struct ReadSeqVars *V) -{ - char *sptr; - - SeqfileGetLine(V); /*s == "seqLen seqid string..."*/ - - if ((sptr = strtok(V->buf+6, " \t\n")) != NULL) - SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME); - - if ((sptr = strtok(NULL, "\n")) != NULL) - SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC); - - readLoop(0, endZuker, V); - - while (!(feof(V->f) | ((*V->buf != '\0') & (*V->buf == '(')))) - SeqfileGetLine(V); -} - -static void -readUWGCG(struct ReadSeqVars *V) -{ - char *si; - char *sptr; - int done; - - V->seqlen = 0; - - /*writeseq: " %s Length: %d (today) Check: %d ..\n" */ - /*drop above or ".." from id*/ - if ((si = strstr(V->buf," Length: ")) != NULL) *si = 0; - else if ((si = strstr(V->buf,"..")) != NULL) *si = 0; - - if ((sptr = strtok(V->buf, "\n\t ")) != NULL) - SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME); - - do { - done = feof(V->f); - SeqfileGetLine(V); - if (! done) addseq(V->buf, V); - } while (!done); -} - - -/* Function: ReadSeq() - * - * Purpose: Read next sequence from an open database file. - * Return the sequence and associated info. - * - * Args: fp - open sequence database file pointer - * format - format of the file (previously determined - * by call to SeqfileFormat()). 
- * Currently unused, since we carry it in V. - * ret_seq - RETURN: sequence - * sqinfo - RETURN: filled in w/ other information - * - * Limitations: uses squid_errno, so it's not threadsafe. - * - * Return: 1 on success, 0 on failure. - * ret_seq and some field of sqinfo are allocated here, - * The preferred call mechanism to properly free the memory is: - * - * SQINFO sqinfo; - * char *seq; - * - * ReadSeq(fp, format, &seq, &sqinfo); - * ... do something... - * FreeSequence(seq, &sqinfo); - */ -int -ReadSeq(SQFILE *V, int format, char **ret_seq, SQINFO *sqinfo) -{ - int gotuw; - - squid_errno = SQERR_OK; - - /* Here's the hack for sequential access of sequences from - * the multiple sequence alignment formats - */ - if (IsAlignmentFormat(V->format)) - { - if (V->msa->lastidx >= V->msa->nseq) - { /* out of data. try to read another alignment */ - MSAFree(V->msa); - if ((V->msa = MSAFileRead(V->afp)) == NULL) - return 0; - V->msa->lastidx = 0; - } - /* copy and dealign the appropriate aligned seq */ - MakeDealignedString(V->msa->aseq[V->msa->lastidx], V->msa->alen, - V->msa->aseq[V->msa->lastidx], &(V->seq)); - V->seqlen = strlen(V->seq); - - /* Extract sqinfo stuff for this sequence from the msa. - * Tedious; code that should be cleaned. 
- */ - sqinfo->flags = 0; - if (V->msa->sqname[V->msa->lastidx] != NULL) - SetSeqinfoString(sqinfo, V->msa->sqname[V->msa->lastidx], SQINFO_NAME); - if (V->msa->sqacc != NULL && V->msa->sqacc[V->msa->lastidx] != NULL) - SetSeqinfoString(sqinfo, V->msa->sqacc[V->msa->lastidx], SQINFO_ACC); - if (V->msa->sqdesc != NULL && V->msa->sqdesc[V->msa->lastidx] != NULL) - SetSeqinfoString(sqinfo, V->msa->sqdesc[V->msa->lastidx], SQINFO_DESC); - if (V->msa->ss != NULL && V->msa->ss[V->msa->lastidx] != NULL) { - MakeDealignedString(V->msa->aseq[V->msa->lastidx], V->msa->alen, - V->msa->ss[V->msa->lastidx], &(sqinfo->ss)); - sqinfo->flags |= SQINFO_SS; - } - if (V->msa->sa != NULL && V->msa->sa[V->msa->lastidx] != NULL) { - MakeDealignedString(V->msa->aseq[V->msa->lastidx], V->msa->alen, - V->msa->sa[V->msa->lastidx], &(sqinfo->sa)); - sqinfo->flags |= SQINFO_SA; - } - V->msa->lastidx++; - } - else { - if (feof(V->f)) return 0; - - if (V->ssimode == -1) { /* normal mode */ - V->seq = (char*) calloc (kStartLength+1, sizeof(char)); - V->maxseq = kStartLength; - } else { /* index mode: discarding seq */ - V->seq = NULL; - V->maxseq = 0; - } - V->seqlen = 0; - V->sqinfo = sqinfo; - V->sqinfo->flags = 0; - - switch (V->format) { - case SQFILE_IG : readIG(V); break; - case SQFILE_STRIDER : readStrider(V); break; - case SQFILE_GENBANK : readGenBank(V); break; - case SQFILE_FASTA : readPearson(V); break; - case SQFILE_EMBL : readEMBL(V); break; - case SQFILE_ZUKER : readZuker(V); break; - case SQFILE_PIR : readPIR(V); break; - case SQFILE_GCGDATA : readGCGdata(V); break; - - case SQFILE_GCG : - do { /* skip leading comments on GCG file */ - gotuw = (strstr(V->buf,"..") != NULL); - if (gotuw) readUWGCG(V); - SeqfileGetLine(V); - } while (! 
feof(V->f)); - break; - - case SQFILE_IDRAW: /* SRE: no attempt to read idraw postscript */ - default: - squid_errno = SQERR_FORMAT; - free(V->seq); - return 0; - } - if (V->seq != NULL) /* (it can be NULL in indexing mode) */ - V->seq[V->seqlen] = 0; /* stick a string terminator on it */ - } - - /* Cleanup - */ - sqinfo->len = V->seqlen; - sqinfo->flags |= SQINFO_LEN; - *ret_seq = V->seq; - if (squid_errno == SQERR_OK) return 1; else return 0; -} - -/* Function: SeqfileFormat() - * Date: SRE, Tue Jun 22 10:58:58 1999 [Sanger Centre] - * - * Purpose: Determine format of an open file. - * Returns format code. - * Rewinds the file. - * - * Autodetects the following unaligned formats: - * SQFILE_FASTA - * SQFILE_GENBANK - * SQFILE_EMBL - * SQFILE_GCG - * SQFILE_GCGDATA - * SQFILE_PIR - * Also autodetects the following alignment formats: - * MSAFILE_STOCKHOLM - * MSAFILE_MSF - * MSAFILE_CLUSTAL - * MSAFILE_SELEX - * MSAFILE_PHYLIP - * - * Can't autodetect MSAFILE_A2M, calls it SQFILE_FASTA. - * MSAFileFormat() does the opposite. - * - * Args: sfp - open SQFILE - * - * Return: format code, or SQFILE_UNKNOWN if unrecognized - */ -int -SeqfileFormat(FILE *fp) -{ - char *buf; - int len; - int fmt = SQFILE_UNKNOWN; - int ndataline; - char *bufcpy, *s, *s1, *s2; - int has_junk; - - buf = NULL; - len = 0; - ndataline = 0; - has_junk = FALSE; - while (sre_fgets(&buf, &len, fp) != NULL) - { - if (IsBlankline(buf)) continue; - - /* Well-behaved formats identify themselves in first nonblank line. 
- */ - if (ndataline == 0) - { - if (strncmp(buf, ">>>>", 4) == 0 && strstr(buf, "Len: ")) - { fmt = SQFILE_GCGDATA; goto DONE; } - - if (buf[0] == '>') - { fmt = SQFILE_FASTA; goto DONE; } - - if (strncmp(buf, "!!AA_SEQUENCE", 13) == 0 || - strncmp(buf, "!!NA_SEQUENCE", 13) == 0) - { fmt = SQFILE_GCG; goto DONE; } - - if (strncmp(buf, "# STOCKHOLM 1.", 14) == 0) - { fmt = MSAFILE_STOCKHOLM; goto DONE; } - - if (strncmp(buf, "CLUSTAL", 7) == 0 && - strstr(buf, "multiple sequence alignment") != NULL) - { fmt = MSAFILE_CLUSTAL; goto DONE; } - - if (strncmp(buf, "!!AA_MULTIPLE_ALIGNMENT", 23) == 0 || - strncmp(buf, "!!NA_MULTIPLE_ALIGNMENT", 23) == 0) - { fmt = MSAFILE_MSF; goto DONE; } - - /* PHYLIP id: also just a good bet */ - bufcpy = sre_strdup(buf, -1); - s = bufcpy; - if ((s1 = sre_strtok(&s, WHITESPACE, NULL)) != NULL && - (s2 = sre_strtok(&s, WHITESPACE, NULL)) != NULL && - IsInt(s1) && - IsInt(s2)) - { free(bufcpy); fmt = MSAFILE_PHYLIP; goto DONE; } - free(bufcpy); - } - - /* We trust that other formats identify themselves soon. - */ - /* dead giveaways for extended SELEX */ - if (strncmp(buf, "#=AU", 4) == 0 || - strncmp(buf, "#=ID", 4) == 0 || - strncmp(buf, "#=AC", 4) == 0 || - strncmp(buf, "#=DE", 4) == 0 || - strncmp(buf, "#=GA", 4) == 0 || - strncmp(buf, "#=TC", 4) == 0 || - strncmp(buf, "#=NC", 4) == 0 || - strncmp(buf, "#=SQ", 4) == 0 || - strncmp(buf, "#=SS", 4) == 0 || - strncmp(buf, "#=CS", 4) == 0 || - strncmp(buf, "#=RF", 4) == 0) - { fmt = MSAFILE_SELEX; goto DONE; } - - if (strncmp(buf, "///", 3) == 0 || strncmp(buf, "ENTRY ", 6) == 0) - { fmt = SQFILE_PIR; goto DONE; } - - /* a ha, diagnostic of an (old) MSF file */ - if ((strstr(buf, "..") != NULL) && - (strstr(buf, "MSF:") != NULL) && - (strstr(buf, "Check:")!= NULL)) - { fmt = MSAFILE_MSF; goto DONE; } - - /* unaligned GCG (must follow MSF test!) 
*/ - if (strstr(buf, " Check: ") != NULL && strstr(buf, "..") != NULL) - { fmt = SQFILE_GCG; goto DONE; } - - if (strncmp(buf,"LOCUS ",6) == 0 || strncmp(buf,"ORIGIN ",6) == 0) - { fmt = SQFILE_GENBANK; goto DONE; } - - if (strncmp(buf,"ID ",5) == 0 || strncmp(buf,"SQ ",5) == 0) - { fmt = SQFILE_EMBL; goto DONE; } - - /* But past here, we're being desperate. A simple SELEX file is - * very difficult to detect; we can only try to disprove it. - */ - s = buf; - if ((s1 = sre_strtok(&s, WHITESPACE, NULL)) == NULL) continue; /* skip blank lines */ - if (strchr("#%", *s1) != NULL) continue; /* skip comment lines */ - - /* Disproof 1. Noncomment, nonblank lines in a SELEX file - * must have at least two space-delimited fields (name/seq) - */ - if ((s2 = sre_strtok(&s, WHITESPACE, NULL)) == NULL) - has_junk = TRUE; - - /* Disproof 2. - * The sequence field should look like a sequence. - */ - if (s2 != NULL && Seqtype(s2) == kOtherSeq) - has_junk = TRUE; - - ndataline++; - if (ndataline == 300) break; /* only look at first 300 lines */ - } - - if (ndataline == 0) - Die("Sequence file contains no data"); - - /* If we've made it this far, we've run out of data, but there - * was at least one line of it; check if we've - * disproven SELEX. If not, cross our fingers, pray, and guess SELEX. - */ - if (has_junk == TRUE) fmt = SQFILE_UNKNOWN; - else fmt = MSAFILE_SELEX; - - DONE: - if (buf != NULL) free(buf); - rewind(fp); - return fmt; -} - -/* Function: GCGBinaryToSequence() - * - * Purpose: Convert a GCG 2BIT binary string to DNA sequence. - * 0 = C 1 = T 2 = A 3 = G - * 4 nts/byte - * - * Args: seq - binary sequence. Converted in place to DNA. - * len - length of DNA. 
binary is (len+3)/4 bytes - */ -int -GCGBinaryToSequence(char *seq, int len) -{ - int bpos; /* position in binary */ - int spos; /* position in sequence */ - char twobit; - int i; - - for (bpos = (len-1)/4; bpos >= 0; bpos--) - { - twobit = seq[bpos]; - spos = bpos*4; - - for (i = 3; i >= 0; i--) - { - switch (twobit & 0x3) { - case 0: seq[spos+i] = 'C'; break; - case 1: seq[spos+i] = 'T'; break; - case 2: seq[spos+i] = 'A'; break; - case 3: seq[spos+i] = 'G'; break; - } - twobit = twobit >> 2; - } - } - seq[len] = '\0'; - return 1; -} - - -/* Function: GCGchecksum() - * Date: SRE, Mon May 31 11:13:21 1999 [St. Louis] - * - * Purpose: Calculate a GCG checksum for a sequence. - * Code provided by Steve Smith of Genetics - * Computer Group. - * - * Args: seq - sequence to calculate checksum for. - * may contain gap symbols. - * len - length of sequence (usually known, - * so save a strlen() call) - * - * Returns: GCG checksum. - */ -int -GCGchecksum(char *seq, int len) -{ - int i; /* position in sequence */ - int chk = 0; /* calculated checksum */ - - for (i = 0; i < len; i++) - chk = (chk + (i % 57 + 1) * (sre_toupper((int) seq[i]))) % 10000; - return chk; -} - - -/* Function: GCGMultchecksum() - * - * Purpose: GCG checksum for a multiple alignment: sum of - * individual sequence checksums (including their - * gap characters) modulo 10000. - * - * Implemented using spec provided by Steve Smith of - * Genetics Computer Group. - * - * Args: seqs - sequences to be checksummed; aligned or not - * nseq - number of sequences - * - * Return: the checksum, a number between 0 and 9999 - */ -int -GCGMultchecksum(char **seqs, int nseq) -{ - int chk = 0; - int idx; - - for (idx = 0; idx < nseq; idx++) - chk = (chk + GCGchecksum(seqs[idx], strlen(seqs[idx]))) % 10000; - return chk; -} - - - - -/* Function: Seqtype() - * - * Purpose: Returns a (very good) guess about type of sequence: - * kDNA, kRNA, kAmino, or kOtherSeq. - * - * Modified from, and replaces, Gilbert getseqtype(). 
- */ -int -Seqtype(char *seq) -{ - int saw; /* how many non-gap characters I saw */ - char c; - int po = 0; /* count of protein-only */ - int nt = 0; /* count of t's */ - int nu = 0; /* count of u's */ - int na = 0; /* count of nucleotides */ - int aa = 0; /* count of amino acids */ - int no = 0; /* count of others */ - - /* Look at the first 300 non-gap characters - */ - for (saw = 0; *seq != '\0' && saw < 300; seq++) - { - c = sre_toupper((int) *seq); - if (! isgap(c)) - { - if (strchr(protonly, c)) po++; - else if (strchr(primenuc,c)) { - na++; - if (c == 'T') nt++; - else if (c == 'U') nu++; - } - else if (strchr(aminos,c)) aa++; - else if (isalpha((int) c)) no++; - saw++; - } - } - - if (no > 0) return kOtherSeq; - else if (po > 0) return kAmino; - else if (na > aa) { - if (nu > nt) return kRNA; - else return kDNA; - } - else return kAmino; /* ooooh. risky. */ -} - - -/* Function: GuessAlignmentSeqtype() - * Date: SRE, Wed Jul 7 09:42:34 1999 [St. Louis] - * - * Purpose: Try to guess whether an alignment is protein - * or nucleic acid; return a code for the - * type (kRNA, kDNA, or kAmino). - * - * Args: aseq - array of aligned sequences. (Could also - * be an rseq unaligned sequence array) - * nseq - number of aseqs - * - * Returns: kRNA, kDNA, kAmino; - * kOtherSeq if inconsistency is detected. 
- */ -int -GuessAlignmentSeqtype(char **aseq, int nseq) -{ - int idx; - int nrna = 0; - int ndna = 0; - int namino = 0; - int nother = 0; - - for (idx = 0; idx < nseq; idx++) - switch (Seqtype(aseq[idx])) { - case kRNA: nrna++; break; - case kDNA: ndna++; break; - case kAmino: namino++; break; - default: nother++; - } - - /* Unambiguous decisions: - */ - if (nother) return kOtherSeq; - if (namino == nseq) return kAmino; - if (ndna == nseq) return kDNA; - if (nrna == nseq) return kRNA; - - /* Ambiguous decisions: - */ - if (namino == 0) return kRNA; /* it's nucleic acid, but seems mixed RNA/DNA */ - return kAmino; /* some amino acid seen; others probably short seqs, some - of which may be entirely ACGT (ala,cys,gly,thr). We - could be a little more sophisticated: U would be a giveaway - that we're not in protein seqs */ -} - -/* Function: WriteSimpleFASTA() - * Date: SRE, Tue Nov 16 18:06:00 1999 [St. Louis] - * - * Purpose: Just write a FASTA format sequence to a file; - * minimal interface, mostly for quick and dirty programs. - * - * Args: fp - open file handle (stdout, possibly) - * seq - sequence to output - * name - name for the sequence - * desc - optional description line, or NULL. - * - * Returns: void - */ -void -WriteSimpleFASTA(FILE *fp, char *seq, char *name, char *desc) -{ - char buf[61]; - int len; - int pos; - - len = strlen(seq); - buf[60] = '\0'; - fprintf(fp, ">%s %s\n", name, desc != NULL ? desc : ""); - for (pos = 0; pos < len; pos += 60) - { - strncpy(buf, seq+pos, 60); - fprintf(fp, "%s\n", buf); - } -} - -int -WriteSeq(FILE *outf, int outform, char *seq, SQINFO *sqinfo) -{ - int numline = 0; - int lines = 0, spacer = 0, width = 50, tab = 0; - int i, j, l, l1, ibase; - char endstr[10]; - char s[100]; /* buffer for sequence */ - char ss[100]; /* buffer for structure */ - int checksum = 0; - int seqlen; - int which_case; /* 0 = do nothing. 1 = upper case. 
2 = lower case */ - int dostruc; /* TRUE to print structure lines*/ - - which_case = 0; - dostruc = FALSE; - seqlen = (sqinfo->flags & SQINFO_LEN) ? sqinfo->len : strlen(seq); - - if (IsAlignmentFormat(outform)) - Die("Tried to write an aligned format with WriteSeq() -- bad, bad."); - - - strcpy( endstr,""); - l1 = 0; - checksum = GCGchecksum(seq, seqlen); - - switch (outform) { - case SQFILE_UNKNOWN: /* no header, just sequence */ - strcpy(endstr,"\n"); /* end w/ extra blank line */ - break; - - case SQFILE_GENBANK: - fprintf(outf,"LOCUS %s %d bp\n", - (sqinfo->flags & SQINFO_ID) ? sqinfo->id : sqinfo->name, - seqlen); - fprintf(outf,"DEFINITION %s\n", - (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-"); - fprintf(outf,"ACCESSION %s\n", - (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-"); - fprintf(outf,"ORIGIN \n"); - spacer = 11; - numline = 1; - strcpy(endstr, "\n//"); - break; - - case SQFILE_GCGDATA: - fprintf(outf, ">>>>%s 9/95 ASCII Len: %d\n", sqinfo->name, seqlen); - fprintf(outf, "%s\n", (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-"); - break; - - case SQFILE_PIR: - fprintf(outf, "ENTRY %s\n", - (sqinfo->flags & SQINFO_ID) ? sqinfo->id : sqinfo->name); - fprintf(outf, "TITLE %s\n", - (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-"); - fprintf(outf, "ACCESSION %s\n", - (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-"); - fprintf(outf, "SUMMARY #Length %d #Checksum %d\n", - sqinfo->len, checksum); - fprintf(outf, "SEQUENCE\n"); - fprintf(outf, " 5 10 15 20 25 30\n"); - spacer = 2; /* spaces after every residue */ - numline = 1; /* number lines w/ coords */ - width = 30; /* 30 aa per line */ - strcpy(endstr, "\n///"); - break; - - case SQFILE_SQUID: - fprintf(outf, "NAM %s\n", sqinfo->name); - if (sqinfo->flags & (SQINFO_ID | SQINFO_ACC | SQINFO_START | SQINFO_STOP | SQINFO_OLEN)) - fprintf(outf, "SRC %s %s %d..%d::%d\n", - (sqinfo->flags & SQINFO_ID) ? sqinfo->id : "-", - (sqinfo->flags & SQINFO_ACC) ? 
sqinfo->acc : "-", - (sqinfo->flags & SQINFO_START) ? sqinfo->start : 0, - (sqinfo->flags & SQINFO_STOP) ? sqinfo->stop : 0, - (sqinfo->flags & SQINFO_OLEN) ? sqinfo->olen : 0); - if (sqinfo->flags & SQINFO_DESC) - fprintf(outf, "DES %s\n", sqinfo->desc); - if (sqinfo->flags & SQINFO_SS) - { - fprintf(outf, "SEQ +SS\n"); - dostruc = TRUE; /* print structure lines too */ - } - else - fprintf(outf, "SEQ\n"); - numline = 1; /* number seq lines w/ coords */ - strcpy(endstr, "\n++"); - break; - - case SQFILE_EMBL: - fprintf(outf,"ID %s\n", - (sqinfo->flags & SQINFO_ID) ? sqinfo->id : sqinfo->name); - fprintf(outf,"AC %s\n", - (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-"); - fprintf(outf,"DE %s\n", - (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-"); - fprintf(outf,"SQ %d BP\n", seqlen); - strcpy(endstr, "\n//"); /* 11Oct90: bug fix*/ - tab = 5; /** added 31jan91 */ - spacer = 11; /** added 31jan91 */ - break; - - case SQFILE_GCG: - fprintf(outf,"%s\n", sqinfo->name); - if (sqinfo->flags & SQINFO_ACC) - fprintf(outf,"ACCESSION %s\n", sqinfo->acc); - if (sqinfo->flags & SQINFO_DESC) - fprintf(outf,"DEFINITION %s\n", sqinfo->desc); - fprintf(outf," %s Length: %d (today) Check: %d ..\n", - sqinfo->name, seqlen, checksum); - spacer = 11; - numline = 1; - strcpy(endstr, "\n"); /* this is insurance to help prevent misreads at eof */ - break; - - case SQFILE_STRIDER: /* ?? map ?*/ - fprintf(outf,"; ### from DNA Strider ;-)\n"); - fprintf(outf,"; DNA sequence %s, %d bases, %d checksum.\n;\n", - sqinfo->name, seqlen, checksum); - strcpy(endstr, "\n//"); - break; - - /* SRE: Don had Zuker default to Pearson, which is not - intuitive or helpful, since Zuker's MFOLD can't read - Pearson format. More useful to use kIG */ - case SQFILE_ZUKER: - which_case = 1; /* MFOLD requires upper case. */ - /*FALLTHRU*/ - case SQFILE_IG: - fprintf(outf,";%s %s\n", - sqinfo->name, - (sqinfo->flags & SQINFO_DESC) ? 
sqinfo->desc : ""); - fprintf(outf,"%s\n", sqinfo->name); - strcpy(endstr,"1"); /* == linear dna */ - break; - - case SQFILE_RAW: /* Raw: no header at all. */ - break; - - default : - case SQFILE_FASTA: - fprintf(outf,">%s %s\n", sqinfo->name, - (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : ""); - break; - } - - if (which_case == 1) s2upper(seq); - if (which_case == 2) s2lower(seq); - - - width = MIN(width,100); - for (i=0, l=0, ibase = 1, lines = 0; i < seqlen; ) { - if (l1 < 0) l1 = 0; - else if (l1 == 0) { - if (numline) fprintf(outf,"%8d ",ibase); - for (j=0; jflags & SQINFO_SS) ? sqinfo->ss[i] : '.'; - l++; i++; - l1++; /* don't count spaces for width*/ - if (l1 == width || i == seqlen) { - s[l] = ss[l] = '\0'; - l = 0; l1 = 0; - if (dostruc) - { - fprintf(outf, "%s\n", s); - if (numline) fprintf(outf," "); - for (j=0; jformat, &rseqs[num], &(sqinfo[num]))) - { - num++; - if (num == numalloced) /* more seqs coming, alloc more room */ - { - numalloced += 16; - rseqs = (char **) ReallocOrDie (rseqs, numalloced*sizeof(char *)); - sqinfo = (SQINFO *) ReallocOrDie (sqinfo, numalloced * sizeof(SQINFO)); - } - } - SeqfileClose(dbfp); - - *ret_rseqs = rseqs; - *ret_sqinfo = sqinfo; - *ret_num = num; - return 1; -} - - -/* Function: String2SeqfileFormat() - * Date: SRE, Sun Jun 27 15:25:54 1999 [TW 723 over Canadian Shield] - * - * Purpose: Convert a string (e.g. from command line option arg) - * to a format code. Case insensitive. Return - * MSAFILE_UNKNOWN/SQFILE_UNKNOWN if string is bad. - * Uses codes defined in squid.h (unaligned formats) and - * msa.h (aligned formats). - * - * Args: s - string to convert; e.g. "stockholm" - * - * Returns: format code; e.g. 
MSAFILE_STOCKHOLM - */ -int -String2SeqfileFormat(char *s) -{ - char *s2; - int code = SQFILE_UNKNOWN; - - if (s == NULL) return SQFILE_UNKNOWN; - s2 = sre_strdup(s, -1); - s2upper(s2); - - if (strcmp(s2, "FASTA") == 0) code = SQFILE_FASTA; - else if (strcmp(s2, "GENBANK") == 0) code = SQFILE_GENBANK; - else if (strcmp(s2, "EMBL") == 0) code = SQFILE_EMBL; - else if (strcmp(s2, "GCG") == 0) code = SQFILE_GCG; - else if (strcmp(s2, "GCGDATA") == 0) code = SQFILE_GCGDATA; - else if (strcmp(s2, "RAW") == 0) code = SQFILE_RAW; - else if (strcmp(s2, "IG") == 0) code = SQFILE_IG; - else if (strcmp(s2, "STRIDER") == 0) code = SQFILE_STRIDER; - else if (strcmp(s2, "IDRAW") == 0) code = SQFILE_IDRAW; - else if (strcmp(s2, "ZUKER") == 0) code = SQFILE_ZUKER; - else if (strcmp(s2, "PIR") == 0) code = SQFILE_PIR; - else if (strcmp(s2, "SQUID") == 0) code = SQFILE_SQUID; - else if (strcmp(s2, "STOCKHOLM") == 0) code = MSAFILE_STOCKHOLM; - else if (strcmp(s2, "SELEX") == 0) code = MSAFILE_SELEX; - else if (strcmp(s2, "MSF") == 0) code = MSAFILE_MSF; - else if (strcmp(s2, "CLUSTAL") == 0) code = MSAFILE_CLUSTAL; - else if (strcmp(s2, "A2M") == 0) code = MSAFILE_A2M; - else if (strcmp(s2, "PHYLIP") == 0) code = MSAFILE_PHYLIP; - else if (strcmp(s2, "EPS") == 0) code = MSAFILE_EPS; - - free(s2); - return code; -} -char * -SeqfileFormat2String(int code) -{ - switch (code) { - case SQFILE_UNKNOWN: return "unknown"; - case SQFILE_FASTA: return "FASTA"; - case SQFILE_GENBANK: return "Genbank"; - case SQFILE_EMBL: return "EMBL"; - case SQFILE_GCG: return "GCG"; - case SQFILE_GCGDATA: return "GCG data library"; - case SQFILE_RAW: return "raw"; - case SQFILE_IG: return "Intelligenetics"; - case SQFILE_STRIDER: return "MacStrider"; - case SQFILE_IDRAW: return "Idraw Postscript"; - case SQFILE_ZUKER: return "Zuker"; - case SQFILE_PIR: return "PIR"; - case SQFILE_SQUID: return "SQUID"; - case MSAFILE_STOCKHOLM: return "Stockholm"; - case MSAFILE_SELEX: return "SELEX"; - case MSAFILE_MSF: 
return "MSF"; - case MSAFILE_CLUSTAL: return "Clustal"; - case MSAFILE_A2M: return "a2m"; - case MSAFILE_PHYLIP: return "Phylip"; - case MSAFILE_EPS: return "EPS"; - default: - Die("Bad code passed to MSAFormat2String()"); - } - /*NOTREACHED*/ - return NULL; -} - - -/* Function: MSAToSqinfo() - * Date: SRE, Tue Jul 20 14:36:56 1999 [St. Louis] - * - * Purpose: Take an MSA and generate a SQINFO array suitable - * for use in annotating the unaligned sequences. - * Return the array. - * - * Permanent temporary code. sqinfo was poorly designed. - * it must eventually be replaced, but the odds - * of this happening soon are nil, so I have to deal. - * - * Args: msa - the alignment - * - * Returns: ptr to allocated sqinfo array. - * Freeing is ghastly: free in each individual sqinfo[i] - * with FreeSequence(NULL, &(sqinfo[i])), then - * free(sqinfo). - */ -SQINFO * -MSAToSqinfo(MSA *msa) -{ - int idx; - SQINFO *sqinfo; - - sqinfo = MallocOrDie(sizeof(SQINFO) * msa->nseq); - - for (idx = 0; idx < msa->nseq; idx++) - { - sqinfo[idx].flags = 0; - SetSeqinfoString(&(sqinfo[idx]), - msa->sqname[idx], SQINFO_NAME); - SetSeqinfoString(&(sqinfo[idx]), - MSAGetSeqAccession(msa, idx), SQINFO_ACC); - SetSeqinfoString(&(sqinfo[idx]), - MSAGetSeqDescription(msa, idx), SQINFO_DESC); - - if (msa->ss != NULL && msa->ss[idx] != NULL) { - MakeDealignedString(msa->aseq[idx], msa->alen, - msa->ss[idx], &(sqinfo[idx].ss)); - sqinfo[idx].flags |= SQINFO_SS; - } - - if (msa->sa != NULL && msa->sa[idx] != NULL) { - MakeDealignedString(msa->aseq[idx], msa->alen, - msa->sa[idx], &(sqinfo[idx].sa)); - sqinfo[idx].flags |= SQINFO_SA; - } - - sqinfo[idx].len = DealignedLength(msa->aseq[idx]); - sqinfo[idx].flags |= SQINFO_LEN; - } - return sqinfo; -} - - - -/* cc -o sqio_test -DA_QUIET_DAY -L. 
sqio.c -lsquid */ -#ifdef A_QUIET_DAY -#include "ssi.h" -int -main(int argc, char **argv) -{ - FILE *fp; - char *filename; - char *buf; - int len; - int mode = 3; - SSIOFFSET off; - - filename = argv[1]; - - if (mode == 1) { - buf = malloc(sizeof(char) * 256); - if ((fp = fopen(filename, "r")) == NULL) - Die("open of %s failed", filename); - while (fgets(buf, 255, fp) != NULL) - ; - fclose(fp); - free(buf); - } else if (mode == 2) { - if ((fp = fopen(filename, "r")) == NULL) - Die("open of %s failed", filename); - buf = NULL; len = 0; - while (sre_fgets(&buf, &len, fp) != NULL) - SSIGetFilePosition(fp, SSI_OFFSET_I32, &off); - fclose(fp); - free(buf); - } else if (mode == 3) { - SQFILE *dbfp; - SQINFO info; - - if ((dbfp = SeqfileOpen(filename, SQFILE_FASTA, NULL)) == NULL) - Die("open of %s failed", filename); - while (ReadSeq(dbfp, dbfp->format, &buf, &info)) { - SSIGetFilePosition(dbfp->f, SSI_OFFSET_I32, &off); - FreeSequence(buf, &info); - } - SeqfileClose(dbfp); - } - -} - - -#endif diff --git a/forester/archive/RIO/others/hmmer/squid/squid.h.in b/forester/archive/RIO/others/hmmer/squid/squid.h.in deleted file mode 100644 index 2cf9a73..0000000 --- a/forester/archive/RIO/others/hmmer/squid/squid.h.in +++ /dev/null @@ -1,473 +0,0 @@ -/* @configure_input@ */ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -#ifndef SQUIDH_INCLUDED -#define SQUIDH_INCLUDED - -/* squid.h - * Header file for my library of sequence functions. 
- * - * CVS $Id: squid.h.in,v 1.1.1.1 2005/03/22 08:34:25 cmzmasek Exp $ - */ - -#include -#include -#include -#include /* for sysconf() #define's */ - - -#if DEBUGLEVEL > 0 -#include /* for SQD_DASSERT1(), etc. */ -#endif - -#include "squidconf.h" /* #define's generated by ./configure script */ - -/***************************************************************** - * Integers of guaranteed size. (used for instance in gsi.c, gsi2.c) - * These are set by the ./configure script; if they show up as FIXME, - * they must be manually edited to appropriate type definitions. You - * do need 64-bit integers in the current code; email me if this - * prevents you from compiling SQUID and tell me your system (I don't - * know of any systems that don't have 64-bit integers these days). - *****************************************************************/ -typedef @SQD_UINT16@ sqd_uint16; -typedef @SQD_UINT32@ sqd_uint32; -typedef @SQD_UINT64@ sqd_uint64; - -#ifdef USE_HOST_BYTESWAP_FUNCTIONS -#include /* only for ntohl() and friends. */ -#include /* only for ntohl() and friends. */ -#define sre_ntoh16(x) ntohs(x); -#define sre_ntoh32(x) ntohl(x); -#define sre_hton16(x) htons(x); -#define sre_hton32(x) htonl(x); -#endif /* USE_HOST_BYTESWAP_FUNCTIONS */ - -/* Library version info is made available as a global to - * any interested program. These are defined in iupac.c - * with the other globals. 
- */ -extern char squid_version[]; /* version number */ -extern char squid_date[]; /* date of release */ -extern int squid_errno; /* error codes */ - - - -/**************************************************** - * Error codes returned by squid library functions (squid_errno) - ****************************************************/ - -#define SQERR_OK 0 /* no error */ -#define SQERR_UNKNOWN 1 /* generic error, unidentified */ -#define SQERR_NODATA 2 /* unexpectedly NULL stream */ -#define SQERR_MEM 3 /* malloc or realloc failed */ -#define SQERR_NOFILE 4 /* file not found */ -#define SQERR_FORMAT 5 /* file format not recognized */ -#define SQERR_PARAMETER 6 /* bad parameter passed to func */ -#define SQERR_DIVZERO 7 /* error in sre_math.c */ -#define SQERR_INCOMPAT 8 /* incompatible parameters */ -#define SQERR_EOD 9 /* end-of-data (often normal) */ - -/**************************************************** - * Single sequence information - ****************************************************/ -#define SQINFO_NAMELEN 64 -#define SQINFO_DESCLEN 128 - -struct seqinfo_s { - int flags; /* what extra data are available */ - char name[SQINFO_NAMELEN];/* up to 63 characters of name */ - char id[SQINFO_NAMELEN]; /* up to 63 char of database identifier */ - char acc[SQINFO_NAMELEN]; /* up to 63 char of database accession # */ - char desc[SQINFO_DESCLEN];/* up to 127 char of description */ - int len; /* length of this seq */ - int start; /* (1..len) start position on source seq */ - int stop; /* (1..len) end position on source seq */ - int olen; /* original length of source seq */ - int type; /* kRNA, kDNA, kAmino, or kOther */ - char *ss; /* 0..len-1 secondary structure string */ - char *sa; /* 0..len-1 % side chain surface access. 
*/ -}; -typedef struct seqinfo_s SQINFO; - -#define SQINFO_NAME (1 << 0) -#define SQINFO_ID (1 << 1) -#define SQINFO_ACC (1 << 2) -#define SQINFO_DESC (1 << 3) -#define SQINFO_START (1 << 4) -#define SQINFO_STOP (1 << 5) -#define SQINFO_LEN (1 << 6) -#define SQINFO_TYPE (1 << 7) -#define SQINFO_OLEN (1 << 8) -#define SQINFO_SS (1 << 9) -#define SQINFO_SA (1 << 10) - - -/**************************************************** - * Sequence alphabet: see also iupac.c - ****************************************************/ - /* IUPAC symbols defined globally in iupac.c */ -struct iupactype { - char sym; /* character representation */ - char symcomp; /* complement (regular char */ - char code; /* my binary rep */ - char comp; /* binary encoded complement */ -}; -extern struct iupactype iupac[]; -#define IUPACSYMNUM 17 - -extern char *stdcode1[]; /* 1-letter amino acid translation code */ -extern char *stdcode3[]; /* 3-letter amino acid translation code */ -extern float dnafq[]; /* nucleotide occurrence frequencies */ -extern float aafq[]; /* amino acid occurrence frequencies */ -extern char aa_alphabet[]; /* amino acid alphabet */ -extern int aa_index[]; /* convert 0..19 indices to 0..26 */ - - /* valid symbols in IUPAC code */ -#define NUCLEOTIDES "ACGTUNRYMKSWHBVDacgtunrymkswhbvd" -#define AMINO_ALPHABET "ACDEFGHIKLMNPQRSTVWY" -#define DNA_ALPHABET "ACGT" -#define RNA_ALPHABET "ACGU" -#define WHITESPACE " \t\n" - -#define isgap(c) ((c) == ' ' || (c) == '.' 
|| (c) == '_' || (c) == '-' || (c) == '~') - - -/**************************************************** - * Sequence i/o: originally from Don Gilbert's readseq - ****************************************************/ -#include "msa.h" /* for multiple sequence alignment support */ - - /* buffer size for reading in lines from sequence files*/ -#define LINEBUFLEN 4096 - -/* sequence types parsed by Seqtype() */ -/* note that these must match hmmAMINO and hmmNUCLEIC in HMMER */ -#define kOtherSeq 0 /* hmmNOTSETYET */ -#define kDNA 1 -#define kRNA 2 /* hmmNUCLEIC */ -#define kAmino 3 /* hmmAMINO */ - -/* Unaligned sequence file formats recognized - * Coexists with definitions of multiple alignment formats in msa.h: - * >100 reserved for alignment formats - * <100 reserved for unaligned formats - * 0 reserved for unknown - * - * Some "legacy" formats are supported only when explicitly - * requested; not autodetected by SeqfileFormat(). - * - * DON'T REASSIGN THESE CODES. They're written into - * GSI index files. You can use new ones, but reassigning - * the sense of old ones will break GSI indices. - * Alignment format codes were reassigned with the creation - * of msa.c, but before Stockholm format, there were no - * indexed alignment databases. - */ -#define SQFILE_UNKNOWN 0 /* unknown format */ -#define SQFILE_IG 1 /* Intelligenetics (!) */ -#define SQFILE_GENBANK 2 /* GenBank flatfile */ - /* 3 was A2M. Now an alignment format */ -#define SQFILE_EMBL 4 /* EMBL or Swissprot flatfile */ -#define SQFILE_GCG 5 /* GCG single sequence files */ -#define SQFILE_STRIDER 6 /* MacStrider (!!) */ -#define SQFILE_FASTA 7 /* FASTA format: default */ -#define SQFILE_ZUKER 8 /* Zuker MFOLD format (legacy) */ -#define SQFILE_IDRAW 9 /* Idraw-style PostScript (legacy) */ - /* 10 was SELEX. Now alignment format */ - /* 11 was MSF. 
Now alignment format */ -#define SQFILE_PIR 12 /* PIR format */ -#define SQFILE_RAW 13 /* raw sequence */ -#define SQFILE_SQUID 14 /* my obsolete squid format */ - /* 15 was kXPearson, extended FASTA; withdrawn */ -#define SQFILE_GCGDATA 16 /* GCG data library file */ - /* 17 was Clustal. Now alignment format*/ - -#define IsUnalignedFormat(fmt) ((fmt) && (fmt) < 100) - -#include "ssi.h" - -struct ReadSeqVars { - FILE *f; /* open file pointer */ - char *fname; /* name of file; used for diagnostics */ - int linenumber; /* what line are we on in the file */ - - char *buf; /* dynamically allocated sre_fgets() buffer */ - int buflen; /* allocation length for buf */ - - int ssimode; /* SSI_OFFSET_I32 or SSI_OFFSET_I64 */ - SSIOFFSET ssioffset; /* disk offset to last line read into buf */ - SSIOFFSET r_off; /* offset to start of record */ - SSIOFFSET d_off; /* offset to start of sequence data */ - - int rpl; /* residues per data line for this file; -1 if unset, 0 if invalid */ - int lastrpl; /* rpl on last line seen */ - int maxrpl; /* max rpl on any line of the file */ - int bpl; /* bytes per data line; -1 if unset, 0 if invalid */ - int lastbpl; /* bpl on last line seen */ - int maxbpl; /* max bpl on any line of the file */ - - char *seq; /* growing sequence during parse */ - SQINFO *sqinfo; /* name, id, etc, gathered during parse */ - char *sp; - int seqlen; /* current sequence length */ - int maxseq; /* current allocation length for seq */ - - int format; /* format of seqfile we're reading. */ - int do_gzip; /* TRUE if f is a pipe from gzip -dc */ - int do_stdin; /* TRUE if f is stdin */ - - /* An (important) hack for sequential access of multiple alignment files: - * we read the whole alignment in, - * and then copy it one sequence at a time into seq and sqinfo. - * It is active if msa is non NULL. - * msa->lastidx is reused/overloaded: used to keep track of what - * seq we'll return next. - * afp->format is the real format, while SQFILE->format is kMSA. 
- * Because we keep it in the SQFILE structure, - * ReadSeq() and friends are always reentrant for multiple seqfiles. - */ - MSA *msa; - MSAFILE *afp; -}; -typedef struct ReadSeqVars SQFILE; - - -/**************************************************** - * Cluster analysis and phylogenetic tree support - ****************************************************/ - -/* struct phylo_s - a phylogenetic tree - * - * For N sequences, there will generally be an array of 0..N-2 - * phylo_s structures representing the nodes of a tree. - * [0] is the root. The indexes of left and - * right children are somewhat confusing so be careful. The - * indexes can have values of 0..2N-2. If they are 0..N-1, they - * represent pointers to individual sequences. If they are - * >= N, they represent pointers to a phylo_s structure - * at (index - N). - */ -struct phylo_s { - int parent; /* index of parent, N..2N-2, or -1 for root */ - int left; /* index of one of the branches, 0..2N-2 */ - int right; /* index of other branch, 0..2N-2 */ - float diff; /* difference score between seqs */ - float lblen; /* left branch length */ - float rblen; /* right branch length */ - char *is_in; /* 0..N-1 flag array, 1 if seq included */ - int incnum; /* number of seqs included at this node */ -}; - - -/* Strategies for cluster analysis; cluster by mean distance, - * minimum distance, or maximum distance. - */ -enum clust_strategy { CLUSTER_MEAN, CLUSTER_MAX, CLUSTER_MIN }; - -/**************************************************** - * Generic data structure support - ****************************************************/ - -/* a struct intstack_s implements a pushdown stack for storing - * single integers. 
- */ -struct intstack_s { - int data; - struct intstack_s *nxt; -}; - -/**************************************************** - * Binary nucleotide alphabet support - ****************************************************/ - -/* Binary encoding of the IUPAC code for nucleotides - * - * four-bit "word", permitting rapid degenerate matching - * A C G T/U - * 0 0 1 0 - */ -#define NTA 8 -#define NTC 4 -#define NTG 2 -#define NTT 1 -#define NTU 1 -#define NTN 15 /* A|C|G|T */ -#define NTR 10 /* A|G */ -#define NTY 5 /* C|T */ -#define NTM 12 /* A|C */ -#define NTK 3 /* G|T */ -#define NTS 6 /* C|G */ -#define NTW 9 /* A|T */ -#define NTH 13 /* A|C|T */ -#define NTB 7 /* C|G|T */ -#define NTV 14 /* A|C|G */ -#define NTD 11 /* A|G|T */ -#define NTGAP 16 /* GAP */ -#define NTEND 0 /* null string terminator */ - -/* ntmatch(): bitwise comparison of two nuc's - * note that it's sensitive to the order; - * probe may be degenerate but target should not be - */ -#define ntmatch(probe, target) ((probe & target) == target) - -/**************************************************** - * Support for a portable, flexible Getopt() - ****************************************************/ - -/* Structure: opt_s - * - * Structure for declaring options to a main(). - */ -struct opt_s { - char *name; /* name of option, e.g. "--option1" or "-o" */ - int single; /* TRUE if a single letter option */ - int argtype; /* for typechecking, e.g. sqdARG_INT */ -}; - /* acceptable argtype's... */ -#define sqdARG_NONE 0 /* no argument */ -#define sqdARG_INT 1 /* something that atoi() can grok */ -#define sqdARG_FLOAT 2 /* something that atof() can grok */ -#define sqdARG_CHAR 3 /* require single character or digit */ -#define sqdARG_STRING 4 /* anything goes */ - -/**************************************************** - * Support for convenient Perl-y regexp matching - * See hsregexp.c for copyright notice: this code is derived - * from Henry Spencer's freely distributed regexp library. 
- ****************************************************/ - -#define NSUBEXP 10 -typedef struct sqd_regexp { - char *startp[NSUBEXP]; - char *endp[NSUBEXP]; - char regstart; /* Internal use only. */ - char reganch; /* Internal use only. */ - char *regmust; /* Internal use only. */ - int regmlen; /* Internal use only. */ - char program[1]; /* Unwarranted chumminess with compiler. */ -} sqd_regexp; - -/* Strparse() defines and manages these. - * sqd_parse[0] contains the substring that matched the pattern. - * sqd_parse[1-9] contain substrings matched with ()'s. - */ -extern char *sqd_parse[10]; - -/**************************************************** - * Portable detection of multiprocessor # of CPUs. - * #include - * long foo = SQD_NPROC; - * returns the number of available processors. - * if foo == -1, we failed. - ****************************************************/ - -/* Our problem here is that POSIX apparently doesn't specify - * a standard for how to get sysconf() to report the number of - * processors on-line. _SC_NPROCESSORS_ONLN is specified - * by SVR4.0MP. Thanks to W. Gish for help here. - */ -#undef SQD_NPROC -#ifdef _SC_NPROCESSORS_ONLN /* Sun Solaris, Digital UNIX */ -#define SQD_NPROC sysconf(_SC_NPROCESSORS_ONLN) -#else -#ifdef _SC_NPROC_ONLN /* Silicon Graphics IRIX */ -#define SQD_NPROC sysconf(_SC_NPROC_ONLN) -#else /* FreeBSD, Linux don't support getting ncpu via sysconf() */ -#define SQD_NPROC -1 -#endif -#endif - -/**************************************************** - * Three levels of debugging printf's and assert's - * level 1: little impact on verbosity or performance - * level 2: moderate impact - * level 3: high impact - * Example: - * SQD_DPRINTF3(("Matrix row %d col %d = %f\n", i, j, val)); - * Note the double parentheses; these are important. 
- ****************************************************/ - -#ifndef DEBUGLEVEL -#define DEBUGLEVEL 0 -#endif - -#if (DEBUGLEVEL >= 1) -#define SQD_DPRINTF1(x) printf x -#define SQD_DASSERT1(x) assert x -#else -#define SQD_DPRINTF1(x) -#define SQD_DASSERT1(x) -#endif -#if (DEBUGLEVEL >= 2) -#define SQD_DPRINTF2(x) printf x -#define SQD_DASSERT2(x) assert x -#else -#define SQD_DPRINTF2(x) -#define SQD_DASSERT2(x) -#endif -#if (DEBUGLEVEL >= 3) -#define SQD_DPRINTF3(x) printf x -#define SQD_DASSERT3(x) assert x -#else -#define SQD_DPRINTF3(x) -#define SQD_DASSERT3(x) -#endif - -/* PANIC is called for failures of Std C/POSIX functions, - * instead of my own functions. Panic() calls perror() and exits - * abnormally. - */ -#define PANIC Panic(__FILE__, __LINE__) - -/* Malloc/realloc calls are wrapped - */ -#define MallocOrDie(x) sre_malloc(__FILE__, __LINE__, (x)) -#define ReallocOrDie(x,y) sre_realloc(__FILE__, __LINE__, (x), (y)) - -/**************************************************** - * Miscellaneous macros and defines - ****************************************************/ - -#define CHOOSE(a) ((int) (sre_random() * (a))) - /* must declare swapfoo to use SWAP() */ -#define SWAP(a,b) {swapfoo = b; b = a; a = swapfoo;} -#define ScalarsEqual(a,b) (fabs((a)-(b)) < 1e-7) - -#ifndef MIN -#define MIN(a,b) (((a)<(b))?(a):(b)) -#endif -#ifndef MAX -#define MAX(a,b) (((a)>(b))?(a):(b)) -#endif - -/* For convenience and (one hopes) clarity in boolean tests: - */ -#ifndef TRUE -#define TRUE 1 -#endif -#ifndef FALSE -#define FALSE 0 -#endif - -/* Somewhere, there is a universe in which Unix vendors comply - * with the ANSI C standard. 
Unfortunately, it is not ours: - */ -#ifndef EXIT_SUCCESS -#define EXIT_SUCCESS 0 -#endif -#ifndef EXIT_FAILURE -#define EXIT_FAILURE 1 -#endif - -#include "sqfuncs.h" /* squid function declarations */ -#endif /* SQUIDH_INCLUDED */ diff --git a/forester/archive/RIO/others/hmmer/squid/squidconf.h.in b/forester/archive/RIO/others/hmmer/squid/squidconf.h.in deleted file mode 100644 index 354c912..0000000 --- a/forester/archive/RIO/others/hmmer/squid/squidconf.h.in +++ /dev/null @@ -1,76 +0,0 @@ -/* @configure_input@ */ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -#ifndef SQUIDCONFH_INCLUDED -#define SQUIDCONFH_INCLUDED - -/* squidconf.h - * Captures #define's generated by the ./configure script; - * this configuration information is #included at the start of - * squid.h - */ - -/***************************************************************** - * Sizes of integer types. - * various things are set by ./configure; the code - * uses WORDS_BIGENDIAN and USE_HOST_BYTESWAP_FUNCTIONS. 
- *****************************************************************/ -#undef WORDS_BIGENDIAN -#define SIZEOF_UNSIGNED_SHORT 0 -#define SIZEOF_UNSIGNED_INT 0 -#define SIZEOF_UNSIGNED_LONG 0 -#define SIZEOF_UNSIGNED_LONG_LONG 0 -#undef HAVE_NTOHS /* if defined, system provides ntohs() */ -#undef HAVE_NTOHL /* if defined, system provides ntohl() */ -#undef HAVE_HTONS /* if defined, system provides htons() */ -#undef HAVE_HTONL /* if defined, system provides htonl() */ -#if defined HAVE_NTOHL && defined HAVE_NTOHS && defined HAVE_HTONS && defined HAVE_HTONL -#define USE_HOST_BYTESWAP_FUNCTIONS 1 -#endif - -/***************************************************************** - * Can we support arithmetic 64-bit file offsets? - * four possible models checked for: - * 1. ftello(), fseeko() with 64-bit off_t - * 2. ftello64(), fseeko64() with 64-bit off64_t - * 3. ftell64(), fseek64() with 64-bit integer - * 4. fgetpos(), fsetpos() with an fpos_t that happens to be a - * 64-bit integer, even though ANSI says we're not supposed to know - * anything about fpos_t's internals. - * Based on what ./configure tells us about these, we set - * HAS_64BIT_FILE_OFFSETS or not. 
- *****************************************************************/ -#undef HAVE_FTELLO -#undef HAVE_FSEEKO -#undef HAVE_FTELLO64 -#undef HAVE_FSEEKO64 -#undef HAVE_FTELL64 -#undef HAVE_FSEEK64 -#undef ARITHMETIC_FPOS_T -#undef HAVE_STAT64 -#define SIZEOF_FPOS_T -1 -#define SIZEOF_OFF_T -1 -#define SIZEOF_OFF64_T -1 - -#if defined HAVE_FTELLO && defined HAVE_FSEEKO && SIZEOF_OFF_T == 8 -#define HAS_64BIT_FILE_OFFSETS 1 -#elif defined HAVE_FTELLO64 && defined HAVE_FSEEKO64 && SIZEOF_OFF64_T == 8 -#define HAS_64BIT_FILE_OFFSETS 1 -#elif defined HAVE_FTELL64 && defined HAVE_FSEEK64 -#define HAS_64BIT_FILE_OFFSETS 1 -#elif defined ARITHMETIC_FPOS_T && SIZEOF_FPOS_T == 8 -#define HAS_64BIT_FILE_OFFSETS 1 -#else -#undef HAS_64BIT_FILE_OFFSETS -#endif - - -#endif /* SQUIDCONFH_INCLUDED */ diff --git a/forester/archive/RIO/others/hmmer/squid/squidcore.c b/forester/archive/RIO/others/hmmer/squid/squidcore.c deleted file mode 100644 index 9970f0d..0000000 --- a/forester/archive/RIO/others/hmmer/squid/squidcore.c +++ /dev/null @@ -1,53 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* squidcore.c - * SRE, Sun Jun 20 17:19:04 1999 [Graeme's kitchen] - * - * Core functions for SQUID library. - * RCS $Id: squidcore.c,v 1.1.1.1 2005/03/22 08:34:32 cmzmasek Exp $ - */ - -#include -#include "version.h" - -/* Function: Banner() - * Date: SRE, Sun Jun 20 17:19:41 1999 [Graeme's kitchen] - * - * Purpose: Print a package version and copyright banner. - * Used by all the main()'s. 
- * - * Expects to be able to pick up defined macros: - * macro example - * ------ -------------- - * PACKAGE "HMMER" - * RELEASE "2.0.42" - * RELEASEDATE "April 1 1999" - * COPYRIGHT "Copyright (C) 1992-1999 Washington University School of Medicine" - * LICENSE "HMMER is freely distributed under the GNU General Public License (GPL)." - * - * This gives us a general mechanism to update release information - * without changing multiple points in the code; we can also override - * SQUID release data with another package's release data (e.g. - * HMMER) just by redefining macros. - * - * Args: fp - where to print it - * banner - one-line program description, e.g.: - * "foobar - make bars from foo with elan" - * Returns: (void) - */ -void -Banner(FILE *fp, char *banner) -{ - fprintf(fp, "%s\n%s %s (%s)\n%s\n%s\n", banner, PACKAGE, RELEASE, RELEASEDATE, COPYRIGHT, LICENSE); - fprintf(fp, "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n"); -} - - diff --git a/forester/archive/RIO/others/hmmer/squid/sre_ctype.c b/forester/archive/RIO/others/hmmer/squid/sre_ctype.c deleted file mode 100644 index 6be7b82..0000000 --- a/forester/archive/RIO/others/hmmer/squid/sre_ctype.c +++ /dev/null @@ -1,39 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* sre_ctype.c - * - * For portability. Some systems have functions tolower, toupper - * as macros (for instance, MIPS M-2000 RISC/os!) 
- * - * RCS $Id: sre_ctype.c,v 1.1.1.1 2005/03/22 08:34:16 cmzmasek Exp $ - */ - -#include -#include "squid.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -int -sre_tolower(int c) -{ - if (isupper(c)) return tolower(c); - else return c; -} - -int -sre_toupper(int c) -{ - if (islower(c)) return toupper(c); - else return c; -} - diff --git a/forester/archive/RIO/others/hmmer/squid/sre_math.c b/forester/archive/RIO/others/hmmer/squid/sre_math.c deleted file mode 100644 index f5ecda2..0000000 --- a/forester/archive/RIO/others/hmmer/squid/sre_math.c +++ /dev/null @@ -1,787 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* sre_math.c - * - * Portability for and extensions to C math library. - * RCS $Id: sre_math.c,v 1.1.1.1 2005/03/22 08:34:32 cmzmasek Exp $ - */ - -#include -#include -#include -#include "squid.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -static int sre_reseed = 0; /* TRUE to reinit sre_random() */ -static int sre_randseed = 666; /* default seed for sre_random() */ - -/* Function: ExponentialRandom() - * Date: SRE, Mon Sep 6 21:24:29 1999 [St. Louis] - * - * Purpose: Pick an exponentially distributed random variable - * 0 > x >= infinity - * - * Args: (void) - * - * Returns: x - */ -float -ExponentialRandom(void) -{ - float x; - - do x = sre_random(); while (x == 0.0); - return -log(x); -} - -/* Function: Gaussrandom() - * - * Pick a Gaussian-distributed random variable - * with some mean and standard deviation, and - * return it. - * - * Based on RANLIB.c public domain implementation. - * Thanks to the authors, Barry W. 
Brown and James Lovato, - * University of Texas, M.D. Anderson Cancer Center, Houston TX. - * Their implementation is from Ahrens and Dieter, "Extensions - * of Forsythe's method for random sampling from the normal - * distribution", Math. Comput. 27:927-937 (1973). - * - * Impenetrability of the code is to be blamed on its FORTRAN/f2c lineage. - * - */ -float -Gaussrandom(float mean, float stddev) -{ - static float a[32] = { - 0.0,3.917609E-2,7.841241E-2,0.11777,0.1573107,0.1970991,0.2372021,0.2776904, 0.3186394,0.36013,0.4022501,0.4450965,0.4887764,0.5334097,0.5791322, - 0.626099,0.6744898,0.7245144,0.7764218,0.8305109,0.8871466,0.9467818, - 1.00999,1.077516,1.150349,1.229859,1.318011,1.417797,1.534121,1.67594, - 1.862732,2.153875 - }; - static float d[31] = { - 0.0,0.0,0.0,0.0,0.0,0.2636843,0.2425085,0.2255674,0.2116342,0.1999243, - 0.1899108,0.1812252,0.1736014,0.1668419,0.1607967,0.1553497,0.1504094, - 0.1459026,0.14177,0.1379632,0.1344418,0.1311722,0.128126,0.1252791, - 0.1226109,0.1201036,0.1177417,0.1155119,0.1134023,0.1114027,0.1095039 - }; - static float t[31] = { - 7.673828E-4,2.30687E-3,3.860618E-3,5.438454E-3,7.0507E-3,8.708396E-3, - 1.042357E-2,1.220953E-2,1.408125E-2,1.605579E-2,1.81529E-2,2.039573E-2, - 2.281177E-2,2.543407E-2,2.830296E-2,3.146822E-2,3.499233E-2,3.895483E-2, - 4.345878E-2,4.864035E-2,5.468334E-2,6.184222E-2,7.047983E-2,8.113195E-2, - 9.462444E-2,0.1123001,0.136498,0.1716886,0.2276241,0.330498,0.5847031 - }; - static float h[31] = { - 3.920617E-2,3.932705E-2,3.951E-2,3.975703E-2,4.007093E-2,4.045533E-2, - 4.091481E-2,4.145507E-2,4.208311E-2,4.280748E-2,4.363863E-2,4.458932E-2, - 4.567523E-2,4.691571E-2,4.833487E-2,4.996298E-2,5.183859E-2,5.401138E-2, - 5.654656E-2,5.95313E-2,6.308489E-2,6.737503E-2,7.264544E-2,7.926471E-2, - 8.781922E-2,9.930398E-2,0.11556,0.1404344,0.1836142,0.2790016,0.7010474 - }; - static long i; - static float snorm,u,s,ustar,aa,w,y,tt; - - u = sre_random(); - s = 0.0; - if(u > 0.5) s = 1.0; - u += (u-s); - u = 
32.0*u; - i = (long) (u); - if(i == 32) i = 31; - if(i == 0) goto S100; - /* - * START CENTER - */ - ustar = u-(float)i; - aa = *(a+i-1); -S40: - if(ustar <= *(t+i-1)) goto S60; - w = (ustar-*(t+i-1))**(h+i-1); -S50: - /* - * EXIT (BOTH CASES) - */ - y = aa+w; - snorm = y; - if(s == 1.0) snorm = -y; - return (stddev*snorm + mean); -S60: - /* - * CENTER CONTINUED - */ - u = sre_random(); - w = u*(*(a+i)-aa); - tt = (0.5*w+aa)*w; - goto S80; -S70: - tt = u; - ustar = sre_random(); -S80: - if(ustar > tt) goto S50; - u = sre_random(); - if(ustar >= u) goto S70; - ustar = sre_random(); - goto S40; -S100: - /* - * START TAIL - */ - i = 6; - aa = *(a+31); - goto S120; -S110: - aa += *(d+i-1); - i += 1; -S120: - u += u; - if(u < 1.0) goto S110; - u -= 1.0; -S140: - w = u**(d+i-1); - tt = (0.5*w+aa)*w; - goto S160; -S150: - tt = u; -S160: - ustar = sre_random(); - if(ustar > tt) goto S50; - u = sre_random(); - if(ustar >= u) goto S150; - u = sre_random(); - goto S140; -} - - -/* Function: Linefit() - * - * Purpose: Given points x[0..N-1] and y[0..N-1], fit to - * a straight line y = a + bx. - * a, b, and the linear correlation coefficient r - * are filled in for return. - * - * Args: x - x values of data - * y - y values of data - * N - number of data points - * ret_a - RETURN: intercept - * ret_b - RETURN: slope - * ret_r - RETURN: correlation coefficient - * - * Return: 1 on success, 0 on failure. 
- */ -int -Linefit(float *x, float *y, int N, float *ret_a, float *ret_b, float *ret_r) -{ - float xavg, yavg; - float sxx, syy, sxy; - int i; - - /* Calculate averages, xavg and yavg - */ - xavg = yavg = 0.0; - for (i = 0; i < N; i++) - { - xavg += x[i]; - yavg += y[i]; - } - xavg /= (float) N; - yavg /= (float) N; - - sxx = syy = sxy = 0.0; - for (i = 0; i < N; i++) - { - sxx += (x[i] - xavg) * (x[i] - xavg); - syy += (y[i] - yavg) * (y[i] - xavg); - sxy += (x[i] - xavg) * (y[i] - yavg); - } - *ret_b = sxy / sxx; - *ret_a = yavg - xavg*(*ret_b); - *ret_r = sxy / (sqrt(sxx) * sqrt(syy)); - return 1; -} - - -/* Function: WeightedLinefit() - * - * Purpose: Given points x[0..N-1] and y[0..N-1] with - * variances (measurement errors) var[0..N-1], - * fit to a straight line y = mx + b. - * - * Method: Algorithm from Numerical Recipes in C, [Press88]. - * - * Return: (void) - * ret_m contains slope; ret_b contains intercept - */ -void -WeightedLinefit(float *x, float *y, float *var, int N, float *ret_m, float *ret_b) -{ - int i; - double s; - double sx, sy; - double sxx, sxy; - double delta; - double m, b; - - s = sx = sy = sxx = sxy = 0.; - for (i = 0; i < N; i++) - { - s += 1./var[i]; - sx += x[i] / var[i]; - sy += y[i] / var[i]; - sxx += x[i] * x[i] / var[i]; - sxy += x[i] * y[i] / var[i]; - } - - delta = s * sxx - (sx * sx); - b = (sxx * sy - sx * sxy) / delta; - m = (s * sxy - sx * sy) / delta; - - *ret_m = m; - *ret_b = b; -} - - -/* Function: Gammln() - * - * Returns the natural log of the gamma function of x. - * x is > 0.0. - * - * Adapted from a public domain implementation in the - * NCBI core math library. Thanks to John Spouge and - * the NCBI. (According to the NCBI, that's Dr. John - * "Gammas Galore" Spouge to you, pal.) 
- */ -double -Gammln(double x) -{ - int i; - double xx, tx; - double tmp, value; - static double cof[11] = { - 4.694580336184385e+04, - -1.560605207784446e+05, - 2.065049568014106e+05, - -1.388934775095388e+05, - 5.031796415085709e+04, - -9.601592329182778e+03, - 8.785855930895250e+02, - -3.155153906098611e+01, - 2.908143421162229e-01, - -2.319827630494973e-04, - 1.251639670050933e-10 - }; - - /* Protect against x=0. We see this in Dirichlet code, - * for terms alpha = 0. This is a severe hack but it is effective - * and (we think?) safe. (due to GJM) - */ - if (x <= 0.0) return 999999.; - - xx = x - 1.0; - tx = tmp = xx + 11.0; - value = 1.0; - for (i = 10; i >= 0; i--) /* sum least significant terms first */ - { - value += cof[i] / tmp; - tmp -= 1.0; - } - value = log(value); - tx += 0.5; - value += 0.918938533 + (xx+0.5)*log(tx) - tx; - return value; -} - - -/* Vector operations for doubles and floats. - * DNorm(), FNorm() -- normalize a probability vector of length n. - * return 0 if all values were zero. - * DScale(), FScale() -- multiply all items in vector by scale - * DSet(), FSet() -- set all items in vector to value. - * DAdd(), FAdd() -- add vec2 to vec1. - * DDot(), FDot() -- calculate dot product of two vectors. - * DCopy(), FCopy() -- set vec1 to be same as vec2. 
- * DMax(), FMax() -- return index of maximum element in vec - */ -int -DNorm(double *vec, int n) -{ - int x; - double sum; - - sum = 0.0; - for (x = 0; x < n; x++) sum += vec[x]; - if (sum != 0.0) - for (x = 0; x < n; x++) vec[x] /= sum; - else - { squid_errno = SQERR_DIVZERO; return 0; } - return 1; -} -int -FNorm(float *vec, int n) -{ - int x; - float sum; - - sum = 0.0; - for (x = 0; x < n; x++) sum += vec[x]; - if (sum != 0.0) - for (x = 0; x < n; x++) vec[x] /= sum; - else - { squid_errno = SQERR_DIVZERO; return 0; } - return 1; -} - -void -DScale(double *vec, int n, double scale) -{ - int x; - for (x = 0; x < n; x++) - vec[x] *= scale; -} -void -FScale(float *vec, int n, float scale) -{ - int x; - for (x = 0; x < n; x++) - vec[x] *= scale; -} - -void -DSet(double *vec, int n, double value) -{ - int x; - for (x = 0; x < n; x++) - vec[x] = value; -} -void -FSet(float *vec, int n, float value) -{ - int x; - for (x = 0; x < n; x++) - vec[x] = value; -} - -double -DSum(double *vec, int n) -{ - double sum = 0.; - int x; - for (x = 0; x < n; x++) - sum += vec[x]; - return sum; -} -float -FSum(float *vec, int n) -{ - float sum = 0.; - int x; - for (x = 0; x < n; x++) - sum += vec[x]; - return sum; -} - -void -DAdd(double *vec1, double *vec2, int n) -{ - int x; - for (x = 0; x < n; x++) - vec1[x] += vec2[x]; -} -void -FAdd(float *vec1, float *vec2, int n) -{ - int x; - for (x = 0; x < n; x++) - vec1[x] += vec2[x]; -} - -void -DCopy(double *vec1, double *vec2, int n) -{ - int x; - for (x = 0; x < n; x++) - vec1[x] = vec2[x]; -} -void -FCopy(float *vec1, float *vec2, int n) -{ - int x; - for (x = 0; x < n; x++) - vec1[x] = vec2[x]; -} - -double -DDot(double *vec1, double *vec2, int n) -{ - double result = 0.; - int x; - - for (x = 0; x < n; x++) - result += vec1[x] * vec2[x]; - return result; -} -float -FDot(float *vec1, float *vec2, int n) -{ - float result = 0.; - int x; - - for (x = 0; x < n; x++) - result += vec1[x] * vec2[x]; - return result; -} - -/* Functions: 
DMax(), FMax() - * Date: SRE, Fri Aug 29 11:14:08 1997 (Denver CO) - * - * Purpose: return index of maximum element in vec. - */ -int -DMax(double *vec, int n) -{ - int i; - int best = 0; - - for (i = 1; i < n; i++) - if (vec[i] > vec[best]) best = i; - return best; -} -int -FMax(float *vec, int n) -{ - int i; - int best = 0; - - for (i = 1; i < n; i++) - if (vec[i] > vec[best]) best = i; - return best; -} - - -/* 2D matrix operations - */ -float ** -FMX2Alloc(int rows, int cols) -{ - float **mx; - int r; - - mx = (float **) MallocOrDie(sizeof(float *) * rows); - mx[0] = (float *) MallocOrDie(sizeof(float) * rows * cols); - for (r = 1; r < rows; r++) - mx[r] = mx[0] + r*cols; - return mx; -} -void -FMX2Free(float **mx) -{ - free(mx[0]); - free(mx); -} -double ** -DMX2Alloc(int rows, int cols) -{ - double **mx; - int r; - - mx = (double **) MallocOrDie(sizeof(double *) * rows); - mx[0] = (double *) MallocOrDie(sizeof(double) * rows * cols); - for (r = 1; r < rows; r++) - mx[r] = mx[0] + r*cols; - return mx; -} -void -DMX2Free(double **mx) -{ - free(mx[0]); - free(mx); -} -/* Function: FMX2Multiply() - * - * Purpose: Matrix multiplication. - * Multiply an m x p matrix A by a p x n matrix B, - * giving an m x n matrix C. - * Matrix C must be a preallocated matrix of the right - * size. - */ -void -FMX2Multiply(float **A, float **B, float **C, int m, int p, int n) -{ - int i, j, k; - - for (i = 0; i < m; i++) - for (j = 0; j < n; j++) - { - C[i][j] = 0.; - for (k = 0; k < p; k++) - C[i][j] += A[i][p] * B[p][j]; - } -} - -/* Function: sre_random() - * - * Purpose: Return a uniform deviate from 0.0 to 1.0. - * sre_randseed is a static variable, set - * by sre_srandom(). sre_reseed is a static flag - * raised by sre_srandom(), saying that we need - * to re-initialize. - * [0.0 <= x < 1.0] - * - * Uses a simple linear congruential generator with - * period 2^28. Based on discussion in Robert Sedgewick's - * _Algorithms in C_, Addison-Wesley, 1990. 
- * - * Requires that long int's have at least 32 bits. - * - * Reliable and portable, but slow. Benchmarks on wol, - * using IRIX cc and IRIX C library rand() and random(): - * sre_random(): 0.8 usec/call - * random(): 0.3 usec/call - * rand(): 0.3 usec/call - */ -#define RANGE 268435456 /* 2^28 */ -#define DIV 16384 /* sqrt(RANGE) */ -#define MULT 72530821 /* my/Cathy's birthdays, x21, x even (Knuth)*/ -float -sre_random(void) -{ - static long rnd; - static int firsttime = 1; - long high1, low1; - long high2, low2; - - if (sre_reseed || firsttime) - { - sre_reseed = firsttime = 0; - if (sre_randseed <= 0) sre_randseed = 666; /* seeds of zero break me */ - high1 = sre_randseed / DIV; low1 = sre_randseed % DIV; - high2 = MULT / DIV; low2 = MULT % DIV; - rnd = (((high2*low1 + high1*low2) % DIV)*DIV + low1*low2) % RANGE; - } - high1 = rnd / DIV; low1 = rnd % DIV; - high2 = MULT / DIV; low2 = MULT % DIV; - rnd = (((high2*low1 + high1*low2) % DIV)*DIV + low1*low2) % RANGE; - - return ((float) rnd / (float) RANGE); -} -#undef RANGE -#undef DIV -#undef MULT - - -/* Function: sre_srandom() - * - * Purpose: Initialize with a random seed. Seed can be - * any integer. - */ -void -sre_srandom(int seed) -{ - if (seed < 0) seed = -1 * seed; - sre_reseed = 1; - sre_randseed = seed; -} - - -/* Functions: DChoose(), FChoose() - * - * Purpose: Make a random choice from a normalized distribution. - * DChoose() is for double-precision vectors; - * FChoose() is for single-precision float vectors. - * Returns the number of the choice. 
- */ -int -DChoose(double *p, int N) -{ - double roll; /* random fraction */ - double sum; /* integrated prob */ - int i; /* counter over the probs */ - - roll = sre_random(); - sum = 0.0; - for (i = 0; i < N; i++) - { - sum += p[i]; - if (roll < sum) return i; - } - SQD_DASSERT2((fabs(1.0 - sum) < 1e-14)); /* a verification at level 2 */ - return (int) (sre_random() * N); /* bulletproof */ -} -int -FChoose(float *p, int N) -{ - float roll; /* random fraction */ - float sum; /* integrated prob */ - int i; /* counter over the probs */ - - roll = sre_random(); - sum = 0.0; - for (i = 0; i < N; i++) - { - sum += p[i]; - if (roll < sum) return i; - } - SQD_DASSERT2((fabs(1.0f - sum) < 1e-6f)); /* a verification at level 2 */ - return (int) (sre_random() * N); /* bulletproof */ -} - -/* Functions: DLogSum(), FLogSum() - * - * Calculate the sum of a log vector - * *in normal space*, and return the log of the sum. - */ -double -DLogSum(double *logp, int n) -{ - int x; - double max, sum; - - max = logp[0]; - for (x = 1; x < n; x++) - if (logp[x] > max) max = logp[x]; - sum = 0.0; - for (x = 0; x < n; x++) - if (logp[x] > max - 50.) - sum += exp(logp[x] - max); - sum = log(sum) + max; - return sum; -} -float -FLogSum(float *logp, int n) -{ - int x; - float max, sum; - - max = logp[0]; - for (x = 1; x < n; x++) - if (logp[x] > max) max = logp[x]; - sum = 0.0; - for (x = 0; x < n; x++) - if (logp[x] > max - 50.) - sum += exp(logp[x] - max); - sum = log(sum) + max; - return sum; -} - - -/* Function: IncompleteGamma() - * - * Purpose: Returns 1 - P(a,x) where: - * P(a,x) = \frac{1}{\Gamma(a)} \int_{0}^{x} t^{a-1} e^{-t} dt - * = \frac{\gamma(a,x)}{\Gamma(a)} - * = 1 - \frac{\Gamma(a,x)}{\Gamma(a)} - * - * Used in a chi-squared test: for a X^2 statistic x - * with v degrees of freedom, call: - * p = IncompleteGamma(v/2., x/2.) - * to get the probability p that a chi-squared value - * greater than x could be obtained by chance even for - * a correct model. (i.e. 
p should be large, say - * 0.95 or more). - * - * Method: Based on ideas from Numerical Recipes in C, Press et al., - * Cambridge University Press, 1988. - * - * Args: a - for instance, degrees of freedom / 2 [a > 0] - * x - for instance, chi-squared statistic / 2 [x >= 0] - * - * Return: 1 - P(a,x). - */ -double -IncompleteGamma(double a, double x) -{ - int iter; /* iteration counter */ - - if (a <= 0.) Die("IncompleteGamma(): a must be > 0"); - if (x < 0.) Die("IncompleteGamma(): x must be >= 0"); - - /* For x > a + 1 the following gives rapid convergence; - * calculate 1 - P(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)}: - * use a continued fraction development for \Gamma(a,x). - */ - if (x > a+1) - { - double oldp; /* previous value of p */ - double nu0, nu1; /* numerators for continued fraction calc */ - double de0, de1; /* denominators for continued fraction calc */ - - nu0 = 0.; /* A_0 = 0 */ - de0 = 1.; /* B_0 = 1 */ - nu1 = 1.; /* A_1 = 1 */ - de1 = x; /* B_1 = x */ - - oldp = nu1; - for (iter = 1; iter < 100; iter++) - { - /* Continued fraction development: - * set A_j = b_j A_j-1 + a_j A_j-2 - * B_j = b_j B_j-1 + a_j B_j-2 - * We start with A_2, B_2. 
- */ - /* j = even: a_j = iter-a, b_j = 1 */ - /* A,B_j-2 are in nu0, de0; A,B_j-1 are in nu1,de1 */ - nu0 = nu1 + ((double)iter - a) * nu0; - de0 = de1 + ((double)iter - a) * de0; - - /* j = odd: a_j = iter, b_j = x */ - /* A,B_j-2 are in nu1, de1; A,B_j-1 in nu0,de0 */ - nu1 = x * nu0 + (double) iter * nu1; - de1 = x * de0 + (double) iter * de1; - - /* rescale */ - if (de1) - { - nu0 /= de1; - de0 /= de1; - nu1 /= de1; - de1 = 1.; - } - /* check for convergence */ - if (fabs((nu1-oldp)/nu1) < 1.e-7) - return nu1 * exp(a * log(x) - x - Gammln(a)); - - oldp = nu1; - } - Die("IncompleteGamma(): failed to converge using continued fraction approx"); - } - else /* x <= a+1 */ - { - double p; /* current sum */ - double val; /* current value used in sum */ - - /* For x <= a+1 we use a convergent series instead: - * P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)}, - * where - * \gamma(a,x) = e^{-x}x^a \sum_{n=0}{\infty} \frac{\Gamma{a}}{\Gamma{a+1+n}} x^n - * which looks appalling but the sum is in fact rearrangeable to - * a simple series without the \Gamma functions: - * = \frac{1}{a} + \frac{x}{a(a+1)} + \frac{x^2}{a(a+1)(a+2)} ... - * and it's obvious that this should converge nicely for x <= a+1. - */ - - p = val = 1. / a; - for (iter = 1; iter < 10000; iter++) - { - val *= x / (a+(double)iter); - p += val; - - if (fabs(val/p) < 1.e-7) - return 1. 
- p * exp(a * log(x) - x - Gammln(a)); - } - Die("IncompleteGamma(): failed to converge using series approx"); - } - /*NOTREACHED*/ - return 0.; -} - diff --git a/forester/archive/RIO/others/hmmer/squid/sre_string.c b/forester/archive/RIO/others/hmmer/squid/sre_string.c deleted file mode 100644 index 15255ba..0000000 --- a/forester/archive/RIO/others/hmmer/squid/sre_string.c +++ /dev/null @@ -1,524 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* sre_string.c - * - * my library of extra string functions. Some for portability - * across UNIXes - * - * RCS $Id: sre_string.c,v 1.1.1.1 2005/03/22 08:34:25 cmzmasek Exp $ - */ - -#include -#include -#include -#include -#include -#include "squid.h" - -/* Function: Strdup() - * - * Purpose: Implementation of the common (but non-ANSI) function - * strdup(). Robust against being passed a NULL pointer. - * - */ -char * -Strdup(char *s) -{ - char *new; - if (s == NULL) return NULL; - if ((new = (char *) malloc (strlen(s) +1)) == NULL) return NULL; - strcpy(new, s); - return new; -} - -/* Function: StringChop() - * Date: SRE, Wed Oct 29 12:10:02 1997 [TWA 721] - * - * Purpose: Chop trailing whitespace off of a string. 
- */ -void -StringChop(char *s) -{ - int i; - - i = strlen(s) - 1; /* set i at last char in string */ - while (i >= 0 && isspace((int) s[i])) i--; /* i now at last non-whitespace char, or -1 */ - s[i+1] = '\0'; -} - -int -Strinsert(char *s1, /* string to insert a char into */ - char c, /* char to insert */ - int pos) /* position in s1 to insert c at */ -{ - char oldc; - char *s; - - for (s = s1 + pos; c; s++) - { - /* swap current char for inserted one */ - oldc = *s; /* pick up current */ - *s = c; /* put down inserted one */ - c = oldc; /* old becomes next to insert */ - } - *s = '\0'; - - return 1; -} - - -int -Strdelete(char *s1, /* string to delete a char from */ - int pos) /* position of char to delete 0..n-1 */ -{ - char *s; - - for (s = s1 + pos; *s; s++) - *s = *(s + 1); - - return 1; -} - -void -s2lower(char *s) -{ - for (; *s != '\0'; s++) - *s = sre_tolower((int) *s); -} - -void -s2upper(char *s) -{ - for (; *s != '\0'; s++) - *s = sre_toupper((int) *s); -} - - -void * -sre_malloc(char *file, int line, size_t size) -{ - void *ptr; - - SQD_DPRINTF3(("MALLOC: %d bytes (file %s line %d)\n", size, file, line)); - if ((ptr = malloc (size)) == NULL) - Die("malloc of %ld bytes failed: file %s line %d", size, file, line); - return ptr; -} - -void * -sre_realloc(char *file, int line, void *p, size_t size) -{ - void *ptr; - - if ((ptr = realloc(p, size)) == NULL) - Die("realloc of %ld bytes failed: file %s line %d", size, file, line); - return ptr; -} - - - -/* Function: Free2DArray(), Free3DArray() - * Date: SRE, Tue Jun 1 14:47:14 1999 [St. Louis] - * - * Purpose: Convenience functions for free'ing 2D - * and 3D pointer arrays. Tolerates any of the - * pointers being NULL, to allow "sparse" - * arrays. - * - * Args: p - array to be freed - * dim1 - n for first dimension - * dim2 - n for second dimension - * - * e.g. 
a 2d array is indexed p[0..dim1-1][] - * a 3D array is indexed p[0..dim1-1][0..dim2-1][] - * - * Returns: void - * - * Diagnostics: (void) - * "never fails" - */ -void -Free2DArray(void **p, int dim1) -{ - int i; - - if (p != NULL) { - for (i = 0; i < dim1; i++) - if (p[i] != NULL) free(p[i]); - free(p); - } -} -void -Free3DArray(void ***p, int dim1, int dim2) -{ - int i, j; - - if (p != NULL) { - for (i = 0; i < dim1; i++) - if (p[i] != NULL) { - for (j = 0; j < dim2; j++) - if (p[i][j] != NULL) free(p[i][j]); - free(p[i]); - } - free(p); - } -} - - -/* Function: RandomSequence() - * - * Purpose: Generate an iid symbol sequence according - * to some alphabet, alphabet_size, probability - * distribution, and length. Return the - * sequence. - * - * Args: alphabet - e.g. "ACGT" - * p - probability distribution [0..n-1] - * n - number of symbols in alphabet - * len - length of generated sequence - * - * Return: ptr to random sequence, or NULL on failure. - */ -char * -RandomSequence(char *alphabet, float *p, int n, int len) -{ - char *s; - int x; - - s = (char *) MallocOrDie (sizeof(char) * (len+1)); - for (x = 0; x < len; x++) - s[x] = alphabet[FChoose(p,n)]; - s[x] = '\0'; - return s; -} - -/* Function: sre_fgets() - * Date: SRE, Thu May 13 10:56:28 1999 [St. Louis] - * - * Purpose: Dynamic allocation version of fgets(), - * capable of reading unlimited line lengths. - * - * Args: buf - ptr to a string (may be reallocated) - * n - ptr to current allocated length of buf, - * (may be changed) - * fp - open file ptr for reading - * - * Before the first call to sre_fgets(), - * buf should be initialized to NULL and n to 0. - * They're a linked pair, so don't muck with the - * allocation of buf or the value of n while - * you're still doing sre_fgets() calls with them. - * - * Returns: ptr to the buffer on success. - * NULL on EOF (buf isn't to be used in this case) - * sre_fgets() *always* results in an allocation - * in buf. 
- * - * The reason to have it return a ptr to buf - * is that it makes wrapper macros easy; see - * MSAFileGetLine() for an example. - * - * Example: char *buf; - * int n; - * FILE *fp; - * - * fp = fopen("my_file", "r"); - * buf = NULL; - * n = 0; - * while (sre_fgets(&buf, &n, fp) != NULL) - * { - * do stuff with buf; - * } - */ -char * -sre_fgets(char **buf, int *n, FILE *fp) -{ - char *s; - int len; - int pos; - - if (*n == 0) - { - *buf = MallocOrDie(sizeof(char) * 128); - *n = 128; - } - - /* Simple case 1. We're sitting at EOF, or there's an error. - * fgets() returns NULL, so we return NULL. - */ - if (fgets(*buf, *n, fp) == NULL) return NULL; - - /* Simple case 2. fgets() got a string, and it reached EOF. - * return success status, so caller can use - * the last line; on the next call we'll - * return the 0 for the EOF. - */ - if (feof(fp)) return *buf; - - /* Simple case 3. We got a complete string, with \n, - * and don't need to extend the buffer. - */ - len = strlen(*buf); - if ((*buf)[len-1] == '\n') return *buf; - - /* The case we're waiting for. We have an incomplete string, - * and we have to extend the buffer one or more times. Make - * sure we overwrite the previous fgets's \0 (hence +(n-1) - * in first step, rather than 128, and reads of 129, not 128). - */ - pos = (*n)-1; - while (1) { - *n += 128; - *buf = ReallocOrDie(*buf, sizeof(char) * (*n)); - s = *buf + pos; - if (fgets(s, 129, fp) == NULL) return *buf; - len = strlen(s); - if (s[len-1] == '\n') return *buf; - pos += 128; - } - /*NOTREACHED*/ -} - -/* Function: sre_strcat() - * Date: SRE, Thu May 13 09:36:32 1999 [St. Louis] - * - * Purpose: Dynamic memory version of strcat(). - * appends src to the string that dest points to, - * extending allocation for dest if necessary. - * - * One timing experiment (100 successive appends of - * 1-255 char) shows sre_strcat() has about a 20% - * overhead relative to strcat(). 
However, if optional - * length info is passed, sre_strcat() is about 30% - * faster than strcat(). - * - * Args: dest - ptr to string (char **), '\0' terminated - * ldest - length of dest, if known; or -1 if length unknown. - * src - string to append to dest, '\0' terminated - * lsrc - length of src, if known; or -1 if length unknown. - * - * dest may be NULL, in which case this is - * the equivalent of dest = Strdup(src). - * - * src may also be NULL, in which case - * dest is unmodified (but why would you want to pass - * a NULL src?) - * - * if both dest and src are NULL, dest is - * unmodified; it stays NULL. - * - * the length parameters are optional. If a -1 - * is passed, sre_strcat() will call strlen() to - * determine the length itself. Passing length - * info saves the strlen() calls and can speed things - * up if lots of successive appends need to be done. - * - * Returns: new length of dest (>=0 on success); - * dest is (probably) reallocated, and modified - * to a longer string, '\0' terminated. - */ -int -sre_strcat(char **dest, int ldest, char *src, int lsrc) -{ - int len1, len2; - - if (ldest < 0) len1 = ((*dest == NULL) ? 0 : strlen(*dest)); - else len1 = ldest; - - if (lsrc < 0) len2 = (( src == NULL) ? 0 : strlen(src)); - else len2 = lsrc; - - if (len2 == 0) return len1; - - if (*dest == NULL) *dest = MallocOrDie(sizeof(char) * (len2+1)); - else *dest = ReallocOrDie(*dest, sizeof(char) * (len1+len2+1)); - - memcpy((*dest)+len1, src, len2+1); - return len1+len2; -} - -/* Function: sre_strtok() - * Date: SRE, Wed May 19 16:30:20 1999 [St. Louis] - * - * Purpose: Thread-safe version of strtok(). - * - * Returns ptr to next token in a string: skips - * until it reaches a character that is not in the delim - * string, and sets beginning of token. Skips to - * next delim character (or '\0') to set the end; replaces that - * character with '\0'. 
- * If there's still more string left, sets s to point to next - * character after the '\0' that was written, so successive - * calls extract tokens in succession. If there was no string - * left, s points at the terminal '\0'. - * - * If no token is found, returns NULL. - * - * Also returns the length of the token, which - * may save us a strlen() call in some applications. - * - * Limitations: - * *s can't be a constant string, since we write to it. - * - * Example: - * char *tok; - * int len; - * char *s; - * char buf[50] = "This is a sentence."; - * - * s = buf; - * tok = sre_strtok(&s, " ", &len); - * tok is "This"; s is "is a sentence."; len is 4. - * tok = sre_strtok(&s, " ", &len); - * tok is "is"; s is " a sentence."; len is 2. - * tok = sre_strtok(&s, " ", &len); - * tok is "a"; s is "sentence."; len is 1. - * tok = sre_strtok(&s, " ", &len); - * tok is "sentence."; s is "\0"; len is 9. - * tok = sre_strtok(&s, " ", &len); - * tok is NULL; s is "\0", len is undefined. - * - * Args: s - a tmp, modifiable ptr to string - * delim - characters that delimits tokens - * len - RETURN: length of token; pass NULL if not wanted - * - * Returns: ptr to next token, or NULL if there aren't any. - */ -char * -sre_strtok(char **s, char *delim, int *len) -{ - char *begin, *end; - int n; - - begin = *s; - begin += strspn(begin, delim); - if (! *begin) return NULL; - - n = strcspn(begin, delim); - end = begin + n; - if (*end == '\0') { *s = end;} - else { - *end = '\0'; - *s = end+1; - } - - if (len != NULL) *len = n; - return begin; -} - - - -/* Function: sre_strdup() - * Date: SRE, Wed May 19 17:57:28 1999 [St. Louis] - * - * Purpose: A version of the common but non-ANSI strdup() - * function. Can pass len, if known, to save a - * strlen() call. - * - * Args: s - string to duplicate - * n - length of string, if known; -1 if unknown. - * - * Returns: allocated copy of string. - * NULL on failure. 
- */ -char * -sre_strdup(char *s, int n) -{ - char *new; - - if (s == NULL) return NULL; - if (n < 0) n = strlen(s); - new = MallocOrDie (sizeof(char) * (n+1)); - strcpy(new, s); - return new; -} - - -/* Function: sre_strncpy() - * Date: SRE, Tue Jun 22 10:10:46 1999 [Sanger Centre] - * - * Purpose: a strncpy() that makes sure it adds a trailing \0. - * - * Args: s1 - string to copy to (allocated n+1 or larger) - * s2 - string to copy from - * n - number of chars to copy - * - * Returns: s1. - * Done only for consistency with strncpy(). Not clear - * why it's useful for a strncpy() to return s1. - */ -char * -sre_strncpy(char *s1, char *s2, int n) -{ - strncpy(s1,s2,n); - s1[n] = '\0'; - return s1; -} - -/* Function: IsBlankline() - * Date: SRE, Fri Jun 18 14:36:08 1999 [St. Louis] - * - * Purpose: Returns TRUE if string consists solely of whitespace. - * - * Args: s - string to check - */ -int -IsBlankline(char *s) -{ - for (; *s != '\0'; s++) - if (! isspace(*s)) return FALSE; - return TRUE; -} - - - -#ifdef CUBS_WIN -/* A timing test for sre_strcat() - * cc -O2 -g sre_string.c sre_ctype.c sqerror.c sre_math.c hsregex.c -lm - * 15.200u - 5.360u = 9.84u if sre_strcat() with no length info passed - * 13.660u - 5.360u = 8.30u if strcat(), with a single malloc(). - * 11.370u - 5.360u = 6.01u if sre_strcat() with length info passed. 
- */ -int main(void) -{ - float p[4] = {0.25, 0.25, 0.25, 0.25}; - int buflen; - int len; - int nappends; - int nstrings; - char *s1 = NULL; - char *s2; - int i; - - nappends = 100; - nstrings = 1000; - while (nstrings--) - { - /* s1 = malloc(sizeof(char) * (255*nappends+1)); - s1[0] = '\0'; - */ - - s1 = NULL; - len = 0; - for (i = 0; i < nappends; i++) - { - buflen = CHOOSE(255) + 1; - s2 = RandomSequence("ACGT", p, 4, buflen); - - /* strcat(s1,s2); */ - if ((len = sre_strcat(&s1, len, s2, buflen)) < 0) exit(1); - free(s2); - } - free(s1); - } - exit(0); -} -#endif /*CUBS_WIN*/ diff --git a/forester/archive/RIO/others/hmmer/squid/sreformat_main.c b/forester/archive/RIO/others/hmmer/squid/sreformat_main.c deleted file mode 100644 index 709f5ba..0000000 --- a/forester/archive/RIO/others/hmmer/squid/sreformat_main.c +++ /dev/null @@ -1,251 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* sreformat_main.c - * Mon Sep 13 13:06:51 1993 - * - * sreformat - reformat sequence files. 
- * renamed sreformat from reformat, Tue Jun 30 10:53:38 1998 - * - * CVS $Id: sreformat_main.c,v 1.1.1.1 2005/03/22 08:34:25 cmzmasek Exp $ - */ - - -#include -#include -#include -#include "squid.h" -#include "msa.h" - -static char banner[] = "sreformat - convert between sequence formats"; - -static char usage[] = "\ -Usage: sreformat [-options] \n\ - Output format choices: Unaligned Aligned\n\ - ----------- -------\n\ - fasta stockholm\n\ - embl msf\n\ - genbank a2m\n\ - gcg phylip\n\ - gcgdata clustal\n\ - pir selex\n\ - raw eps\n\n\ - Available options are:\n\ - -h : help; print brief help on version and usage\n\ - -d : force DNA alphabet for nucleic acid sequence\n\ - -r : force RNA alphabet for nucleic acid sequence\n\ - -l : force lower case\n\ - -u : force upper case\n\ - -x : convert non-IUPAC chars in DNA to N's for IUPAC/BLAST compatibility\n\ -"; - -static char experts[] = "\ - Expert options:\n\ - --informat : input sequence file is in format \n\ - --mingap : remove columns containing all gaps (seqfile=alignment)\n\ - --nogap : remove columns containing any gaps (seqfile=alignment)\n\ - --pfam : modify Stockholm format output to be in PFAM style (1 line/seq)\n\ - --sam : try to convert gaps to SAM style (seqfile=alignment)\n\ - --samfrac : convert to SAM convention; cols w/ gapfrac > x are inserts\n\ - --gapsym : convert all gaps to character ''\n\ -"; - -static struct opt_s OPTIONS[] = { - { "-d", TRUE, sqdARG_NONE }, - { "-h", TRUE, sqdARG_NONE }, - { "-l", TRUE, sqdARG_NONE }, - { "-r", TRUE, sqdARG_NONE }, - { "-u", TRUE, sqdARG_NONE }, - { "-x", TRUE, sqdARG_NONE }, - { "--gapsym", FALSE, sqdARG_CHAR }, - { "--informat",FALSE, sqdARG_STRING }, - { "--mingap", FALSE, sqdARG_NONE }, - { "--nogap", FALSE, sqdARG_NONE }, - { "--pfam", FALSE, sqdARG_NONE }, - { "--sam", FALSE, sqdARG_NONE }, - { "--samfrac", FALSE, sqdARG_FLOAT }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *seqfile; /* 
name of sequence file */ - char *format; - SQFILE *dbfp; /* open sequence file */ - int fmt; /* format of seqfile */ - int outfmt; /* output format */ - char *seq; /* sequence */ - SQINFO sqinfo; - int i; - - int force_rna; /* TRUE to force RNA alphabet */ - int force_dna; /* TRUE to force DNA alphabet */ - int force_lower; /* TRUE to force lower case */ - int force_upper; /* TRUE to force upper case */ - int x_is_bad; /* TRUE to convert X to N */ - int do_mingap; /* TRUE to remove columns containing all gaps */ - int do_nogap; /* TRUE to remove columns containing any gaps */ - int do_pfam; /* TRUE to make SELEX -> PFAM */ - int samize; /* TRUE to SAMize an A2M conversion */ - float samfrac; /* -1, or gap fraction for a SAM conversion */ - int expect_alignment; /* TRUE to expect an input alignment to convert */ - char gapsym; /* 0 if unset; else = character to use for gaps */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - /*********************************************** - * Parse command line - ***********************************************/ - - force_rna = FALSE; - force_dna = FALSE; - force_upper = FALSE; - force_lower = FALSE; - x_is_bad = FALSE; - do_mingap = FALSE; - do_nogap = FALSE; - do_pfam = FALSE; - samize = FALSE; - samfrac = -1.0; - fmt = SQFILE_UNKNOWN; - expect_alignment = FALSE; - gapsym = 0; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-a") == 0) expect_alignment= TRUE; - else if (strcmp(optname, "-d") == 0) force_dna = TRUE; - else if (strcmp(optname, "-l") == 0) force_lower = TRUE; - else if (strcmp(optname, "-r") == 0) force_rna = TRUE; - else if (strcmp(optname, "-u") == 0) force_upper = TRUE; - else if (strcmp(optname, "-x") == 0) x_is_bad = TRUE; - else if (strcmp(optname, "--gapsym") == 0) gapsym = *optarg; - else if (strcmp(optname, "--mingap") == 0) do_mingap = TRUE; - else 
if (strcmp(optname, "--nogap") == 0) do_nogap = TRUE; - else if (strcmp(optname, "--pfam") == 0) do_pfam = TRUE; - else if (strcmp(optname, "--sam") == 0) samize = TRUE; - else if (strcmp(optname, "--samfrac") == 0) samfrac = atof(optarg); - else if (strcmp(optname, "--informat") == 0) { - fmt = String2SeqfileFormat(optarg); - if (fmt == SQFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - - if (argc - optind != 2) - Die("%s\n", usage); - if (force_lower && force_upper) - Die("Can't force both upper case and lower case. Stop trying to confuse me.\n%s", - usage); - if (force_rna && force_dna) - Die("Can't force both RNA and DNA. Stop trying to find bugs. You'll be sorry.\n%s", - usage); - - format = argv[optind]; optind++; - seqfile = argv[optind]; optind++; - - /*********************************************** - * Figure out what format we're supposed to write - ***********************************************/ - - if ((outfmt = String2SeqfileFormat(format)) == SQFILE_UNKNOWN) - Die("Unknown output format %s\n%s", format, usage); - - /*********************************************** - * Reformat the file, printing to stdout. - ***********************************************/ - - /* If the output format is an alignment, then the input format - * has to be an alignment. 
- */ - if (IsAlignmentFormat(outfmt)) - { - MSAFILE *afp; - MSA *msa; - - if ((afp = MSAFileOpen(seqfile, fmt, NULL)) == NULL) - Die("Alignment file %s could not be opened for reading", seqfile); - - while ((msa = MSAFileRead(afp)) != NULL) - { - /* If asked, convert upper/lower convention and - * gap character conventions now - */ - if (do_mingap) MSAMingap(msa); - if (do_nogap) MSANogap(msa); - if (gapsym) AlignmentHomogenousGapsym(msa->aseq, msa->nseq, msa->alen, gapsym); - if (samize) SAMizeAlignment(msa->aseq, msa->nseq, msa->alen); - if (samfrac >= 0) SAMizeAlignmentByGapFrac(msa->aseq, msa->nseq, msa->alen, samfrac); - - for (i = 0; i < msa->nseq; i++) - { - if (force_dna) ToDNA(msa->aseq[i]); - if (force_rna) ToRNA(msa->aseq[i]); - if (x_is_bad) ToIUPAC(msa->aseq[i]); - if (force_lower) s2lower(msa->aseq[i]); - if (force_upper) s2upper(msa->aseq[i]); - } - - /* This code block can be replaced with a - * MSAFileWrite() call someday... SRE Sun Apr 22 19:17:19 2001 - */ - switch (outfmt) { - case MSAFILE_A2M: WriteA2M(stdout, msa); break; - case MSAFILE_CLUSTAL: WriteClustal(stdout, msa); break; - case MSAFILE_MSF: WriteMSF(stdout, msa); break; - case MSAFILE_PHYLIP: WritePhylip(stdout, msa); break; - case MSAFILE_SELEX: - if (do_pfam) WriteSELEXOneBlock(stdout, msa); - else WriteSELEX(stdout, msa); - break; - case MSAFILE_EPS: EPSWriteSmallMSA(stdout, msa); break; - case MSAFILE_STOCKHOLM: - if (do_pfam) WriteStockholmOneBlock(stdout, msa); - else WriteStockholm(stdout, msa); - break; - default: - Die("can't write. 
no such alignment format %d\n", outfmt); - } - - MSAFree(msa); - } - MSAFileClose(afp); - } - else - { - if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL) - Die("Failed to open sequence file %s for reading", seqfile); - - while (ReadSeq(dbfp, fmt, &seq, &sqinfo)) - { - if (force_dna) ToDNA(seq); - if (force_rna) ToRNA(seq); - if (x_is_bad) ToIUPAC(seq); - if (force_lower) s2lower(seq); - if (force_upper) s2upper(seq); - - WriteSeq(stdout, outfmt, seq, &sqinfo); - FreeSequence(seq, &sqinfo); - } - SeqfileClose(dbfp); - } - - return 0; -} - diff --git a/forester/archive/RIO/others/hmmer/squid/ssi.c b/forester/archive/RIO/others/hmmer/squid/ssi.c deleted file mode 100644 index 04bb4a5..0000000 --- a/forester/archive/RIO/others/hmmer/squid/ssi.c +++ /dev/null @@ -1,1504 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- *****************************************************************/ - -#include -#include -#include -#include -#include -#include -#include "squid.h" -#include "ssi.h" - -static sqd_uint32 v20magic = 0xf3f3e9b1; /* SSI 1.0: "ssi1" + 0x80808080 */ -static sqd_uint32 v20swap = 0xb1e9f3f3; /* byteswapped */ - -static int read_i16(FILE *fp, sqd_uint16 *ret_result); -static int read_i32(FILE *fp, sqd_uint32 *ret_result); -static int read_i64(FILE *fp, sqd_uint64 *ret_result); -static int read_offset(FILE *fp, char mode, SSIOFFSET *ret_offset); -static int write_i16(FILE *fp, sqd_uint16 n); -static int write_i32(FILE *fp, sqd_uint32 n); -static int write_i64(FILE *fp, sqd_uint64 n); -static int write_offset(FILE *fp, SSIOFFSET *offset); -static int binary_search(SSIFILE *sfp, char *key, int klen, SSIOFFSET *base, - sqd_uint32 recsize, sqd_uint32 maxidx); -static int indexfile_position(SSIFILE *sfp, SSIOFFSET *base, sqd_uint32 len, - sqd_uint32 n); -static void clear_ssifile(SSIFILE *sfp); -static int write_index(FILE *fp, SSIINDEX *g); -static int write_index_chunk(SSIINDEX *g); -static sqd_uint64 current_chunk_size(SSIINDEX *g); -static int load_indexfile(SSIFILE *sfp); - -/* Function: SSIOpen() - * Date: SRE, Sun Dec 31 12:40:03 2000 [St. Louis] - * - * Purpose: Opens the SSI index file {filename} and returns - * a SSIFILE * stream thru {ret_sfp}. - * The caller must eventually close this stream using - * SSIClose(). More than one index file can be open - * at once. - * - * Args: filename - full path to a SSI index file - * - * Returns: Returns 0 on success, nonzero on failure. 
- */ -int -SSIOpen(char *filename, SSIFILE **ret_sfp) -{ - SSIFILE *sfp = NULL; - int status; - if ((sfp = malloc(sizeof(SSIFILE))) == NULL) return SSI_ERR_MALLOC; - if ((sfp->fp = fopen(filename, "rb")) == NULL) return SSI_ERR_NOFILE; - status = load_indexfile(sfp); - *ret_sfp = sfp; - return status; -} -/* load_indexfile(): given a SSIFILE structure with an open and positioned - * stream (fp) -- but no other data loaded -- read the next SSIFILE - * in from disk. We use this routine without its SSIOpen() wrapper - * as part of the external mergesort when creating large indices. - */ -static int -load_indexfile(SSIFILE *sfp) -{ - sqd_uint32 magic; - sqd_uint16 i; /* counter over files */ - int status; /* overall return status if an error is thrown */ - - status = SSI_ERR_BADFORMAT; /* default: almost every kind of error is a bad format error */ - - sfp->filename = NULL; - sfp->fileformat = NULL; - sfp->fileflags = NULL; - sfp->bpl = NULL; - sfp->rpl = NULL; - sfp->nfiles = 0; - if (! read_i32(sfp->fp, &magic)) {status = SSI_ERR_BADMAGIC; goto FAILURE; } - if (magic != v20magic && magic != v20swap) {status = SSI_ERR_BADMAGIC; goto FAILURE; } - if (! read_i32(sfp->fp, &(sfp->flags))) goto FAILURE; - - /* If we have 64-bit offsets, make sure we can deal with them. - */ -#ifndef HAS_64BIT_FILE_OFFSETS - if ((sfp->flags & SSI_USE64_INDEX) || - (sfp->flags & SSI_USE64)) - { status = SSI_ERR_NO64BIT; goto FAILURE; } -#endif - - sfp->imode = (sfp->flags & SSI_USE64_INDEX) ? SSI_OFFSET_I64 : SSI_OFFSET_I32; - sfp->smode = (sfp->flags & SSI_USE64) ? SSI_OFFSET_I64 : SSI_OFFSET_I32; - - if (! read_i16(sfp->fp, &(sfp->nfiles))) goto FAILURE; - if (! read_i32(sfp->fp, &(sfp->nprimary))) goto FAILURE; - if (! read_i32(sfp->fp, &(sfp->nsecondary))) goto FAILURE; - if (! read_i32(sfp->fp, &(sfp->flen))) goto FAILURE; - if (! read_i32(sfp->fp, &(sfp->plen))) goto FAILURE; - if (! read_i32(sfp->fp, &(sfp->slen))) goto FAILURE; - if (! 
read_i32(sfp->fp, &(sfp->frecsize))) goto FAILURE; - if (! read_i32(sfp->fp, &(sfp->precsize))) goto FAILURE; - if (! read_i32(sfp->fp, &(sfp->srecsize))) goto FAILURE; - - if (! read_offset(sfp->fp, sfp->imode, &(sfp->foffset))) goto FAILURE; - if (! read_offset(sfp->fp, sfp->imode, &(sfp->poffset))) goto FAILURE; - if (! read_offset(sfp->fp, sfp->imode, &(sfp->soffset))) goto FAILURE; - - /* Read the file information and keep it. - * We expect the number of files to be small, so reading it - * once should be advantageous overall. If SSI ever had to - * deal with large numbers of files, you'd probably want to - * read file information on demand. - */ - if (sfp->nfiles == 0) goto FAILURE; - if ((sfp->filename=malloc(sizeof(char *) *sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } - for (i = 0; i < sfp->nfiles; i++) sfp->filename[i] = NULL; - if ((sfp->fileformat=malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } - if ((sfp->fileflags =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } - if ((sfp->bpl =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } - if ((sfp->rpl =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } - - for (i = 0; i < sfp->nfiles; i++) - { - /* We have to explicitly position, because header and file - * records may expand in the future; frecsize and foffset - * give us forwards compatibility. - */ - if (indexfile_position(sfp, &(sfp->foffset), sfp->frecsize, i) !=0) goto FAILURE; - if ((sfp->filename[i] =malloc(sizeof(char)*sfp->flen)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } - if (fread(sfp->filename[i],sizeof(char),sfp->flen, sfp->fp)!=sfp->flen) goto FAILURE; - if (! read_i32(sfp->fp, &(sfp->fileformat[i]))) goto FAILURE; - if (! read_i32(sfp->fp, &(sfp->fileflags[i]))) goto FAILURE; - if (! read_i32(sfp->fp, &(sfp->bpl[i]))) goto FAILURE; - if (! 
read_i32(sfp->fp, &(sfp->rpl[i]))) goto FAILURE; - } - - /* Success. Return 0. - */ - return 0; - - FAILURE: - /* Failure: free the damaged structure, return status code. - */ - SSIClose(sfp); - return status; -} - - - -/* Function: SSIGetOffsetByName() - * Date: SRE, Sun Dec 31 13:55:31 2000 [St. Louis] - * - * Purpose: Looks up the string {key} in the open index {sfp}. - * {key} can be either a primary or secondary key. If {key} - * is found, {*ret_fh} contains a unique handle on - * the file that contains {key} (suitable for an SSIFileInfo() - * call, or for comparison to the handle of the last file - * that was opened for retrieval), and {offset} is filled - * in with the offset in that file. - * - * Args: sfp - open index file - * key - string to search for - * ret_fh - RETURN: handle on file that key is in - * ret_offset - RETURN: offset of the start of that key's record - * - * Returns: 0 on success. - * non-zero on error. - */ -int -SSIGetOffsetByName(SSIFILE *sfp, char *key, int *ret_fh, - SSIOFFSET *ret_offset) -{ - int status; - sqd_uint16 fnum; - - /* Look in the primary keys. - */ - status = binary_search(sfp, key, sfp->plen, &(sfp->poffset), sfp->precsize, - sfp->nprimary); - if (status == 0) { - /* We found it as a primary key; get our data & return. - */ - if (! read_i16(sfp->fp, &fnum)) return SSI_ERR_NODATA; - *ret_fh = (int) fnum; - if (! read_offset(sfp->fp, sfp->smode, ret_offset)) return SSI_ERR_NODATA; - - return 0; /* success! (we don't need the other key data) */ - } else if (status == SSI_ERR_NO_SUCH_KEY) { - /* Not in the primary keys? OK, try the secondary keys. 
- */ - if (sfp->nsecondary > 0) { - char *pkey; - status = binary_search(sfp, key, sfp->slen, &(sfp->soffset), sfp->srecsize, - sfp->nsecondary); - if (status != 0) return status; - if ((pkey = malloc(sizeof(char) * sfp->plen)) == NULL) return SSI_ERR_MALLOC; - if (fread(pkey, sizeof(char), sfp->plen, sfp->fp) != sfp->plen) return SSI_ERR_NODATA; - - status = SSIGetOffsetByName(sfp, pkey, ret_fh, ret_offset); - free(pkey); - } - return status; - - } else return status; - /*NOTREACHED*/ -} - -/* Function: SSIGetOffsetByNumber() - * Date: SRE, Mon Jan 1 19:42:42 2001 [St. Louis] - * - * Purpose: Looks up primary key #{n} in the open index {sfp}. - * {n} ranges from 0..nprimary-1. When key #{n} - * is found, {*ret_fh} contains a unique - * handle on the file that contains {key} (suitable - * for an SSIFileInfo() call, or for comparison to - * the handle of the last file that was opened for retrieval), - * and {offset} is filled in with the offset in that file. - * - * Args: sfp - open index file - * n - primary key number to retrieve. - * ret_fh - RETURN: handle on file that key is in - * ret_offset - RETURN: offset of the start of that key's record - * - * Returns: 0 on success. - * non-zero on error. - */ -int -SSIGetOffsetByNumber(SSIFILE *sfp, int n, int *ret_fh, SSIOFFSET *ret_offset) -{ - sqd_uint16 fnum; - char *pkey; - - if (n >= sfp->nprimary) return SSI_ERR_NO_SUCH_KEY; - if (indexfile_position(sfp, &(sfp->poffset), sfp->precsize, n) != 0) - return SSI_ERR_SEEK_FAILED; - - if ((pkey = malloc(sizeof(char) * sfp->plen)) == NULL) return SSI_ERR_MALLOC; - if (fread(pkey, sizeof(char), sfp->plen, sfp->fp) != sfp->plen) return SSI_ERR_NODATA; - if (! read_i16(sfp->fp, &fnum)) return SSI_ERR_NODATA; - if (! read_offset(sfp->fp, sfp->smode, ret_offset)) return SSI_ERR_NODATA; - *ret_fh = fnum; - free(pkey); - return 0; -} - -/* Function: SSIGetSubseqOffset() - * Date: SRE, Mon Jan 1 19:49:31 2001 [St. Louis] - * - * Purpose: Implements SSI_FAST_SUBSEQ. 
- * - * Looks up a primary or secondary {key} in the open - * index {sfp}. Asks for the nearest offset to a - * subsequence starting at position {requested_start} - * in the sequence (numbering the sequence 1..L). - * If {key} is found, on return, {ret_fh} - * contains a unique handle on the file that contains - * {key} (suitable for an SSIFileInfo() call, or for - * comparison to the handle of the last file that was - * opened for retrieval); {record_offset} contains the - * disk offset to the start of the record; {data_offset} - * contains the disk offset either exactly at the requested - * residue, or at the start of the line containing the - * requested residue; {ret_actual_start} contains the - * coordinate (1..L) of the first valid residue at or - * after {data_offset}. {ret_actual_start} is <= - * {requested_start}. - * - * Args: sfp - open index file - * key - primary or secondary key to find - * requested_start - residue we'd like to start at (1..L) - * ret_fh - RETURN: handle for file the key is in - * record_offset - RETURN: offset of entire record - * data_offset - RETURN: offset of subseq (see above) - * ret_actual_start- RETURN: coord (1..L) of residue at data_offset - * - * Returns: 0 on success, non-zero on failure. - */ -int -SSIGetSubseqOffset(SSIFILE *sfp, char *key, int requested_start, - int *ret_fh, SSIOFFSET *record_offset, - SSIOFFSET *data_offset, int *ret_actual_start) -{ - int status; - sqd_uint32 len; - int r, b, i, l; /* tmp variables for "clarity", to match docs */ - - /* Look up the key. Rely on the fact that SSIGetOffsetByName() - * leaves the index file positioned at the rest of the data for this key. - */ - status = SSIGetOffsetByName(sfp, key, ret_fh, record_offset); - if (status != 0) return status; - - /* Check that we're allowed to do subseq lookup on that file. - */ - if (! (sfp->fileflags[*ret_fh] & SSI_FAST_SUBSEQ)) - return SSI_ERR_NO_SUBSEQS; - - /* Read the data we need for subseq lookup - */ - if (! 
read_offset(sfp->fp, sfp->smode, data_offset)) return SSI_ERR_NODATA; - if (! read_i32(sfp->fp, &len)) return SSI_ERR_NODATA; - - /* Set up tmp variables for clarity of equations below, - * and to make them match documentation (ssi-format.tex). - */ - r = sfp->rpl[*ret_fh]; /* residues per line */ - b = sfp->bpl[*ret_fh]; /* bytes per line */ - i = requested_start; /* start position 1..L */ - l = (i-1)/r; /* data line # (0..) that the residue is on */ - if (r == 0 || b == 0) return SSI_ERR_NO_SUBSEQS; - if (i < 0 || i > len) return SSI_ERR_RANGE; - - /* When b = r+1, there's nothing but sequence on each data line (and the \0), - * and we can find each residue precisely. - */ - if (b == r+1) { - if (sfp->smode == SSI_OFFSET_I32) { - data_offset->mode = SSI_OFFSET_I32; - data_offset->off.i32 = data_offset->off.i32 + l*b + (i-1)%r; - } else if (sfp->smode == SSI_OFFSET_I64) { - data_offset->mode = SSI_OFFSET_I64; - data_offset->off.i64 = data_offset->off.i64 + l*b + (i-1)%r; - } - *ret_actual_start = requested_start; - } else { - /* else, there's other stuff on seq lines, so the best - * we can do easily is to position at start of relevant line. - */ - if (sfp->smode == SSI_OFFSET_I32) { - data_offset->mode = SSI_OFFSET_I32; - data_offset->off.i32 = data_offset->off.i32 + l*b; - } else if (sfp->smode == SSI_OFFSET_I64) { - data_offset->mode = SSI_OFFSET_I64; - data_offset->off.i64 = data_offset->off.i64 + l*b; - } - /* yes, the eq below is = 1 + (i-1)/r*r but it's not = i. that's an integer /. */ - *ret_actual_start = 1 + l*r; - } - return 0; -} - -/* Function: SSISetFilePosition() - * Date: SRE, Tue Jan 2 09:13:46 2001 [St. Louis] - * - * Purpose: Uses {offset} to sets the file position for {fp}, usually an - * open sequence file, relative to the start of the file. - * Hides the details of system-dependent shenanigans necessary for - * file positioning in large (>2 GB) files. 
- * - * Behaves just like fseek(fp, offset, SEEK_SET) for 32 bit - * offsets and <2 GB files. - * - * Warning: if all else fails, in desperation, it will try to - * use fsetpos(). This requires making assumptions about fpos_t - * that may be unwarranted... assumptions that ANSI C prohibits - * me from making... though I believe the ./configure - * script robustly tests whether I can play with fpos_t like this. - * - * Args: fp - file to position. - * offset - SSI offset relative to file start. - * - * Returns: 0 on success, nonzero on error. - */ -int -SSISetFilePosition(FILE *fp, SSIOFFSET *offset) -{ - if (offset->mode == SSI_OFFSET_I32) { - if (fseek(fp, offset->off.i32, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED; - } -#ifndef HAS_64BIT_FILE_OFFSETS - else return SSI_ERR_NO64BIT; -#elif defined HAVE_FSEEKO && SIZEOF_OFF_T == 8 - else if (fseeko(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED; -#elif defined HAVE_FSEEKO64 && SIZEOF_OFF64_T == 8 - else if (fseeko64(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED; -#elif defined HAVE_FSEEK64 - else if (fseek64(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED; -#elif defined ARITHMETIC_FPOS_T && SIZEOF_FPOS_T == 8 - else if (fsetpos(fp, &(offset->off.i64)) != 0) return SSI_ERR_SEEK_FAILED; -#endif - return 0; -} - - -/* Function: SSIFileInfo() - * Date: SRE, Tue Jan 2 10:31:01 2001 [St. Louis] - * - * Purpose: Given a file number {fh} in an open index file - * {sfp}, retrieve file name {ret_filename} and - * the file format {ret_format}. - * - * {ret_filename} is a pointer to a string maintained - * internally by {sfp}. It should not be free'd; - * SSIClose(sfp) takes care of it. - * - * Args: sfp - open index file - * fh - handle on file to look up - * ret_filename - RETURN: name of file n - * ret_format - RETURN: format of file n - * - * Returns: 0 on success, nonzero on failure. 
- */ -int -SSIFileInfo(SSIFILE *sfp, int fh, char **ret_filename, int *ret_format) -{ - if (fh < 0 || fh >= sfp->nfiles) return SSI_ERR_BADARG; - *ret_filename = sfp->filename[fh]; - *ret_format = sfp->fileformat[fh]; - return 0; -} - -/* Function: SSIClose() - * Date: SRE, Sun Dec 31 14:56:37 2000 [St. Louis] - * - * Purpose: Close an open {SSIFILE *}. - * - * Args: sfp - index file to close. - * - * Returns: (void) - */ -void -SSIClose(SSIFILE *sfp) -{ - if (sfp != NULL) { - clear_ssifile(sfp); - if (sfp->fp != NULL) fclose(sfp->fp); - free(sfp); - } -} -/* clear_ssifile(): free the innards of SSIFILE, without - * destroying the structure or closing the stream. - */ -static void -clear_ssifile(SSIFILE *sfp) -{ - int i; - - if (sfp->filename != NULL) { - for (i = 0; i < sfp->nfiles; i++) - if (sfp->filename[i] != NULL) free(sfp->filename[i]); - free(sfp->filename); - } - if (sfp->fileformat != NULL) free(sfp->fileformat); - if (sfp->fileflags != NULL) free(sfp->fileflags); - if (sfp->bpl != NULL) free(sfp->bpl); - if (sfp->rpl != NULL) free(sfp->rpl); -} - - -/* Function: SSIRecommendMode() - * Date: SRE, Fri Feb 16 08:23:47 2001 [St. Louis] - * - * Purpose: Examines the file and determines whether it should be - * indexed with large file support or not; returns - * SSI_OFFSET_I32 for most files, SSI_OFFSET_I64 for large - * files, or -1 on failure. 
- * - * Args: file - name of file to check for size - * - * Returns: -1 on failure (including case where file is too big) - * SSI_OFFSET_I32 for most files (<= 2^31-1 bytes) - * SSI_OFFSET_I64 for large files (> 2^31-1 bytes) - */ -int -SSIRecommendMode(char *file) -{ -#if HAVE_STAT64 - struct stat64 s1; - if (stat64(file, &s1) == 0) { - if (s1.st_size <= 2146483647L) return SSI_OFFSET_I32; - else return SSI_OFFSET_I64; - } -#else - struct stat s2; - if (stat(file, &s2) == 0) { - if (s2.st_size <= 2146483647L) return SSI_OFFSET_I32; - else return SSI_OFFSET_I64; - } -#endif - return -1; -} - - -/* Function: SSICreateIndex() - * Date: SRE, Tue Jan 2 11:23:25 2001 [St. Louis] - * - * Purpose: Creates and initializes a SSI index structure. - * Sequence file offset type is specified by {mode}. - * - * Args: mode - SSI_OFFSET_I32 or SSI_OFFSET_I64, sequence file index mode. - * - * Returns: ptr to new index structure, or NULL on failure. - * Caller is responsible for free'ing the returned - * structure with SSIFreeIndex(). - */ -SSIINDEX * -SSICreateIndex(int mode) -{ - SSIINDEX *g; - - g = NULL; - if ((g = malloc(sizeof(SSIINDEX))) == NULL) goto FAILURE; - g->smode = mode; - g->imode = SSI_OFFSET_I32; /* index always starts as 32-bit; may get upgraded later */ - -#ifndef HAS_64BIT_FILE_OFFSETS - if (mode == SSI_OFFSET_I64) - Die("\ -Can't create a 64-bit SSI index on this system, sorry;\n\ -I don't have 64-bit file offset functions available.\n"); -#endif - - g->filenames = NULL; - g->fileformat = NULL; - g->bpl = NULL; - g->rpl = NULL; - g->flen = 0; - g->nfiles = 0; - - g->pkeys = NULL; - g->plen = 0; - g->nprimary = 0; - g->tot_primary = 0; - - g->skeys = NULL; - g->slen = 0; - g->nsecondary = 0; - g->tot_secondary = 0; - - g->tmpbase = NULL; - g->t1 = NULL; - g->chunkoffset = NULL; - g->nchunks = 0; - - /* temporarily disabled: sort-on-disk needs more thought! 
*/ - /* g->max_chunk_size= maxchunk; */ - g->max_chunk_size = 999999; - - /* All mallocs must go after NULL initializations, because of the cleanup strategy; - * we'll try to free anything non-NULL if a malloc fails. - */ - /* This is temporarily disabled. Sort-on-disk needs more thought! - if ((g->tmpbase = sre_strdup(tmpfile, -1)) == NULL) goto FAILURE; - */ - - if ((g->filenames = malloc(sizeof(char *) * SSI_FILE_BLOCK)) == NULL) goto FAILURE; - if ((g->fileformat= malloc(sizeof(sqd_uint32) * SSI_FILE_BLOCK)) == NULL) goto FAILURE; - if ((g->bpl = malloc(sizeof(sqd_uint32) * SSI_FILE_BLOCK)) == NULL) goto FAILURE; - if ((g->rpl = malloc(sizeof(sqd_uint32) * SSI_FILE_BLOCK)) == NULL) goto FAILURE; - - if ((g->pkeys = malloc(sizeof(struct ssipkey_s)* SSI_KEY_BLOCK))== NULL) goto FAILURE; - if ((g->skeys = malloc(sizeof(struct ssipkey_s)* SSI_KEY_BLOCK))== NULL) goto FAILURE; - - return g; - - FAILURE: - SSIFreeIndex(g); /* free the damaged structure */ - return NULL; -} - -/* Function: SSIGetFilePosition() - * Date: SRE, Tue Jan 2 09:59:26 2001 [St. Louis] - * - * Purpose: Fills {ret_offset} with the current disk - * offset of {fp}, relative to the start of the file. - * {mode} is set to either SSI_OFFSET_I32 or - * SSI_OFFSET_I64. If {mode} is _I32 (32 bit), just wraps - * a call to ftell(); otherwise deals with system-dependent - * details of 64-bit file offsets. - * - * Args: fp - open stream - * mode - SSI_OFFSET_I32 or SSI_OFFSET_I64 - * ret_offset - RETURN: file position - * - * Returns: 0 on success. nonzero on error. 
- */ -int -SSIGetFilePosition(FILE *fp, int mode, SSIOFFSET *ret_offset) -{ - if (mode == SSI_OFFSET_I32) - { - ret_offset->mode = SSI_OFFSET_I32; - ret_offset->off.i32 = ftell(fp); - if (ret_offset->off.i32 == -1) return SSI_ERR_TELL_FAILED; - } - else if (mode != SSI_OFFSET_I64) abort(); /* only happens on a coding error */ - else { - ret_offset->mode = SSI_OFFSET_I64; -#ifndef HAS_64BIT_FILE_OFFSETS - return SSI_ERR_NO64BIT; -#elif defined HAVE_FTELLO && SIZEOF_OFF_T == 8 - if ((ret_offset->off.i64 = ftello(fp)) == -1) return SSI_ERR_TELL_FAILED; -#elif defined HAVE_FTELLO64 && SIZEOF_OFF64_T == 8 - if ((ret_offset->off.i64 = ftello64(fp)) == -1) return SSI_ERR_TELL_FAILED; -#elif defined HAVE_FTELL64 - if ((ret_offset->off.i64 = ftell64(fp)) == -1) return SSI_ERR_TELL_FAILED; -#elif defined ARITHMETIC_FPOS_T && SIZEOF_FPOS_T == 8 - if (fgetpos(fp, &(ret_offset->off.i64)) != 0) return SSI_ERR_TELL_FAILED; -#endif - } - return 0; -} - -/* Function: SSIAddFileToIndex() - * Date: SRE, Tue Jan 2 12:54:36 2001 [St. Louis] - * - * Purpose: Adds the sequence file {filename}, which is known to - * be in format {fmt}, to the index {g}. Creates and returns - * a unique filehandle {fh} for then associating primary keys - * with this file using SSIAddPrimaryKeyToIndex(). - * - * Args: g - active index - * filename - file to add - * fmt - format code for this file (e.g. SQFILE_FASTA) - * ret_fh - RETURN: unique handle for this file - * - * Returns: 0 on success; nonzero on error. 
- */ -int -SSIAddFileToIndex(SSIINDEX *g, char *filename, int fmt, int *ret_fh) -{ - int n; - - if (g->nfiles >= SSI_MAXFILES) return SSI_ERR_TOOMANY_FILES; - - n = strlen(filename); - if ((n+1) > g->flen) g->flen = n+1; - - if ((g->filenames[g->nfiles] = sre_strdup(filename, n)) == NULL) return SSI_ERR_MALLOC; - g->fileformat[g->nfiles] = fmt; - g->bpl[g->nfiles] = 0; - g->rpl[g->nfiles] = 0; - *ret_fh = g->nfiles; /* handle is simply = file number */ - g->nfiles++; - - if (g->nfiles % SSI_FILE_BLOCK == 0) { - g->filenames = realloc(g->filenames, sizeof(char *) * (g->nfiles+SSI_FILE_BLOCK)); - if (g->filenames == NULL) return SSI_ERR_MALLOC; - g->fileformat= realloc(g->fileformat,sizeof(int) * (g->nfiles+SSI_FILE_BLOCK)); - if (g->fileformat == NULL) return SSI_ERR_MALLOC; - g->bpl = realloc(g->fileformat,sizeof(int) * (g->nfiles+SSI_FILE_BLOCK)); - if (g->bpl == NULL) return SSI_ERR_MALLOC; - g->rpl = realloc(g->fileformat,sizeof(int) * (g->nfiles+SSI_FILE_BLOCK)); - if (g->rpl == NULL) return SSI_ERR_MALLOC; - } - return 0; -} - - -/* Function: SSISetFileForSubseq() - * Date: SRE, Tue Jan 9 10:02:05 2001 [St. Louis] - * - * Purpose: Set SSI_FAST_SUBSEQ for the file indicated by - * filehandle {fh} in the index {g}, setting - * parameters {bpl} and {rpl} to the values given. - * {bpl} is the number of bytes per sequence data line. - * {rpl} is the number of residues per sequence data line. - * Caller must be sure that {bpl} and {rpl} do not change - * on any line of any sequence record in the file - * (except for the last data line of each record). If - * this is not the case in this file, SSI_FAST_SUBSEQ - * will not work, and this routine should not be - * called. - * - * Args: g - the active index - * fh - handle for file to set SSI_FAST_SUBSEQ on - * bpl - bytes per data line - * rpl - residues per data line - * - * Returns: 0 on success; 1 on error. 
- */ -int -SSISetFileForSubseq(SSIINDEX *g, int fh, int bpl, int rpl) -{ - if (fh < 0 || fh >= g->nfiles) return SSI_ERR_BADARG; - if (bpl <= 0 || rpl <= 0) return SSI_ERR_BADARG; - g->bpl[fh] = bpl; - g->rpl[fh] = rpl; - return 0; -} - - -/* Function: SSIAddPrimaryKeyToIndex() - * Date: SRE, Tue Jan 2 11:50:54 2001 [St. Louis] - * - * Purpose: Put primary key {key} in the index {g}, while telling - * the index this primary key is in the file associated - * with filehandle {fh} (returned by a previous call - * to SSIAddFileToIndex()), and its record starts at - * position {r_off} in the file. - * - * {d_off} and {L} are optional; they may be left unset - * by passing NULL and 0, respectively. (If one is - * provided, both must be provided.) If they are provided, - * {d_off} gives the position of the first line of sequence - * data in the record, and {L} gives the length of - * the sequence in residues. They are used when - * SSI_FAST_SUBSEQ is set for this file. If SSI_FAST_SUBSEQ - * is not set for the file, {d_off} and {L} will be - * ignored by the index reading API even if they are stored - * by the index writing API, so it doesn't hurt for the - * indexing program to provide them; typically they - * won't know whether it's safe to set SSI_FAST_SUBSEQ - * for the whole file until the whole file has been - * read and every key has already been added to the index. - * - * Args: g - active index - * key - primary key to add - * fh - handle on file that this key's in - * r_off - offset to start of record - * d_off - offset to start of sequence data - * L - length of sequence, or 0 - * - * Returns: 0 on success, nonzero on error. - */ -int -SSIAddPrimaryKeyToIndex(SSIINDEX *g, char *key, int fh, - SSIOFFSET *r_off, SSIOFFSET *d_off, int L) -{ - int n; /* a string length */ - - if (fh >= SSI_MAXFILES) return SSI_ERR_TOOMANY_FILES; - if (g->nprimary >= SSI_MAXKEYS) return SSI_ERR_TOOMANY_KEYS; - if (L > 0 && d_off == NULL) abort(); /* need both. 
*/ - - /* Before adding the key: check how big our chunk of - * index is. If it's getting too large, flush a chunk to disk tmpfile. - */ - if (current_chunk_size(g) >= g->max_chunk_size) write_index_chunk(g); - - n = strlen(key); - if ((n+1) > g->plen) g->plen = n+1; - - if ((g->pkeys[g->nprimary].key = sre_strdup(key, n)) == NULL) return SSI_ERR_MALLOC; - g->pkeys[g->nprimary].fnum = (sqd_uint16) fh; - g->pkeys[g->nprimary].r_off = *r_off; - if (d_off != NULL && L > 0) { - g->pkeys[g->nprimary].d_off = *d_off; - g->pkeys[g->nprimary].len = L; - } else { - /* yeah, this looks stupid, but look: we have to give a valid - looking, non-NULL d_off of some sort, or writes will fail. - It's going to be unused anyway. */ - g->pkeys[g->nprimary].d_off = *r_off; - g->pkeys[g->nprimary].len = 0; - } - g->pkeys[g->nprimary].handle = g->nprimary; - g->nprimary++; - - if (g->nprimary % SSI_KEY_BLOCK == 0) { - g->pkeys = realloc(g->pkeys, sizeof(struct ssipkey_s) * (g->nprimary+SSI_KEY_BLOCK)); - if (g->pkeys == NULL) return SSI_ERR_MALLOC; - } - - return 0; -} - - -/* Function: SSIAddSecondaryKeyToIndex() - * Date: SRE, Tue Jan 2 12:44:40 2001 [St. Louis] - * - * Purpose: Puts secondary key {key} in the index {g}, associating - * it with primary key {pkey} that was previously - * registered by SSIAddPrimaryKeyToIndex(). - * - * Args: g - active index - * key - secondary key to add - * pkey - primary key to associate this key with - * - * Returns: 0 on success, 1 on failure. 
- */ -int -SSIAddSecondaryKeyToIndex(SSIINDEX *g, char *key, char *pkey) -{ - int n; /* a string length */ - - if (g->nsecondary >= SSI_MAXKEYS) return SSI_ERR_TOOMANY_KEYS; - - n = strlen(key); - if ((n+1) > g->slen) g->slen = n+1; - - if ((g->skeys[g->nsecondary].key = sre_strdup(key, n)) == NULL) return SSI_ERR_MALLOC; - if ((g->skeys[g->nsecondary].pkey = sre_strdup(pkey, -1)) == NULL) return SSI_ERR_MALLOC; - g->nsecondary++; - - if (g->nsecondary % SSI_KEY_BLOCK == 0) { - g->skeys = realloc(g->skeys, sizeof(struct ssiskey_s) * (g->nsecondary+SSI_KEY_BLOCK)); - if (g->skeys == NULL) return SSI_ERR_MALLOC; - } - return 0; -} - - - - -/* Function: SSIWriteIndex() - * Date: SRE, Tue Jan 2 13:55:56 2001 [St. Louis] - * - * Purpose: Writes complete index {g} in SSI format to a - * binary file {file}. Does all - * the overhead of sorting the primary and secondary keys, - * and maintaining the association of secondary keys - * with primary keys during and after the sort. - * - * Args: file - file to write to - * g - index to sort & write out. - * - * Returns: 0 on success, nonzero on error. - */ -/* needed for qsort() */ -static int -pkeysort(const void *k1, const void *k2) -{ - struct ssipkey_s *key1; - struct ssipkey_s *key2; - key1 = (struct ssipkey_s *) k1; - key2 = (struct ssipkey_s *) k2; - return strcmp(key1->key, key2->key); -} -static int -skeysort(const void *k1, const void *k2) -{ - struct ssiskey_s *key1; - struct ssiskey_s *key2; - key1 = (struct ssiskey_s *) k1; - key2 = (struct ssiskey_s *) k2; - return strcmp(key1->key, key2->key); -} -int -SSIWriteIndex(char *file, SSIINDEX *g) -{ - FILE *fp; - int status; - - /* Case 1. Simple: the whole index fit in memory; write it to disk, - * we're done. - */ - if (g->t1 == NULL) { - if ((fp = fopen(file,"wb")) == NULL) return SSI_ERR_NOFILE; - status = write_index(fp, g); - fclose(fp); - g->tot_primary = g->nprimary; - g->tot_secondary = g->nsecondary; - return status; - } - - /* Case 2. 
Ugly: the index is big (and possibly *really* big, necessitating - * 64-bit offsets in the index itself!); we had to write the index to a tmp - * file on disk. Flush the last chunk to disk; then mergesort the chunks - * until we have one chunk to rule them all, one chunk to bind them. - */ - write_index_chunk(g); /* flush the last chunk. */ - fclose(g->t1); - - Die("oi, you haven't IMPLEMENTED the mergesort yet, dumbass."); - return 0; -} -static int -write_index(FILE *fp, SSIINDEX *g) -{ - int i; - sqd_uint32 header_flags, file_flags; - sqd_uint32 frecsize, precsize, srecsize; - sqd_uint64 foffset, poffset, soffset; - char *s, *s2; - - /* Magic-looking numbers come from adding up sizes - * of things in bytes - */ - frecsize = 16 + g->flen; - precsize = (g->smode == SSI_OFFSET_I64) ? 22+g->plen : 14+g->plen; - srecsize = g->slen + g->plen; - - header_flags = 0; - if (g->smode == SSI_OFFSET_I64) header_flags |= SSI_USE64; - if (g->imode == SSI_OFFSET_I64) header_flags |= SSI_USE64_INDEX; - - /* Magic-looking numbers again come from adding up sizes - * of things in bytes - */ - foffset = (header_flags & SSI_USE64_INDEX) ? 66 : 54; - poffset = foffset + frecsize*g->nfiles; - soffset = poffset + precsize*g->nprimary; - - /* Sort the keys - */ - qsort((void *) g->pkeys, g->nprimary, sizeof(struct ssipkey_s), pkeysort); - qsort((void *) g->skeys, g->nsecondary, sizeof(struct ssiskey_s), skeysort); - - /* Write the header - */ - if (! write_i32(fp, v20magic)) return SSI_ERR_FWRITE; - if (! write_i32(fp, header_flags)) return SSI_ERR_FWRITE; - if (! write_i16(fp, g->nfiles)) return SSI_ERR_FWRITE; - if (! write_i32(fp, g->nprimary)) return SSI_ERR_FWRITE; - if (! write_i32(fp, g->nsecondary)) return SSI_ERR_FWRITE; - if (! write_i32(fp, g->flen)) return SSI_ERR_FWRITE; - if (! write_i32(fp, g->plen)) return SSI_ERR_FWRITE; - if (! write_i32(fp, g->slen)) return SSI_ERR_FWRITE; - if (! write_i32(fp, frecsize)) return SSI_ERR_FWRITE; - if (! 
write_i32(fp, precsize)) return SSI_ERR_FWRITE; - if (! write_i32(fp, srecsize)) return SSI_ERR_FWRITE; - if (g->imode == SSI_OFFSET_I32) { - if (! write_i32(fp, foffset)) return SSI_ERR_FWRITE; - if (! write_i32(fp, poffset)) return SSI_ERR_FWRITE; - if (! write_i32(fp, soffset)) return SSI_ERR_FWRITE; - } else { - if (! write_i64(fp, foffset)) return SSI_ERR_FWRITE; - if (! write_i64(fp, poffset)) return SSI_ERR_FWRITE; - if (! write_i64(fp, soffset)) return SSI_ERR_FWRITE; - } - - /* The file section - */ - if ((s = malloc(sizeof(char) * g->flen)) == NULL) return SSI_ERR_MALLOC; - for (i = 0; i < g->nfiles; i++) - { - file_flags = 0; - if (g->bpl[i] > 0 && g->rpl[i] > 0) file_flags |= SSI_FAST_SUBSEQ; - - strcpy(s, g->filenames[i]); - if (fwrite(s, sizeof(char), g->flen, fp) != g->flen) return SSI_ERR_FWRITE; - if (! write_i32(fp, g->fileformat[i])) return SSI_ERR_FWRITE; - if (! write_i32(fp, file_flags)) return SSI_ERR_FWRITE; - if (! write_i32(fp, g->bpl[i])) return SSI_ERR_FWRITE; - if (! write_i32(fp, g->rpl[i])) return SSI_ERR_FWRITE; - } - free(s); - - /* The primary key section - */ - if ((s = malloc(sizeof(char) * g->plen)) == NULL) return SSI_ERR_MALLOC; - for (i = 0; i < g->nprimary; i++) - { - strcpy(s, g->pkeys[i].key); - if (fwrite(s, sizeof(char), g->plen, fp) != g->plen) return SSI_ERR_FWRITE; - if (! write_i16( fp, g->pkeys[i].fnum)) return SSI_ERR_FWRITE; - if (! write_offset(fp, &(g->pkeys[i].r_off))) return SSI_ERR_FWRITE; - if (! write_offset(fp, &(g->pkeys[i].d_off))) return SSI_ERR_FWRITE; - if (! 
write_i32( fp, g->pkeys[i].len)) return SSI_ERR_FWRITE; - } - - /* The secondary key section - */ - if (g->nsecondary > 0) { - if ((s2 = malloc(sizeof(char) * g->slen)) == NULL) return SSI_ERR_MALLOC; - for (i = 0; i < g->nsecondary; i++) - { - strcpy(s2, g->skeys[i].key); - strcpy(s, g->skeys[i].pkey); - if (fwrite(s2, sizeof(char), g->slen, fp) != g->slen) return SSI_ERR_FWRITE; - if (fwrite(s, sizeof(char), g->plen, fp) != g->plen) return SSI_ERR_FWRITE; - } - free(s2); - } - - free(s); - return 0; -} -static int -write_index_chunk(SSIINDEX *g) -{ - int status; - int i; - - SQD_DPRINTF1(("Writing index chunk %d to disk... \n", g->nchunks)); - - /* Save the offset for each chunk in an array; remember how many - * chunks we put into the tmp file t1. - */ - if (g->t1 == NULL) { - char *t1file = NULL; - if ((t1file = sre_strdup(g->tmpbase, -1)) == NULL) goto FAILURE; - if (sre_strcat(&t1file, -1, ".t1", 3) < 0) goto FAILURE; - if ((g->t1 = fopen(t1file, "wb")) == NULL) return SSI_ERR_NOFILE; - free(t1file); - - if ((g->chunkoffset = malloc(sizeof(fpos_t))) == NULL) goto FAILURE; - } else { - if ((g->chunkoffset = realloc(g->chunkoffset, sizeof(fpos_t) * (g->nchunks+1))) == NULL) goto FAILURE; - } - if (fgetpos(g->t1, &(g->chunkoffset[g->nchunks])) != 0) - Die("Index file size has apparently exceeded system limitations, sorry."); - g->nchunks++; - - /* Sort and append this chunk of the index to the open tmp file t1 - */ - if ((status = write_index(g->t1, g)) != 0) return status; - g->tot_primary += g->nprimary; - g->tot_secondary += g->nsecondary; - - /* Now, a partial free'ing of the index - clear the keys, but leave the files - */ - for (i = 0; i < g->nprimary; i++) free(g->pkeys[i].key); - for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].key); - for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].pkey); - free(g->pkeys); - free(g->skeys); - - /* Reset the primary and secondary keys sections, in preparation - * for accumulating more - */ - g->pkeys = NULL; - 
g->plen = 0; - g->nprimary = 0; - - g->skeys = NULL; - g->slen = 0; - g->nsecondary = 0; - - if ((g->pkeys = malloc(sizeof(struct ssipkey_s)* SSI_KEY_BLOCK))== NULL) goto FAILURE; - if ((g->skeys = malloc(sizeof(struct ssipkey_s)* SSI_KEY_BLOCK))== NULL) goto FAILURE; - return 0; - - FAILURE: - SSIFreeIndex(g); - return SSI_ERR_MALLOC; -} - - - -/* Function: SSIFreeIndex() - * Date: SRE, Tue Jan 2 11:44:08 2001 [St. Louis] - * - * Purpose: Free an index structure {g}. - * - * Args: g - ptr to an open index. - * - * Returns: (void) - */ -void -SSIFreeIndex(SSIINDEX *g) -{ - int i; - if (g != NULL) - { - for (i = 0; i < g->nfiles; i++) free(g->filenames[i]); - for (i = 0; i < g->nprimary; i++) free(g->pkeys[i].key); - for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].key); - for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].pkey); - if (g->filenames != NULL) free(g->filenames); - if (g->fileformat != NULL) free(g->fileformat); - if (g->bpl != NULL) free(g->bpl); - if (g->rpl != NULL) free(g->rpl); - if (g->pkeys != NULL) free(g->pkeys); - if (g->skeys != NULL) free(g->skeys); - if (g->tmpbase != NULL) free(g->tmpbase); - if (g->chunkoffset != NULL) free(g->chunkoffset); - if (g->t1 != NULL) fclose(g->t1); - free(g); - } -} - - -/* Function: SSIErrorString() - * Date: SRE, Tue Jan 2 10:38:10 2001 [St. Louis] - * - * Purpose: Returns a ptr to an internal string corresponding - * to error {n}, a code returned from any of the - * functions in the API that return non-zero on error. - * - * Args: n - error code - * - * Returns: ptr to an internal string. - */ -char * -SSIErrorString(int n) -{ - switch (n) { - case SSI_ERR_OK: return "ok (no error)"; - case SSI_ERR_NODATA: return "no data, fread() failed"; - case SSI_ERR_NO_SUCH_KEY: return "no such key"; - case SSI_ERR_MALLOC: return "out of memory, malloc() failed"; - case SSI_ERR_NOFILE: return "file not found, fopen() failed"; - case SSI_ERR_BADMAGIC: return "not a SSI file? 
(bad magic)"; - case SSI_ERR_BADFORMAT: return "corrupt format? unexpected data"; - case SSI_ERR_NO64BIT: return "no large file support for this system"; - case SSI_ERR_SEEK_FAILED: return "failed to reposition on disk"; - case SSI_ERR_TELL_FAILED: return "failed to get file position on disk"; - case SSI_ERR_NO_SUBSEQS: return "no fast subseq support for this seqfile"; - case SSI_ERR_RANGE: return "subseq start is out of range"; - case SSI_ERR_BADARG: return "an argument is out of range"; - default: return "unrecognized code"; - } - /*NOTREACHED*/ -} - -static int -read_i16(FILE *fp, sqd_uint16 *ret_result) -{ - sqd_uint16 result; - if (fread(&result, sizeof(sqd_uint16), 1, fp) != 1) return 0; - *ret_result = sre_ntoh16(result); - return 1; -} -static int -write_i16(FILE *fp, sqd_uint16 n) -{ - n = sre_hton16(n); - if (fwrite(&n, sizeof(sqd_uint16), 1, fp) != 1) return 0; - return 1; -} -static int -read_i32(FILE *fp, sqd_uint32 *ret_result) -{ - sqd_uint32 result; - if (fread(&result, sizeof(sqd_uint32), 1, fp) != 1) return 0; - *ret_result = sre_ntoh32(result); - return 1; -} -static int -write_i32(FILE *fp, sqd_uint32 n) -{ - n = sre_hton32(n); - if (fwrite(&n, sizeof(sqd_uint32), 1, fp) != 1) return 0; - return 1; -} -static int -read_i64(FILE *fp, sqd_uint64 *ret_result) -{ - sqd_uint64 result; - if (fread(&result, sizeof(sqd_uint64), 1, fp) != 1) return 0; - *ret_result = sre_ntoh64(result); - return 1; -} -static int -write_i64(FILE *fp, sqd_uint64 n) -{ - n = sre_hton64(n); - if (fwrite(&n, sizeof(sqd_uint64), 1, fp) != 1) return 0; - return 1; -} -static int -read_offset(FILE *fp, char mode, SSIOFFSET *ret_offset) -{ - if (mode == SSI_OFFSET_I32) { - ret_offset->mode = SSI_OFFSET_I32; - if (! read_i32(fp, &(ret_offset->off.i32))) return 0; - } else if (mode == SSI_OFFSET_I64) { - ret_offset->mode = SSI_OFFSET_I64; - if (! 
read_i64(fp, &(ret_offset->off.i64))) return 0; - } else return 0; - - return 1; -} -static int -write_offset(FILE *fp, SSIOFFSET *offset) -{ - if (offset->mode == SSI_OFFSET_I32) return write_i32(fp, offset->off.i32); - else if (offset->mode == SSI_OFFSET_I64) return write_i64(fp, offset->off.i64); - else abort(); - /*UNREACHED*/ - return 1; /* silence bitchy compilers */ -} - - -/* Function: binary_search() - * Date: SRE, Sun Dec 31 16:05:03 2000 [St. Louis] - * - * Purpose: Find a key in a SSI index, by a binary search - * in an alphabetically sorted list of keys. If successful, - * return 0, and the index file is positioned to read - * the rest of the data for that key. Else returns nonzero. - * - * Args: sfp - an open SSIFILE - * key - key to find - * klen - key length to allocate (plen or slen from sfp) - * base - base offset (poffset or soffset) - * recsize - size of each key record in bytes (precsize or srecsize) - * maxidx - # of keys (nprimary or nsecondary) - * - * Returns: 0 on success, and leaves file positioned for reading remaining - * data for the key. - * Nonzero on failure: - * SSI_ERR_NO_SUCH_KEY - that key's not in the index - * SSI_ERR_MALLOC - a memory allocation failure - * SSI_ERR_NODATA - an fread() failed - */ -static int -binary_search(SSIFILE *sfp, char *key, int klen, SSIOFFSET *base, - sqd_uint32 recsize, sqd_uint32 maxidx) -{ - char *name; - sqd_uint32 left, right, mid; - int cmp; - int status; - - if ((name = malloc (sizeof(char)*klen)) == NULL) return SSI_ERR_MALLOC; - left = 0; - right = maxidx; - while (1) { /* A binary search: */ - mid = (left+right) / 2; /* careful here. only works because - we limit unsigned vars to signed ranges. */ - if ((status = indexfile_position(sfp, base, recsize, mid)) != 0) - { free(name); return status; } - if (fread(name, sizeof(char), klen, sfp->fp) != klen) - { free(name); return SSI_ERR_NODATA; } - cmp = strcmp(name, key); - if (cmp == 0) break; /* found it! 
*/ - else if (left >= right) /* oops, missed it; fail */ - { free(name); return SSI_ERR_NO_SUCH_KEY; } - else if (cmp < 0) left = mid+1; /* it's right of mid */ - else if (cmp > 0) right = mid-1; /* it's left of mid */ - } - free(name); - return 0; /* and sfp->fp is positioned... */ -} - -/* Function: indexfile_position() - * Date: SRE, Mon Jan 1 19:32:49 2001 [St. Louis] - * - * Purpose: Position the open index file {sfp} at the start - * of record {n} in a list of records that starts at - * base offset {base}, where each record takes up {l} - * bytes. (e.g. the position is byte (base + n*l)). - * - * Args: sfp - open SSIFILE - * base - offset of record 0 (e.g. sfp->foffset) - * len - size of each record in bytes (e.g. sfp->frecsize) - * n - which record to get (e.g. 0..sfp->nfiles) - * - * Returns: 0 on success, non-zero on failure. - */ -static int -indexfile_position(SSIFILE *sfp, SSIOFFSET *base, sqd_uint32 len, sqd_uint32 n) -{ - SSIOFFSET pos; - int status; - - if (base->mode == SSI_OFFSET_I32) { - pos.mode = SSI_OFFSET_I32; - pos.off.i32 = base->off.i32 + n*len; - } else if (base->mode == SSI_OFFSET_I64) { - pos.mode = SSI_OFFSET_I64; - pos.off.i64 = base->off.i64 + n*len; - } else return 0; - if ((status = SSISetFilePosition(sfp->fp, &pos)) != 0) return status; - return 0; -} - -/* Function: current_chunk_size() - * Date: SRE, Tue Feb 20 18:23:30 2001 [St. Louis] - * - * Purpose: Calculates the size of the current indexfile chunk, - * in megabytes. - */ -static sqd_uint64 -current_chunk_size(SSIINDEX *g) -{ - sqd_uint64 frecsize, precsize, srecsize; - sqd_uint64 total; - - /* Magic-looking numbers come from adding up sizes - * of things in bytes - */ - frecsize = 16 + g->flen; - precsize = (g->smode == SSI_OFFSET_I64) ? 
22+g->plen : 14+g->plen; - srecsize = g->plen+g->slen; - total = (66L + /* header size, if 64bit index offsets */ - frecsize * g->nfiles + /* file section size */ - precsize * g->nprimary + /* primary key section size */ - srecsize * g->nsecondary) / /* secondary key section size */ - 1048576L; - return total; -} - - -#if 0 -static int -mergesort(SSIINDEX *g) -{ - char *infile; /* reading "tape" 1: source. */ - char *outfile; /* writing "tape" 2: destination. */ - SSIFILE *in1; /* on read, a chunk of the SSI file goes in an SSIFILE. */ - SSIFILE *in2; /* and chunk 2 goes in here. */ - FILE *outfp; /* where we're writing the merged data */ - int b; /* b, b+1 are current chunks we're merging from infile */ - char *k1, *k2; /* buffers full of keys to be merged from ch1, ch2 */ - sqd_uint32 base1, pos1, buflen1; /* buffered key input for ch1 */ - sqd_uint32 base2, pos2, buflen2; /* buffered key input for ch2 */ - sqd_uint32 maxbuf; - int status; - - /* Initializations. - */ - /* create the tmp file names */ - if ((infile = sre_strdup(g->tmpbase, -1)) == NULL) return SSI_ERR_MALLOC; - if (sre_strcat(&infile, -1, ".t1", 3) < 0) return SSI_ERR_MALLOC; - if ((outfile = sre_strdup(g->tmpbase, -1)) == NULL) return SSI_ERR_MALLOC; - if (sre_strcat(&outfile, -1, ".t2", 3) < 0) return SSI_ERR_MALLOC; - /* allocate the SSIFILEs for reading chunks */ - if ((in1 = malloc(sizeof(SSIFILE))) == NULL) return SSI_ERR_MALLOC; - if ((in2 = malloc(sizeof(SSIFILE))) == NULL) return SSI_ERR_MALLOC; - - /* Open infile for read; both chunks (in1 and in2) are read from this file, - * from different file offsets kept in g->chunkoffset[] - */ - if ((in1->fp = fopen(infile, "rb")) == NULL) return SSI_ERR_NOFILE; - in2->fp = in1->fp; - if ((outfp = fopen(outfile, "wb")) == NULL) return SSI_ERR_NOFILE; - - for (b = 0; b+1 < g->nchunks; b+=2) - { - if (fsetpos(in1->fp, &(g->chunkoffset[b])) > 0) return SSI_ERR_SEEK_FAILED; - if (fsetpos(in2->fp, &(g->chunkoffset[b+1])) > 0) return 
SSI_ERR_SEEK_FAILED; - - if (status = load_indexfile(in1) > 0) return status; - if (status = load_indexfile(in2) > 0) return status; - - merge_headers(g, in1, in2); - write_index_header(outfp, g); - - /* Merge the primary key section; - * do a buffered read of the pkeys from ch1 and ch2. - */ - maxbuf = 100000; - if ((k1 = malloc(sizeof(char) * (maxbuf*in1->precsize))) == NULL) return SSI_ERR_MALLOC; - if ((k2 = malloc(sizeof(char) * (maxbuf*in2->precsize))) == NULL) return SSI_ERR_MALLOC; - base1 = pos1 = buflen1 = 0; - base2 = pos2 = buflen2 = 0; - while (base1+pos1 < ch1->nprimary || base2+pos2 < ch2->nprimary) { - /* refill buffer for ch1? */ - if (pos1 == buflen1) { - base1 += buflen1; - pos1 = 0; - buflen1 = MIN(in1->nprimary - base1, maxbuf); - if (buflen1 > 0) { - if (fread(k1, sizeof(char), (buflen1*in1->precsize), in1->fp) - < buflen1*in1->precsize) - return SSI_ERR_NODATA; - } - } - /* refill buffer for ch2? */ - if (pos2 == buflen2) { - base2 += buflen2; - pos2 = 0; - buflen2 = MIN(in2->nprimary - base2, maxbuf); - if (buflen2 > 0) { - if (fread(k2, sizeof(char), (buflen1*in2->precsize), in2->fp) - < buflen2*in2->precsize) - return SSI_ERR_NODATA; - } - } - /* mergesort on keys; be careful of case where we're - out of keys in either ch1 or ch2 */ - if (base2+pos2 == ch2->nprimary || - strcmp(k1+(pos1*in1->precsize), k2+(pos2*in2->precsize))) - write_pkey(t3, &(pk1[pos1]), s); - pos1++; - } else { - write_pkey(t3, &(pk2[pos2]), s); - pos2++; - } - } - free(s); - free(pk1); - free(pk2); - - /* Merge the secondary keys; much like the primary key code above. 
- */ - maxbuf = 100000; - if ((sk1 = malloc(sizeof(struct ssiskey_s) * maxbuf)) == NULL) return SSI_ERR_MALLOC; - if ((sk2 = malloc(sizeof(struct ssiskey_s) * maxbuf)) == NULL) return SSI_ERR_MALLOC; - if ((s = malloc(sizeof(char) * newch->slen)) == NULL) return SSI_ERR_MALLOC; - base1 = pos1 = buflen1 = 0; - base2 = pos2 = buflen2 = 0; - while (base1+pos1 < ch1->nsecondary || base2+pos2 < ch2->nsecondary) { - /* refill buffer for ch1? */ - if (pos1 == buflen1) { - base1 += buflen1; - pos1 = 0; - buflen1 = MIN(ch1->nsecondary - base1, maxbuf); - if (buflen1 > 0) read_skeys(ch1->fp, sk1, buflen1); - } - /* refill buffer for ch2? */ - if (pos2 == buflen2) { - base2 += buflen2; - pos2 = 0; - buflen2 = MIN(ch2->nsecondary - base2, maxbuf); - if (buflen2 > 0) read_skeys(ch2->fp, sk2, buflen2); - } - /* mergesort on keys; be careful of case where we're - out of keys in either ch1 or ch2 */ - if (base2+pos2 == ch2->nsecondary || pkeysort(&(sk1[pos1]), &(sk2[pos2])) < 0) { - write_skey(t3, &(pk1[pos1]), s); - pos1++; - } else { - write_skey(t3, &(pk2[pos2]), s); - pos2++; - } - } - free(s); - free(pk1); - free(pk2); - - - - - /* clear ch1, ch2, in prep for loading new chunks */ - clear_ssifile(ch1); - clear_ssifile(ch2); - } /* end loop over chunks */ - -} -#endif - - -#ifdef MUGGINS_LETS_ME_SLEEP /* test driving code. */ -/* Minimally: - cc -g -Wall -o shiva -D MUGGINS_LETS_ME_SLEEP ssi.c sqerror.c sre_string.c types.c sre_ctype.c sre_math.c -lm -*/ - -int -main(int argc, char **argv) -{ - char name[32], accession[32]; - SSIINDEX *ssi; - int mode; - SSIOFFSET r_off, d_off; - FILE *ofp; - int i; - int fh; /* a file handle */ - int status; /* return status from a SSI call */ - - mode = SSI_OFFSET_I32; - if ((ssi = SSICreateIndex(mode)) == NULL) - Die("Failed to allocate SSI index"); - - /* Generate two FASTA files, tmp.0 and tmp.1, and index them. 
- */ - if ((ofp = fopen("tmp.0", "w")) == NULL) - Die("failed to open tmp.0"); - if ((status = SSIAddFileToIndex(ssi, "tmp.0", SQFILE_FASTA, &fh)) != 0) - Die("SSIAddFileToIndex() failed: %s", SSIErrorString(status)); - for (i = 0; i < 10; i++) { - if ((status = SSIGetFilePosition(ofp, mode, &r_off)) != 0) - Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); - sprintf(name, "seq%d", i); - sprintf(accession, "ac%d", i); - fprintf(ofp, ">%s [%s] Description? we don't need no steenking description.\n", - name, accession); - if ((status = SSIGetFilePosition(ofp, mode, &d_off)) != 0) - Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); - fprintf(ofp, "AAAAAAAAAA\n"); - fprintf(ofp, "CCCCCCCCCC\n"); - fprintf(ofp, "GGGGGGGGGG\n"); - fprintf(ofp, "TTTTTTTTTT\n"); - - if ((status = SSIAddPrimaryKeyToIndex(ssi, name, fh, &r_off, &d_off, 40)) != 0) - Die("SSIAddPrimaryKeyToIndex() failed: %s", SSIErrorString(status)); - if ((status = SSIAddSecondaryKeyToIndex(ssi, accession, name)) != 0) - Die("SSIAddSecondaryKeyToIndex() failed: %s", SSIErrorString(status)); - } - SSISetFileForSubseq(ssi, fh, 11, 10); - fclose(ofp); - - if ((ofp = fopen("tmp.1", "w")) == NULL) - Die("failed to open tmp.1"); - if ((status = SSIAddFileToIndex(ssi, "tmp.1", SQFILE_FASTA, &fh)) != 0) - Die("SSIAddFileToIndex() failed: %s", SSIErrorString(status)); - for (i = 10; i < 20; i++) { - if ((status = SSIGetFilePosition(ofp, mode, &r_off)) != 0) - Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); - sprintf(name, "seq%d", i); - sprintf(accession, "ac%d", i); - fprintf(ofp, ">%s [%s] i/o, i/o, it's off to disk we go.\n", - name, accession); - if ((status = SSIGetFilePosition(ofp, mode, &d_off)) != 0) - Die("SSIGetFilePosition() failed: %s", SSIErrorString(status)); - fprintf(ofp, "AAAAAAAAAA 10\n"); - fprintf(ofp, "CCCCCCCCCC 20\n"); - fprintf(ofp, "GGGGGGGGGG 30\n"); - fprintf(ofp, "TTTTTTTTTT 40\n"); - - if ((status = SSIAddPrimaryKeyToIndex(ssi, name, fh, &r_off, 
&d_off, 40)) != 0) - Die("SSIAddPrimaryKeyToIndex() failed: %s", SSIErrorString(status)); - if ((status = SSIAddSecondaryKeyToIndex(ssi, accession, name)) != 0) - Die("SSIAddSecondaryKeyToIndex() failed: %s", SSIErrorString(status)); - } - SSISetFileForSubseq(ssi, fh, 14, 10); - fclose(ofp); - - /* Write the index to tmp.ssi - */ - if ((status = SSIWriteIndex("tmp.ssi", ssi)) != 0) - Die("SSIWriteIndex() failed: %s", SSIErrorString(status)); - SSIFreeIndex(ssi); - - /* Now reopen the index and run some tests. - */ - exit(0); -} - - -#endif /* test driving code */ - - - diff --git a/forester/archive/RIO/others/hmmer/squid/ssi.h b/forester/archive/RIO/others/hmmer/squid/ssi.h deleted file mode 100644 index 1ecde6c..0000000 --- a/forester/archive/RIO/others/hmmer/squid/ssi.h +++ /dev/null @@ -1,193 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -#ifndef SSIH_INCLUDED -#define SSIH_INCLUDED - -/* ssi.h - * Database indexing (SSI format support) - * CVS $Id: ssi.h,v 1.1.1.1 2005/03/22 08:34:21 cmzmasek Exp $ - * - * See: ssi_format.tex in Docs/ - */ - -#include -#include "squid.h" - -/* Limits - */ -#define SSI_MAXFILES 32767 /* 2^15-1 */ -#define SSI_MAXKEYS 2147483647L /* 2^31-1 */ - -/* typedef: SSIOFFSET - * Use the union to save space, since the two offset types are - * mutually exclusive, controlled by "mode" - */ -struct ssioffset_s { - char mode; /* GSI_OFFSET_I32, for example */ - union { - sqd_uint32 i32; /* an offset that fseek() can use */ - sqd_uint64 i64; /* an offset that e.g. 
fseeko64() can use */ - } off; -}; -typedef struct ssioffset_s SSIOFFSET; -#define SSI_OFFSET_I32 0 -#define SSI_OFFSET_I64 1 - -/* Structure: SSIFILE - * xref: SSI API documentation in ssi-format.tex - */ -struct ssifile_s { - FILE *fp; /* open SSI index file */ - sqd_uint32 flags; /* optional behavior flags */ - sqd_uint16 nfiles; /* number of files = 16 bit int */ - sqd_uint32 nprimary; /* number of primary keys */ - sqd_uint32 nsecondary; /* number of secondary keys */ - sqd_uint32 flen; /* length of filenames (inc '\0') */ - sqd_uint32 plen; /* length of primary keys (inc '\0') */ - sqd_uint32 slen; /* length of secondary keys (inc '\0') */ - sqd_uint32 frecsize; /* # bytes in a file record */ - sqd_uint32 precsize; /* # bytes in a primary key record */ - sqd_uint32 srecsize; /* # bytes in a secondary key record */ - SSIOFFSET foffset; /* disk offset, start of file records */ - SSIOFFSET poffset; /* disk offset, start of pri key recs */ - SSIOFFSET soffset; /* disk offset, start of sec key recs */ - - char imode; /* mode for index file offsets, 32 v. 64 bit */ - char smode; /* mode for sequence file offsets, 32 v. 
64 bit */ - - /* File information: - */ - char **filename; /* list of file names [0..nfiles-1] */ - sqd_uint32 *fileformat; /* file formats */ - sqd_uint32 *fileflags; /* optional per-file behavior flags */ - sqd_uint32 *bpl; /* bytes per line in file */ - sqd_uint32 *rpl; /* residues per line in file */ -}; -typedef struct ssifile_s SSIFILE; - -/* optional per-index behavior flags in SSIFILE structure's flags: - */ -#define SSI_USE64 1<<0 /* seq offsets are 64-bit */ -#define SSI_USE64_INDEX 1<<1 /* index file offsets are 64-bit */ - -/* optional per-file behavior flags in fileflags - */ -#define SSI_FAST_SUBSEQ 1<<0 /* can do subseq lookup in this file */ - -/* Structure: SSIINDEX - * - * Used when building up an index and writing it to disk - */ -struct ssipkey_s { /* Primary key data: */ - char *key; /* key name */ - sqd_uint16 fnum; /* file number */ - SSIOFFSET r_off; /* record offset */ - SSIOFFSET d_off; /* data offset */ - sqd_uint32 len; /* sequence length */ - sqd_uint32 handle; /* handle on this key*/ -}; -struct ssiskey_s { /* Secondary key data: */ - char *key; /* secondary key name */ - char *pkey; /* primary key name */ -}; -struct ssiindex_s { - int smode; /* sequence mode: SSI_OFFSET_I32 or _I64 */ - int imode; /* index mode: SSI_OFFSET_I32 or _I64 */ - - char **filenames; - sqd_uint32 *fileformat; - sqd_uint32 *bpl; - sqd_uint32 *rpl; - sqd_uint32 flen; /* length of longest filename, inc '\0' */ - sqd_uint16 nfiles; - - struct ssipkey_s *pkeys; - sqd_uint32 plen; /* length of longest pkey, including '\0' */ - sqd_uint32 nprimary; - sqd_uint32 tot_primary; - - struct ssiskey_s *skeys; - sqd_uint32 slen; /* length of longest skey, including '\0' */ - sqd_uint32 nsecondary; - sqd_uint32 tot_secondary; - - /* The following stuff is for creating really big indexes, where - * we have to write a tmp file to disk with multiple chunks, then - * mergesort the chunks. 
- */ - char *tmpbase; /* root name of tmp files: .t1 and .t2 */ - FILE *t1; /* open tmp file for collecting chunks */ - fpos_t *chunkoffset; /* array of offsets to individual chunks; 0..nchunks-1 */ - int nchunks; /* total # of chunks in t1 */ - int max_chunk_size; /* maximum size of chunk to hold in memory at one time, in MB */ -}; -typedef struct ssiindex_s SSIINDEX; - -/* These control malloc and realloc chunk sizes in the index - * construction code. - */ -#define SSI_FILE_BLOCK 10 -#define SSI_KEY_BLOCK 100 - -/* Error codes set by the API - */ -#define SSI_ERR_OK 0 -#define SSI_ERR_NODATA 1 /* no data? an fread() failed */ -#define SSI_ERR_NO_SUCH_KEY 2 /* that key's not in the index */ -#define SSI_ERR_MALLOC 3 -#define SSI_ERR_NOFILE 4 /* no such file? an fopen() failed */ -#define SSI_ERR_BADMAGIC 5 /* magic number mismatch in GSIOpen() */ -#define SSI_ERR_BADFORMAT 6 /* didn't read what I expected to fread() */ -#define SSI_ERR_NO64BIT 7 /* needed 64-bit support and didn't have it */ -#define SSI_ERR_SEEK_FAILED 8 /* an fseek() (or similar) failed */ -#define SSI_ERR_TELL_FAILED 9 /* an ftell() (or similar) failed */ -#define SSI_ERR_NO_SUBSEQS 10 /* fast subseq is disallowed */ -#define SSI_ERR_RANGE 11 /* subseq requested is out of range */ -#define SSI_ERR_BADARG 12 /* something wrong with a function argument */ - -#define SSI_ERR_TOOMANY_FILES 13 /* ran out of range for files in an index */ -#define SSI_ERR_TOOMANY_KEYS 14 /* ran out of range for keys in an index */ -#define SSI_ERR_FWRITE 15 - -/* The SSI file reading API: - */ -extern int SSIOpen(char *filename, SSIFILE **ret_sfp); -extern int SSIGetOffsetByName(SSIFILE *sfp, char *key, int *ret_fh, - SSIOFFSET *ret_offset); -extern int SSIGetOffsetByNumber(SSIFILE *sfp, int n, int *ret_fh, - SSIOFFSET *ret_offset); -extern int SSIGetSubseqOffset(SSIFILE *sfp, char *key, int requested_start, - int *ret_fh, SSIOFFSET *record_offset, - SSIOFFSET *data_offset, int *ret_actual_start); -extern int 
SSISetFilePosition(FILE *fp, SSIOFFSET *offset); -extern int SSIFileInfo(SSIFILE *sfp, int fh, char **ret_filename, int *ret_format); -extern void SSIClose(SSIFILE *sfp); - -/* The SSI index file writing API: - */ -extern int SSIRecommendMode(char *file); -extern SSIINDEX *SSICreateIndex(int mode); -extern int SSIGetFilePosition(FILE *fp, int mode, SSIOFFSET *ret_offset); -extern int SSIAddFileToIndex(SSIINDEX *g, char *filename, int fmt, int *ret_fh); -extern int SSISetFileForSubseq(SSIINDEX *g, int fh, int bpl, int rpl); -extern int SSIAddPrimaryKeyToIndex(SSIINDEX *g, char *key, int fh, - SSIOFFSET *r_off, SSIOFFSET *d_off, - int L); -extern int SSIAddSecondaryKeyToIndex(SSIINDEX *g, char *key, char *pkey); -extern int SSIWriteIndex(char *file, SSIINDEX *g); -extern void SSIFreeIndex(SSIINDEX *g); - -/* The SSI misc. functions API: - */ -extern char *SSIErrorString(int n); - - -#endif /*SSIH_INCLUDED*/ diff --git a/forester/archive/RIO/others/hmmer/squid/stack.c b/forester/archive/RIO/others/hmmer/squid/stack.c deleted file mode 100644 index 51b8664..0000000 --- a/forester/archive/RIO/others/hmmer/squid/stack.c +++ /dev/null @@ -1,103 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* stack.c - * SRE, Thu Mar 3 10:08:48 1994 - * - * Implementation of generic stack structures. - * RCS $Id: stack.c,v 1.1.1.1 2005/03/22 08:34:25 cmzmasek Exp $ - */ - -#include -#include "squid.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - - -/************************************************************ - * intstack_s implementation. 
- * - * Functions: InitIntStack() - returns ptr to new stack - * PushIntStack() - (void) - * PopIntStack() - returns 1 on success, 0 if stack empty - * FreeIntStack() - returns number of elements free'd, or 0 if - * stack was empty. - * - * Implementation of the pushdown stack for storing single - * integers. - *************************************************************/ -struct intstack_s * -InitIntStack(void) -{ - struct intstack_s *stack; - - if ((stack = (struct intstack_s *) malloc (sizeof(struct intstack_s))) == NULL) - Die("Memory allocation failure at %s line %d", __FILE__, __LINE__); - stack->nxt = NULL; - return stack; -} -void -PushIntStack(struct intstack_s *stack, int data) -{ - struct intstack_s *new; - - if ((new = (struct intstack_s *) malloc (sizeof(struct intstack_s))) == NULL) - Die("Memory allocation failure at %s line %d", __FILE__, __LINE__); - new->data = data; - - new->nxt = stack->nxt; - stack->nxt = new; -} - -int -PopIntStack(struct intstack_s *stack, int *ret_data) -{ - struct intstack_s *old; - - if (stack->nxt == NULL) return 0; - - old = stack->nxt; - stack->nxt = old->nxt; - - *ret_data = old->data; - free(old); - return 1; -} - -void -ReverseIntStack(struct intstack_s *stack) -{ - struct intstack_s *old; - struct intstack_s *new; - - old = stack->nxt; - stack->nxt = NULL; - while (old != NULL) - { - new = old; /* remove one from top of old stack */ - old = old->nxt; - new->nxt = stack->nxt; /* push it onto new stack */ - stack->nxt = new; - } -} - -int -FreeIntStack( struct intstack_s *stack ) -{ - int data; - int count = 0; - - while (PopIntStack(stack, &data)) - count++; - free(stack); - return count; -} diff --git a/forester/archive/RIO/others/hmmer/squid/stockholm.c b/forester/archive/RIO/others/hmmer/squid/stockholm.c deleted file mode 100644 index f70250e..0000000 --- a/forester/archive/RIO/others/hmmer/squid/stockholm.c +++ /dev/null @@ -1,607 +0,0 @@ -/***************************************************************** - * 
HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* stockholm.c - * SRE, Fri May 28 15:46:41 1999 - * - * Reading/writing of Stockholm format multiple sequence alignments. - * - * example of API: - * - * MSA *msa; - * FILE *fp; -- opened for write with fopen() - * MSAFILE *afp; -- opened for read with MSAFileOpen() - * - * while ((msa = ReadStockholm(afp)) != NULL) - * { - * WriteStockholm(fp, msa); - * MSAFree(msa); - * } - * - * RCS $Id: stockholm.c,v 1.1.1.1 2005/03/22 08:34:30 cmzmasek Exp $ - */ -#include -#include -#include "squid.h" -#include "msa.h" - -static int parse_gf(MSA *msa, char *buf); -static int parse_gs(MSA *msa, char *buf); -static int parse_gc(MSA *msa, char *buf); -static int parse_gr(MSA *msa, char *buf); -static int parse_comment(MSA *msa, char *buf); -static int parse_sequence(MSA *msa, char *buf); -static void actually_write_stockholm(FILE *fp, MSA *msa, int cpl); - -#ifdef TESTDRIVE_STOCKHOLM -/***************************************************************** - * stockholm.c test driver: - * cc -DTESTDRIVE_STOCKHOLM -g -O2 -Wall -o test stockholm.c msa.c gki.c sqerror.c sre_string.c file.c hsregex.c sre_math.c sre_ctype.c -lm - * - */ -int -main(int argc, char **argv) -{ - MSAFILE *afp; - MSA *msa; - char *file; - - file = argv[1]; - - if ((afp = MSAFileOpen(file, MSAFILE_STOCKHOLM, NULL)) == NULL) - Die("Couldn't open %s\n", file); - - while ((msa = ReadStockholm(afp)) != NULL) - { - WriteStockholm(stdout, msa); - MSAFree(msa); - } - - MSAFileClose(afp); - exit(0); -} -/******************************************************************/ -#endif /* testdriver */ - - -/* Function: ReadStockholm() - * Date: SRE, Fri May 21 
17:33:10 1999 [St. Louis] - * - * Purpose: Parse the next alignment from an open Stockholm - * format alignment file. Return the alignment, or - * NULL if there are no more alignments in the file. - * - * Args: afp - open alignment file - * - * Returns: MSA * - an alignment object. - * caller responsible for an MSAFree() - * NULL if no more alignments - * - * Diagnostics: - * Will Die() here with a (potentially) useful message - * if a parsing error occurs - */ -MSA * -ReadStockholm(MSAFILE *afp) -{ - MSA *msa; - char *s; - int status; - - if (feof(afp->f)) return NULL; - - /* Initialize allocation of the MSA. - */ - msa = MSAAlloc(10, 0); - - /* Check the magic Stockholm header line. - * We have to skip blank lines here, else we perceive - * trailing blank lines in a file as a format error when - * reading in multi-record mode. - */ - do { - if ((s = MSAFileGetLine(afp)) == NULL) { - MSAFree(msa); - return NULL; - } - } while (IsBlankline(s)); - - if (strncmp(s, "# STOCKHOLM 1.", 14) != 0) - Die("\ -File %s doesn't appear to be in Stockholm format.\n\ -Assuming there isn't some other problem with your file (it is an\n\ -alignment file, right?), please either:\n\ - a) use the Babelfish format autotranslator option (-B, usually);\n\ - b) specify the file's format with the --informat option; or\n\ - a) reformat the alignment to Stockholm format.\n", - afp->fname); - - /* Read the alignment file one line at a time. 
- */ - while ((s = MSAFileGetLine(afp)) != NULL) - { - while (*s == ' ' || *s == '\t') s++; /* skip leading whitespace */ - - if (*s == '#') { - if (strncmp(s, "#=GF", 4) == 0) status = parse_gf(msa, s); - else if (strncmp(s, "#=GS", 4) == 0) status = parse_gs(msa, s); - else if (strncmp(s, "#=GC", 4) == 0) status = parse_gc(msa, s); - else if (strncmp(s, "#=GR", 4) == 0) status = parse_gr(msa, s); - else status = parse_comment(msa, s); - } - else if (strncmp(s, "//", 2) == 0) break; - else if (*s == '\n') continue; - else status = parse_sequence(msa, s); - - if (status == 0) - Die("Stockholm format parse error: line %d of file %s while reading alignment %s", - afp->linenumber, afp->fname, msa->name == NULL? "" : msa->name); - } - - if (s == NULL && msa->nseq != 0) - Die ("Didn't find // at end of alignment %s", msa->name == NULL ? "" : msa->name); - - if (s == NULL && msa->nseq == 0) { - /* probably just some junk at end of file */ - MSAFree(msa); - return NULL; - } - - MSAVerifyParse(msa); - return msa; -} - - -/* Function: WriteStockholm() - * Date: SRE, Mon May 31 19:15:22 1999 [St. Louis] - * - * Purpose: Write an alignment in standard multi-block - * Stockholm format to an open file. A wrapper - * for actually_write_stockholm(). - * - * Args: fp - file that's open for writing - * msa - alignment to write - * - * Returns: (void) - */ -void -WriteStockholm(FILE *fp, MSA *msa) -{ - actually_write_stockholm(fp, msa, 50); /* 50 char per block */ -} - -/* Function: WriteStockholmOneBlock() - * Date: SRE, Mon May 31 19:15:22 1999 [St. Louis] - * - * Purpose: Write an alignment in Pfam's single-block - * Stockholm format to an open file. A wrapper - * for actually_write_stockholm(). 
- * - * Args: fp - file that's open for writing - * msa - alignment to write - * - * Returns: (void) - */ -void -WriteStockholmOneBlock(FILE *fp, MSA *msa) -{ - actually_write_stockholm(fp, msa, msa->alen); /* one big block */ -} - - -/* Function: actually_write_stockholm() - * Date: SRE, Fri May 21 17:39:22 1999 [St. Louis] - * - * Purpose: Write an alignment in Stockholm format to - * an open file. This is the function that actually - * does the work. The API's WriteStockholm() - * and WriteStockholmOneBlock() are wrappers. - * - * Args: fp - file that's open for writing - * msa - alignment to write - * cpl - characters to write per line in alignment block - * - * Returns: (void) - */ -static void -actually_write_stockholm(FILE *fp, MSA *msa, int cpl) -{ - int i, j; - int len = 0; - int namewidth; - int typewidth = 0; /* markup tags are up to 5 chars long */ - int markupwidth = 0; /* #=GR, #=GC are four char wide + 1 space */ - char buf[256]; - int currpos; - char *s, *tok; - - /* Figure out how much space we need for name + markup - * to keep the alignment in register. Required by Stockholm - * spec, even though our Stockholm parser doesn't care (Erik's does). 
- */ - namewidth = 0; - for (i = 0; i < msa->nseq; i++) - if ((len = strlen(msa->sqname[i])) > namewidth) - namewidth = len; - - /* Figure out how much space we need for markup tags - * markupwidth = always 4 if we're doing markup: strlen("#=GR") - * typewidth = longest markup tag - */ - if (msa->ss != NULL) { markupwidth = 4; typewidth = 2; } - if (msa->sa != NULL) { markupwidth = 4; typewidth = 2; } - for (i = 0; i < msa->ngr; i++) - if ((len = strlen(msa->gr_tag[i])) > typewidth) typewidth = len; - - if (msa->rf != NULL) { markupwidth = 4; if (typewidth < 2) typewidth = 2; } - if (msa->ss_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; } - if (msa->sa_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; } - for (i = 0; i < msa->ngc; i++) - if ((len = strlen(msa->gc_tag[i])) > typewidth) typewidth = len; - - - /* Magic Stockholm header - */ - fprintf(fp, "# STOCKHOLM 1.0\n"); - - /* Free text comments - */ - for (i = 0; i < msa->ncomment; i++) - fprintf(fp, "# %s\n", msa->comment[i]); - if (msa->ncomment > 0) fprintf(fp, "\n"); - - /* GF section: per-file annotation - */ - if (msa->name != NULL) fprintf(fp, "#=GF ID %s\n", msa->name); - if (msa->acc != NULL) fprintf(fp, "#=GF AC %s\n", msa->acc); - if (msa->desc != NULL) fprintf(fp, "#=GF DE %s\n", msa->desc); - if (msa->au != NULL) fprintf(fp, "#=GF AU %s\n", msa->au); - if (msa->flags & MSA_SET_GA) fprintf(fp, "#=GF GA %.1f %.1f\n", msa->ga1, msa->ga2); - if (msa->flags & MSA_SET_NC) fprintf(fp, "#=GF TC %.1f %.1f\n", msa->nc1, msa->nc2); - if (msa->flags & MSA_SET_TC) fprintf(fp, "#=GF TC %.1f %.1f\n", msa->tc1, msa->tc2); - for (i = 0; i < msa->ngf; i++) - fprintf(fp, "#=GF %-5s %s\n", msa->gf_tag[i], msa->gf[i]); - fprintf(fp, "\n"); - - - /* GS section: per-sequence annotation - */ - if (msa->flags & MSA_SET_WGT) - { - for (i = 0; i < msa->nseq; i++) - fprintf(fp, "#=GS %-*.*s WT %.2f\n", namewidth, namewidth, msa->sqname[i], msa->wgt[i]); - fprintf(fp, "\n"); - } - if 
(msa->sqacc != NULL) - { - for (i = 0; i < msa->nseq; i++) - if (msa->sqacc[i] != NULL) - fprintf(fp, "#=GS %-*.*s AC %s\n", namewidth, namewidth, msa->sqname[i], msa->sqacc[i]); - fprintf(fp, "\n"); - } - if (msa->sqdesc != NULL) - { - for (i = 0; i < msa->nseq; i++) - if (msa->sqdesc[i] != NULL) - fprintf(fp, "#=GS %*.*s DE %s\n", namewidth, namewidth, msa->sqname[i], msa->sqdesc[i]); - fprintf(fp, "\n"); - } - for (i = 0; i < msa->ngs; i++) - { - /* Multiannotated GS tags are possible; for example, - * #=GS foo DR PDB; 1xxx; - * #=GS foo DR PDB; 2yyy; - * These are stored, for example, as: - * msa->gs[0][0] = "PDB; 1xxx;\nPDB; 2yyy;" - * and must be decomposed. - */ - for (j = 0; j < msa->nseq; j++) - if (msa->gs[i][j] != NULL) - { - s = msa->gs[i][j]; - while ((tok = sre_strtok(&s, "\n", NULL)) != NULL) - fprintf(fp, "#=GS %*.*s %5s %s\n", namewidth, namewidth, - msa->sqname[j], msa->gs_tag[i], tok); - } - fprintf(fp, "\n"); - } - - /* Alignment section: - * contains aligned sequence, #=GR annotation, and #=GC annotation - */ - for (currpos = 0; currpos < msa->alen; currpos += cpl) - { - if (currpos > 0) fprintf(fp, "\n"); - for (i = 0; i < msa->nseq; i++) - { - strncpy(buf, msa->aseq[i] + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "%-*.*s %s\n", namewidth+typewidth+markupwidth, namewidth+typewidth+markupwidth, - msa->sqname[i], buf); - - if (msa->ss != NULL && msa->ss[i] != NULL) { - strncpy(buf, msa->ss[i] + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "#=GR %-*.*s SS %s\n", namewidth, namewidth, msa->sqname[i], buf); - } - if (msa->sa != NULL && msa->sa[i] != NULL) { - strncpy(buf, msa->sa[i] + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "#=GR %-*.*s SA %s\n", namewidth, namewidth, msa->sqname[i], buf); - } - for (j = 0; j < msa->ngr; j++) - if (msa->gr[j][i] != NULL) { - strncpy(buf, msa->gr[j][i] + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "#=GR %-*.*s %5s %s\n", - namewidth, namewidth, msa->sqname[i], msa->gr_tag[j], buf); - } - } - if 
(msa->ss_cons != NULL) { - strncpy(buf, msa->ss_cons + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "SS_cons", buf); - } - - if (msa->sa_cons != NULL) { - strncpy(buf, msa->sa_cons + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "SA_cons", buf); - } - - if (msa->rf != NULL) { - strncpy(buf, msa->rf + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "RF", buf); - } - for (j = 0; j < msa->ngc; j++) { - strncpy(buf, msa->gc[j] + currpos, cpl); - buf[cpl] = '\0'; - fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, - msa->gc_tag[j], buf); - } - } - fprintf(fp, "//\n"); -} - - - - - -/* Format of a GF line: - * #=GF - */ -static int -parse_gf(MSA *msa, char *buf) -{ - char *gf; - char *featurename; - char *text; - char *s; - - s = buf; - if ((gf = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - if ((text = sre_strtok(&s, "\n", NULL)) == NULL) return 0; - while (*text && (*text == ' ' || *text == '\t')) text++; - - if (strcmp(featurename, "ID") == 0) - msa->name = sre_strdup(text, -1); - else if (strcmp(featurename, "AC") == 0) - msa->acc = sre_strdup(text, -1); - else if (strcmp(featurename, "DE") == 0) - msa->desc = sre_strdup(text, -1); - else if (strcmp(featurename, "AU") == 0) - msa->au = sre_strdup(text, -1); - else if (strcmp(featurename, "GA") == 0) - { - s = text; - if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - msa->ga1 = atof(text); - if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - msa->ga2 = atof(text); - msa->flags |= MSA_SET_GA; - } - else if (strcmp(featurename, "NC") == 0) - { - s = text; - if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - msa->nc1 = atof(text); - if ((text = sre_strtok(&s, WHITESPACE, NULL)) == 
NULL) return 0; - msa->nc2 = atof(text); - msa->flags |= MSA_SET_NC; - } - else if (strcmp(featurename, "TC") == 0) - { - s = text; - if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - msa->tc1 = atof(text); - if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - msa->tc2 = atof(text); - msa->flags |= MSA_SET_TC; - } - else - MSAAddGF(msa, featurename, text); - - return 1; -} - - -/* Format of a GS line: - * #=GS - */ -static int -parse_gs(MSA *msa, char *buf) -{ - char *gs; - char *seqname; - char *featurename; - char *text; - int seqidx; - char *s; - - s = buf; - if ((gs = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - if ((seqname = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - if ((text = sre_strtok(&s, "\n", NULL)) == NULL) return 0; - while (*text && (*text == ' ' || *text == '\t')) text++; - - /* GS usually follows another GS; guess lastidx+1 - */ - seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx+1); - msa->lastidx = seqidx; - - if (strcmp(featurename, "WT") == 0) - { - msa->wgt[seqidx] = atof(text); - msa->flags |= MSA_SET_WGT; - } - - else if (strcmp(featurename, "AC") == 0) - MSASetSeqAccession(msa, seqidx, text); - - else if (strcmp(featurename, "DE") == 0) - MSASetSeqDescription(msa, seqidx, text); - - else - MSAAddGS(msa, featurename, seqidx, text); - - return 1; -} - -/* Format of a GC line: - * #=GC - */ -static int -parse_gc(MSA *msa, char *buf) -{ - char *gc; - char *featurename; - char *text; - char *s; - int len; - - s = buf; - if ((gc = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - if ((text = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0; - - if (strcmp(featurename, "SS_cons") == 0) - sre_strcat(&(msa->ss_cons), -1, text, len); - else if (strcmp(featurename, "SA_cons") == 0) - sre_strcat(&(msa->sa_cons), -1, text, len); - else if 
(strcmp(featurename, "RF") == 0) - sre_strcat(&(msa->rf), -1, text, len); - else - MSAAppendGC(msa, featurename, text); - - return 1; -} - -/* Format of a GR line: - * #=GR - */ -static int -parse_gr(MSA *msa, char *buf) -{ - char *gr; - char *seqname; - char *featurename; - char *text; - int seqidx; - int len; - int j; - char *s; - - s = buf; - if ((gr = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - if ((seqname = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - if ((text = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0; - - /* GR usually follows sequence it refers to; guess msa->lastidx */ - seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx); - msa->lastidx = seqidx; - - if (strcmp(featurename, "SS") == 0) - { - if (msa->ss == NULL) - { - msa->ss = MallocOrDie(sizeof(char *) * msa->nseqalloc); - msa->sslen = MallocOrDie(sizeof(int) * msa->nseqalloc); - for (j = 0; j < msa->nseqalloc; j++) - { - msa->ss[j] = NULL; - msa->sslen[j] = 0; - } - } - msa->sslen[seqidx] = sre_strcat(&(msa->ss[seqidx]), msa->sslen[seqidx], text, len); - } - else if (strcmp(featurename, "SA") == 0) - { - if (msa->sa == NULL) - { - msa->sa = MallocOrDie(sizeof(char *) * msa->nseqalloc); - msa->salen = MallocOrDie(sizeof(int) * msa->nseqalloc); - for (j = 0; j < msa->nseqalloc; j++) - { - msa->sa[j] = NULL; - msa->salen[j] = 0; - } - } - msa->salen[seqidx] = sre_strcat(&(msa->sa[seqidx]), msa->salen[seqidx], text, len); - } - else - MSAAppendGR(msa, featurename, seqidx, text); - - return 1; -} - - -/* comments are simply stored verbatim, not parsed - */ -static int -parse_comment(MSA *msa, char *buf) -{ - char *s; - char *comment; - - s = buf + 1; /* skip leading '#' */ - if (*s == '\n') { *s = '\0'; comment = s; } /* deal with blank comment */ - else if ((comment = sre_strtok(&s, "\n", NULL)) == NULL) return 0; - - MSAAddComment(msa, comment); - return 1; -} - -static int -parse_sequence(MSA *msa, 
char *buf) -{ - char *s; - char *seqname; - char *text; - int seqidx; - int len; - - s = buf; - if ((seqname = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0; - if ((text = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0; - - /* seq usually follows another seq; guess msa->lastidx +1 */ - seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx+1); - msa->lastidx = seqidx; - - msa->sqlen[seqidx] = sre_strcat(&(msa->aseq[seqidx]), msa->sqlen[seqidx], text, len); - return 1; -} - - - diff --git a/forester/archive/RIO/others/hmmer/squid/stockholm.h b/forester/archive/RIO/others/hmmer/squid/stockholm.h deleted file mode 100644 index a9cae55..0000000 --- a/forester/archive/RIO/others/hmmer/squid/stockholm.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef STOCKHOLM_H_INCLUDED -#define STOCKHOLM_H_INCLUDED - -#include "gki.h" - -typedef struct { - int *linetype; /* e.g. STOCKHOLM_GF_LINE; always valid */ - int *featurecode; /* all markup codes: e.g. STOCKHOLM_GF_ID; - nonmarkup: always set to STOCKHOLM_UNPARSED */ - char **featurename; /* all unparsed markup codes: string, e.g. 
"ID"; - all other lines: NULL */ - int *seqidx; /* all GS, GR, GC, sequence lines: which sequence; - other lines: 0 */ - int *len; /* all GR, GC, sequence lines: length of text field; - other lines: 0 */ - char **text; /* all unparsed nonblank lines: rest of data - other lines: NULL */ - int nseqalloc; /* current nseqs allocated for in aseqs and ainfo */ - int nlines; /* number of lines in this skel */ - int nlinealloc; /* current # of lines allocated for in this skel */ - int overall_line; /* line # in file (important in files w/ >1 ali)*/ -} alifile_skeleton; - -#define STOCKHOLM_GF_LINE 0 -#define STOCKHOLM_GS_LINE 1 -#define STOCKHOLM_GC_LINE 2 -#define STOCKHOLM_GR_LINE 3 -#define STOCKHOLM_SEQ_LINE 4 -#define STOCKHOLM_BLANK_LINE 5 -#define STOCKHOLM_COMMENT_LINE 6 - -#define STOCKHOLM_UNPARSED 0 -#define STOCKHOLM_GF_ID 1 -#define STOCKHOLM_GF_AC 2 -#define STOCKHOLM_GF_DE 3 -#define STOCKHOLM_GF_AU 4 -#define STOCKHOLM_GF_GA 5 -#define STOCKHOLM_GF_NC 6 -#define STOCKHOLM_GF_TC 7 -#define STOCKHOLM_GS_WT 100 -#define STOCKHOLM_GS_AC 101 -#define STOCKHOLM_GS_DE 102 -#define STOCKHOLM_GC_CS 200 -#define STOCKHOLM_GC_RF 201 -#define STOCKHOLM_GR_SS 300 -#define STOCKHOLM_GR_SA 301 - -#define SKEL_NSEQLUMP 10 /* allocate for new seqs in blocks of this size */ -#define SKEL_LUMPSIZE 100 /* allocate for new lines in skel in blocks of this size */ - -#endif /*STOCKHOLM_H_INCLUDED*/ diff --git a/forester/archive/RIO/others/hmmer/squid/stopwatch.c b/forester/archive/RIO/others/hmmer/squid/stopwatch.c deleted file mode 100644 index 5f2c4bd..0000000 --- a/forester/archive/RIO/others/hmmer/squid/stopwatch.c +++ /dev/null @@ -1,307 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. 
See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* stopwatch.c - * SRE, Fri Nov 26 14:54:21 1999 [St. Louis] [HMMER] - * SRE, Thu Aug 3 08:11:52 2000 [St. Louis] [moved to SQUID] - * - * Reporting of cpu/system/elapsed time used by a process. - * thanks to Warren Gish for assistance. - * - * Basic API: - * - * Stopwatch_t *w; - * w = StopwatchCreate(); - * - * StopwatchStart(w); - * do_lots_of_stuff; - * StopwatchStop(w); - * StopwatchDisplay(stdout, "CPU time: ", w); - * - * StopwatchFree(w); - * - * Some behavior can be controlled at compile time by #define's: - * - * SRE_STRICT_ANSI: By default, stopwatch module assumes that a - * machine is POSIX-compliant (e.g. has struct tms, sys/times.h, - * and times()). If compiled with -DSRE_STRICT_ANSI, reverts to - * pure ANSI C conformant implementation. This simpler system - * won't report system times, only user and elapsed times. - * - * SRE_ENABLE_PVM: If compiled with -DSRE_ENABLE_PVM, the - * functions StopwatchPVMPack() and StopwatchPVMUnpack() - * are compiled, providing PVM communications ability. - * - * One additional compile-time configuration note: - * PTHREAD_TIMES_HACK: Linux pthreads, as of RH6.0/glibc-devel-2.1.1-6, - * appears to interact poorly with times() -- usage times in all - * but the master thread are lost. A workaround for this bug is - * to run stopwatches in each worker thread, and accumulate those - * times back into the master stopwatch using StopwatchInclude(). - * (Just like a PVM implementation has to do.) In HMMER, this - * behavior is compiled in with -DPTHREAD_TIMES_HACK. No - * changes are made in stopwatch functions themselves, though; - * all the extra code is HMMER code. See hmmcalibrate.c for - * an example. - * - * See hmmcalibrate.c for examples of more complex usage - * in dealing with pthreads and PVM. 
- */ - -#include -#include -#include -#ifdef SRE_ENABLE_PVM -#include -#endif - -#include "stopwatch.h" - -/* Function: format_time_string() - * Date: SRE, Fri Nov 26 15:06:28 1999 [St. Louis] - * - * Purpose: Given a number of seconds, format into - * hh:mm:ss.xx in a provided buffer. - * - * Args: buf - allocated space (128 is plenty!) - * sec - number of seconds - * do_frac - TRUE (1) to include hundredths of a sec - */ -static void -format_time_string(char *buf, double sec, int do_frac) -{ - int h, m, s, hs; - - h = (int) (sec / 3600.); - m = (int) (sec / 60.) - h * 60; - s = (int) (sec) - h * 3600 - m * 60; - if (do_frac) { - hs = (int) (sec * 100.) - h * 360000 - m * 6000 - s * 100; - sprintf(buf, "%02d:%02d:%02d.%02d", h,m,s,hs); - } else { - sprintf(buf, "%02d:%02d:%02d", h,m,s); - } -} - -/* Function: StopwatchStart() - * Date: SRE, Fri Nov 26 15:07:48 1999 [St. Louis] - * - * Purpose: Start a stopwatch. - * - * Args: w - the watch - */ -void -StopwatchStart(Stopwatch_t *w) -{ - w->t0 = time(NULL); -#ifdef SRE_STRICT_ANSI - w->cpu0 = clock(); -#else - (void) times(&(w->cpu0)); -#endif - - w->elapsed = 0.; - w->user = 0.; - w->sys = 0.; -} - -/* Function: StopwatchStop() - * Date: SRE, Fri Nov 26 15:08:16 1999 [St. Louis] - * - * Purpose: Stop a stopwatch. - * - * The implementation allows "split times": - * you can stop a watch multiple times, reporting - * times at multiple points during program - * execution. 
- * - * Args: w - the watch - */ -void -StopwatchStop(Stopwatch_t *w) -{ - time_t t1; -#ifdef SRE_STRICT_ANSI - clock_t cpu1; -#else - struct tms cpu1; - long clk_tck; -#endif - - t1 = time(NULL); - w->elapsed = difftime(t1, w->t0); - -#ifdef SRE_STRICT_ANSI - cpu1 = clock(); - w->user = (double) (cpu1- w->cpu0) / (double) CLOCKS_PER_SEC; - w->sys = 0.; /* no way to portably get system time in ANSI C */ - -#else /* assume we're on a POSIX system by default */ - (void) times(&cpu1); - - clk_tck = sysconf(_SC_CLK_TCK); - w->user = (double) (cpu1.tms_utime + cpu1.tms_cutime - - w->cpu0.tms_utime - w->cpu0.tms_cutime) / - (double) clk_tck; - - w->sys = (double) (cpu1.tms_stime + cpu1.tms_cstime - - w->cpu0.tms_stime - w->cpu0.tms_cstime) / - (double) clk_tck; -#endif -} - -/* Function: StopwatchInclude() - * Date: SRE, Fri Nov 26 15:09:34 1999 [St. Louis] - * - * Purpose: Merge the cpu and system times from a slave into - * a master stopwatch. Both watches must be - * stopped, and should not be stopped again unless - * You Know What You're Doing. - * - * Elapsed time is *not* merged; master is assumed - * to be keeping track of the wall clock time, - * and the slave/worker watch is ignored. - * - * Used in two cases: - * 1) PVM; merge in the stopwatch(es) from separate - * process(es) in a cluster. - * 2) Threads, for broken pthreads/times() implementations - * that lose track of cpu times used by spawned - * threads. - * - * Args: w1 - the master stopwatch - * w2 - the slave/worker watch - * - */ -void -StopwatchInclude(Stopwatch_t *w1, Stopwatch_t *w2) -{ - w1->user += w2->user; - w1->sys += w2->sys; -} - -/* Function: StopwatchAlloc(), StopwatchZero(), StopwatchCopy(), - * StopwatchFree() - * Date: SRE, Fri Nov 26 15:13:14 1999 [St. Louis] - * - * Purpose: The usual creation/manipulation/destruction routines - * for a stopwatch object. 
- */ -Stopwatch_t * -StopwatchCreate(void) -{ - Stopwatch_t *w; - w = malloc(sizeof(Stopwatch_t)); - return w; -} -void -StopwatchZero(Stopwatch_t *w) -{ - w->elapsed = 0.; - w->user = 0.; - w->sys = 0.; -} -void -StopwatchCopy(Stopwatch_t *w1, Stopwatch_t *w2) -{ - w1->t0 = w2->t0; -#ifdef SRE_STRICT_ANSI - w1->cpu0 = w2->cpu0; -#else - w1->cpu0.tms_utime = w2->cpu0.tms_utime; - w1->cpu0.tms_stime = w2->cpu0.tms_stime; - w1->cpu0.tms_cutime = w2->cpu0.tms_cutime; - w1->cpu0.tms_cstime = w2->cpu0.tms_cstime; -#endif - w1->elapsed = w2->elapsed; - w1->user = w2->user; - w1->sys = w2->sys; -} -void -StopwatchFree(Stopwatch_t *w) -{ - free(w); -} - - -/* Function: StopwatchDisplay() - * Date: SRE, Fri Nov 26 15:14:12 1999 [St. Louis] - * - * Purpose: Output a usage summary line from a *stopped* - * stopwatch (the times will reflect the last - * time StopwatchStop() was called.) - * - * For s = "CPU Time: " an example output line is: - * CPU Time: 142.55u 7.17s 149.72 Elapsed: 00:02:35.00 - * - * Args: fp - open file for writing (stdout, possibly) - * s - prefix for the report line - * w - a (recently stopped) stopwatch - * - */ -void -StopwatchDisplay(FILE *fp, char *s, Stopwatch_t *w) -{ - char buf[128]; /* (safely holds up to 10^14 years) */ - - if (s == NULL) - fputs("CPU Time: ", fp); - else - fputs(s, fp); - - format_time_string(buf, w->user+w->sys, 1); -#ifdef SRE_STRICT_ANSI - fprintf(fp, "%.2fu %s ", w->user, buf); -#else - fprintf(fp, "%.2fu %.2fs %s ", w->user, w->sys, buf); -#endif - - format_time_string(buf, w->elapsed, 0); - fprintf(fp, "Elapsed: %s\n", buf); -} - -#ifdef SRE_ENABLE_PVM -/* Function: StopwatchPVMPack(), StopwatchPVMUnpack() - * Date: SRE, Fri Nov 26 15:22:04 1999 [St. Louis] - * - * Purpose: Transmission of stopwatch data in a PVM - * cluster. 
- */ -void -StopwatchPVMPack(Stopwatch_t *w) -{ - pvm_pkdouble(&(w->elapsed), 1, 1); - pvm_pkdouble(&(w->user), 1, 1); - pvm_pkdouble(&(w->sys), 1, 1); -} -void -StopwatchPVMUnpack(Stopwatch_t *w) -{ - pvm_upkdouble(&(w->elapsed), 1, 1); - pvm_upkdouble(&(w->user), 1, 1); - pvm_upkdouble(&(w->sys), 1, 1); -} -#endif /*SRE_ENABLE_PVM*/ - - -#ifdef TESTDRIVER -int -main(int argc, char **argv) -{ - Stopwatch_t stopwatch; - - StopwatchStart(&stopwatch); - - sleep(5); - - StopwatchStop(&stopwatch); - StopwatchDisplay(stdout, "CPU Time: ", &stopwatch); -} -#endif diff --git a/forester/archive/RIO/others/hmmer/squid/stopwatch.h b/forester/archive/RIO/others/hmmer/squid/stopwatch.h deleted file mode 100644 index 4794a2e..0000000 --- a/forester/archive/RIO/others/hmmer/squid/stopwatch.h +++ /dev/null @@ -1,59 +0,0 @@ -/* stopwatch.h - * SRE, Fri Nov 26 14:54:21 1999 [St. Louis] [HMMER] - * SRE, Thu Aug 3 08:00:35 2000 [St. Louis] [moved to SQUID] - * CVS $Id: stopwatch.h,v 1.1.1.1 2005/03/22 08:34:24 cmzmasek Exp $ - * - * Header file for stopwatch.c module: - * reporting of cpu/system/elapsed time used by a process. - * See stopwatch.c comments for documentation of compile-time - * configuration options and API. - * - ***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- ***************************************************************** - */ -#include -#include -#ifndef SRE_STRICT_ANSI -#include -#endif - -#ifndef STOPWATCH_H_INCLUDED -#define STOPWATCH_H_INCLUDED - -struct stopwatch_s { - time_t t0; /* Wall clock time, ANSI time() */ -#ifdef SRE_STRICT_ANSI - clock_t cpu0; /* CPU time, ANSI clock() */ -#else - struct tms cpu0; /* CPU/system time, POSIX times()*/ -#endif - - double elapsed; /* elapsed time, seconds */ - double user; /* CPU time, seconds */ - double sys; /* system time, seconds */ -}; -typedef struct stopwatch_s Stopwatch_t; - -extern void StopwatchStart(Stopwatch_t *w); -extern void StopwatchStop(Stopwatch_t *w); -extern void StopwatchInclude(Stopwatch_t *w1, Stopwatch_t *w2); -extern Stopwatch_t *StopwatchCreate(void); -extern void StopwatchZero(Stopwatch_t *w); -extern void StopwatchCopy(Stopwatch_t *w1, Stopwatch_t *w2); -extern void StopwatchFree(Stopwatch_t *w); -extern void StopwatchDisplay(FILE *fp, char *s, Stopwatch_t *w); - -#ifdef HMMER_PVM -extern void StopwatchPVMPack(Stopwatch_t *w); -extern void StopwatchPVMUnpack(Stopwatch_t *w); -#endif - -#endif /*STOPWATCH_H_INCLUDED*/ - diff --git a/forester/archive/RIO/others/hmmer/squid/test_main.c b/forester/archive/RIO/others/hmmer/squid/test_main.c deleted file mode 100644 index 1e80d54..0000000 --- a/forester/archive/RIO/others/hmmer/squid/test_main.c +++ /dev/null @@ -1,25 +0,0 @@ -/* Test of the file.c functions - * cp to ../test_main.c and "make test". 
- * Usage: ./test - */ - -#include -#include -#include "squid.h" - -int -main(int argc, char **argv) -{ - char *env; - char *file; - FILE *fp; - - env = argv[1]; - file = argv[2]; - - fp = EnvFileOpen(file, env); - if (fp != NULL) printf("File open succeeded\n"); - else printf("File open FAILED\n"); - - return 0; -} diff --git a/forester/archive/RIO/others/hmmer/squid/translate.c b/forester/archive/RIO/others/hmmer/squid/translate.c deleted file mode 100644 index fbf7247..0000000 --- a/forester/archive/RIO/others/hmmer/squid/translate.c +++ /dev/null @@ -1,87 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* - * translate.c - functions for translating nucleic acid sequence - * created Tue Jan 12 11:27:29 1993, SRE - * - * RCS $Id: translate.c,v 1.1.1.1 2005/03/22 08:34:31 cmzmasek Exp $ - */ - -#include -#include -#include "squid.h" - - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - - - -/* Function: Translate(char *seq, char **code) - * - * Given a ptr to the start of a nucleic acid sequence, - * and a genetic code, translate the sequence into - * amino acid sequence. - * - * code is an array of 65 strings, representing - * the translations of the 64 codons, arranged - * in order AAA, AAC, AAG, AAU, ..., UUA, UUC, UUG, UUU. - * '*' or '***' is used to represent termination - * codons, usually. The final string, code[64], - * is the code for an ambiguous amino acid. - * - * Because of the way space is allocated for the amino - * acid sequence, the amino acid strings cannot be - * longer than 3 letters each. 
(I don't foresee using - * anything but the single- and triple- letter codes.) - * - * Returns a ptr to the translation string on success, - * or NULL on failure. - */ -char * -Translate(char *seq, char **code) -{ - int codon; /* index for codon */ - char *aaseq; /* RETURN: the translation */ - char *aaptr; /* ptr into aaseq */ - int i; - - if (seq == NULL) - { squid_errno = SQERR_NODATA; return NULL; } - if ((aaseq = (char *) calloc (strlen(seq) + 1, sizeof(char))) == NULL) - Die("calloc failed"); - - aaptr = aaseq; - for (; *seq != '\0' && *(seq+1) != '\0' && *(seq+2) != '\0'; seq += 3) - { - /* calculate the lookup value for - this codon */ - codon = 0; - for (i = 0; i < 3; i++) - { - codon *= 4; - switch (*(seq + i)) { - case 'A': case 'a': break; - case 'C': case 'c': codon += 1; break; - case 'G': case 'g': codon += 2; break; - case 'T': case 't': codon += 3; break; - case 'U': case 'u': codon += 3; break; - default: codon = 64; break; - } - if (codon == 64) break; - } - - strcpy(aaptr, code[codon]); - aaptr += strlen(code[codon]); - } - return aaseq; -} diff --git a/forester/archive/RIO/others/hmmer/squid/translate_main.c b/forester/archive/RIO/others/hmmer/squid/translate_main.c deleted file mode 100644 index 1de9505..0000000 --- a/forester/archive/RIO/others/hmmer/squid/translate_main.c +++ /dev/null @@ -1,226 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* translate_main.c - * - * translate - create a file of all possible protein ORFs, given - * an input nucleic acid sequence - * - * - * Not currently compliant w/ HMMER API. 
- * - * 1.02 Thu Apr 20 16:12:41 1995 - * + incorporated into squid - * + -a, -s options added - * - * CVS $Id: translate_main.c,v 1.1.1.1 2005/03/22 08:34:27 cmzmasek Exp $ - */ - -#include -#include -#include -#include "squid.h" -#include "version.h" - -#ifdef NEED_GETOPTH -#include -#endif - -#define OPTIONS "ahl:o:qs:" - -static char usage[] = "\ -Usage: translate [-options] \n\ - Translate a nucleic acid sequence into protein ORFs.\n\ - Available options are:\n\ - -a : translate in full, with stops; no individual ORFs\n\ - -h : help; show brief usage and version info\n\ - -l : report only ORFs greater than minlen (default 20)\n\ - -o : save results in output file\n\ - -q : quiet; silence banner, for piping or redirection\n\ - -s : with -a, set stop character to \n"; - -int -main(int argc, char **argv) -{ - char *seqfile; /* name of seq file to read */ - SQFILE *seqfp; /* ptr to opened seq file */ - int format; /* format of sequence file */ - char *seq; /* ptr to current sequence */ - SQINFO sqinfo; /* sequence information */ - char *revseq; /* reverse complement of seq */ - int start, end; /* coords of ORF in current seq */ - int orfnumber; /* counter for ORFs in current seq */ - char *aaseq[6]; /* full translations in all 6 frames */ - char *orf; /* ptr to translated ORF sequence */ - char *sptr; /* ptr into orf */ - int len; /* length of an ORF */ - int frame; /* counter for frames (3..5 are reverse)*/ - - int minimum_len; /* minimum length of ORFs to print out */ - char *outfile; /* file to save output in */ - FILE *ofp; /* where to direct output */ - char stopchar; /* what to use as a stop character */ - int keepstops; /* TRUE to do six big ORFs */ - int quiet; /* TRUE to silence banner */ - - int optchar; /* option character */ - extern char *optarg; /* for getopt() */ - extern int optind; /* for getopt() */ - - /*********************************************** - * Parse the command line - ***********************************************/ - - format = 
SQFILE_UNKNOWN; /* autodetect by default */ - minimum_len = 20; - outfile = NULL; - stopchar = '*'; - keepstops = FALSE; - quiet = FALSE; - - while ((optchar = getopt(argc, argv, OPTIONS)) != -1) - switch (optchar) { - - case 'a': keepstops = TRUE; break; - case 'l': minimum_len = atoi(optarg); break; - case 'o': outfile = optarg; break; - case 'q': quiet = TRUE; break; - case 's': stopchar = *optarg; break; - - case 'h': - printf("translate %s, %s\n%s\n", RELEASE, RELEASEDATE, usage); - exit(EXIT_SUCCESS); - default: - Die("%s\n", usage); - } - - if (argc - optind != 1) - Die("Incorrect number of command line arguments\n%s\n", usage); - - seqfile = argv[optind]; - - /*********************************************** - * Open sequence file and output file - ***********************************************/ - - seqfp = SeqfileOpen(seqfile, format, NULL); - if (seqfp == NULL) - Die("Failed to open sequence file %s\n%s\n", - seqfile, usage); - - if (outfile != NULL) - { - if ((ofp = fopen(outfile, "w")) == NULL) - Die("Failed to open output file %s\n", outfile); - } - else - ofp = stdout; - - - /*********************************************** - * Main routine - ***********************************************/ - - if (! 
quiet) printf("translate %s, %s\n", RELEASE, RELEASEDATE); - - while (ReadSeq(seqfp, seqfp->format, &seq, &sqinfo)) - { - s2upper(seq); - revseq = (char *) malloc (sqinfo.len + 1); - revcomp(revseq, seq); - orfnumber = 1; - - /* Translate seq in all six frames */ - aaseq[0] = Translate(seq, stdcode1); - aaseq[1] = Translate(seq + 1, stdcode1); - aaseq[2] = Translate(seq + 2, stdcode1); - aaseq[3] = Translate(revseq, stdcode1); - aaseq[4] = Translate(revseq + 1, stdcode1); - aaseq[5] = Translate(revseq + 2, stdcode1); - - - - if (keepstops) - { /* full translation including stops */ - for (frame = 0; frame < 6; frame++) - { - fprintf(ofp, "> %s:%d", sqinfo.name, frame); - for (sptr = aaseq[frame]; *sptr; sptr++) - { - if (*sptr == '*') *sptr = stopchar; - if (! ((sptr - aaseq[frame]) % 50)) putc('\n', ofp); - putc((int) *sptr, ofp); - } - putc('\n', ofp); - } - } - else - { /* Print all decent ORF's in FASTA format */ - for (frame = 0; frame < 6; frame++) - { - /* initialize strtok on the first ORF; - termination codons are '*' symbols */ - orf = strtok(aaseq[frame], "*"); - while (orf != NULL) - { - len = strlen(orf); - if (len > minimum_len) - { - /* calculate coords */ - start = (orf - aaseq[frame]) * 3 + 1; - if (frame < 3) start += frame; /* frame corrections */ - else start -= frame-3; - - if (frame < 3) - end = start + len * 3; - else - { - start = -1 * (start - sqinfo.len - 1); - end = start - len * 3; - } - - fprintf(ofp, "> %s.%d length %d, nt %d..%d", - sqinfo.name, - orfnumber, - len, - start, - end); - - for (sptr = orf; *sptr; sptr++) - { - if (! 
((sptr - orf) % 50)) - putc('\n', ofp); - putc((int) *sptr, ofp); - } - putc('\n', ofp); - - orfnumber++; - } - - /* pick off next orf */ - orf = strtok(NULL, "*"); - } - } - } - - for (frame = 0; frame < 6; frame++) - free(aaseq[frame]); - FreeSequence(seq, &sqinfo); - free(revseq); - } - - SeqfileClose(seqfp); - - /************************************************** - * Successful return to invocation environment - **************************************************/ - return 0; -} - diff --git a/forester/archive/RIO/others/hmmer/squid/types.c b/forester/archive/RIO/others/hmmer/squid/types.c deleted file mode 100644 index d1e0b16..0000000 --- a/forester/archive/RIO/others/hmmer/squid/types.c +++ /dev/null @@ -1,228 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* file: types.c - * - * Finicky type checkers for strings. Return 1 (TRUE) if ok, 0 elsewise. - * Also, finicky type converters (sre_ntoh32() and friends) - * - * CVS $Id: types.c,v 1.1.1.1 2005/03/22 08:34:27 cmzmasek Exp $ - */ - -#include -#include -#include "squid.h" - -/* Function: IsInt() - * - * Returns TRUE if s points to something that atoi() will parse - * completely and convert to an integer. 
- */ -int -IsInt(char *s) -{ - int hex = 0; - - if (s == NULL) {squid_errno = SQERR_PARAMETER; return 0; } - - /* skip whitespace */ - while (isspace((int) (*s))) s++; - /* skip leading sign */ - if (*s == '-' || *s == '+') s++; - /* skip leading conversion signals */ - if ((strncmp(s, "0x", 2) == 0 && (int) strlen(s) > 2) || - (strncmp(s, "0X", 2) == 0 && (int) strlen(s) > 2)) - { - s += 2; - hex = 1; - } - else if (*s == '0' && (int) strlen(s) > 1) - s++; - /* examine remainder for garbage chars */ - if (!hex) - while (*s != '\0') - { - if (!isdigit((int) (*s))) return 0; - s++; - } - else - while (*s != '\0') - { - if (!isxdigit((int) (*s))) return 0; - s++; - } - - return 1; -} - - -/* Function: IsReal() - * - * Purpose: Returns TRUE if s is a string representation - * of a valid floating point number. - */ -int -IsReal(char *s) -{ - int gotdecimal = 0; - int gotexp = 0; - int gotreal = 0; - - if (s == NULL) return 0; - - while (isspace((int) (*s))) s++; /* skip leading whitespace */ - if (*s == '-' || *s == '+') s++; /* skip leading sign */ - - /* Examine remainder for garbage. Allowed one '.' and - * one 'e' or 'E'; if both '.' and e/E occur, '.' - * must be first. - */ - while (*s != '\0') - { - if (isdigit((int) (*s))) - gotreal++; - else if (*s == '.') - { - if (gotdecimal) return 0; /* can't have two */ - if (gotexp) return 0; /* e/E preceded . */ - else gotdecimal++; - } - else if (*s == 'e' || *s == 'E') - { - if (gotexp) return 0; /* can't have two */ - else gotexp++; - } - else if (isspace((int) (*s))) - break; - - s++; - } - - while (isspace((int) (*s))) s++; /* skip trailing whitespace */ - if (*s == '\0' && gotreal) return 1; - else return 0; -} - - -/* Function: Byteswap() - * - * Purpose: Swap between big-endian and little-endian. - * For example: - * int foo = 0x12345678; - * byteswap((char *) &foo, sizeof(int)); - * printf("%x\n", foo) - * gives 78563412. - * - * I don't fully understand byte-swapping issues. 
- * However, I have tested this on chars through floats, - * on various machines: - * SGI IRIX 4.0.5, SunOS 4.1.3, DEC Alpha OSF/1, Alliant - * - * Date: Sun Feb 12 10:26:22 1995 - */ -void -Byteswap(char *swap, int nbytes) -{ - int x; - char byte; - - for (x = 0; x < nbytes / 2; x++) - { - byte = swap[nbytes - x - 1]; - swap[nbytes - x - 1] = swap[x]; - swap[x] = byte; - } -} - - - -/* Functions: sre_ntoh16(), etc. - * Date: SRE, Sun Dec 31 11:26:53 2000 [St. Louis] - * - * Purpose: Provide functionality of ntohs(), etc; extended - * to 64-bit unsigned ints, and explicitly provided - * in case a machine doesn't have the ntohs() - * family. - * - * If we're using the host functions, - * USE_HOST_BYTESWAP_FUNCTIONS was set to 1 in - * squidconf.h, and we #define'd sre_hton16(x)=hton(x), etc. - * in squid.h. In doing this, we assumed that the - * host functions work on 16- and 32-bit unsigned quantities. - * If for some reason that's not true, set - * USE_HOST_BYTESWAP_FUNCTIONS to 0. - */ -#ifndef USE_HOST_BYTESWAP_FUNCTIONS -sqd_uint16 -sre_ntoh16(sqd_uint16 netshort) -{ -#ifdef WORDS_BIGENDIAN - return netshort; -#else - Byteswap((char *) &netshort, 2); - return netshort; -#endif -} -sqd_uint32 -sre_ntoh32(sqd_uint32 netlong) -{ -#ifdef WORDS_BIGENDIAN - return netlong; -#else - Byteswap((char *) &netlong, 4); - return netlong; -#endif -} -sqd_uint16 -sre_hton16(sqd_uint16 hostshort) -{ -#ifdef WORDS_BIGENDIAN - return hostshort; -#else - Byteswap((char *) &hostshort, 2); - return hostshort; -#endif -} -sqd_uint32 -sre_hton32(sqd_uint32 hostlong) -{ -#ifdef WORDS_BIGENDIAN - return hostlong; -#else - Byteswap((char *) &hostlong, 4); - return hostlong; -#endif -} -#endif /*USE_HOST_BYTESWAP_FUNCTIONS*/ - -sqd_uint64 -sre_ntoh64(sqd_uint64 net_int64) -{ -#ifdef WORDS_BIGENDIAN - return net_int64; -#else - Byteswap((char *) &net_int64, 8); - return net_int64; -#endif -} -sqd_uint64 -sre_hton64(sqd_uint64 host_int64) -{ -#ifdef WORDS_BIGENDIAN - return host_int64; 
-#else - Byteswap((char *) &host_int64, 8); - return host_int64; -#endif -} - - - - diff --git a/forester/archive/RIO/others/hmmer/squid/weight.c b/forester/archive/RIO/others/hmmer/squid/weight.c deleted file mode 100644 index d33902b..0000000 --- a/forester/archive/RIO/others/hmmer/squid/weight.c +++ /dev/null @@ -1,748 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* weight.c - * SRE, Thu Mar 3 07:56:01 1994 - * - * Calculate weights for sequences in an alignment. - * RCS $Id: weight.c,v 1.1.1.1 2005/03/22 08:34:33 cmzmasek Exp $ - */ - -#include -#include -#include "squid.h" - -static void upweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt, int node); -static void downweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt, - float *fwt, int node); -static float simple_distance(char *s1, char *s2); -static int simple_diffmx(char **aseqs,int num, float ***ret_dmx); - -/* Function: GSCWeights() - * - * Purpose: Use Erik's tree-based algorithm to set weights for - * sequences in an alignment. upweight() and downweight() - * are derived from Graeme Mitchison's code. - * - * Args: aseq - array of (0..nseq-1) aligned sequences - * nseq - number of seqs in alignment - * alen - length of alignment - * wgt - allocated [0..nseq-1] array of weights to be returned - * - * Return: (void) - * wgt is filled in. 
- */ -void -GSCWeights(char **aseq, int nseq, int alen, float *wgt) -{ - float **dmx; /* distance (difference) matrix */ - struct phylo_s *tree; - float *lwt, *rwt; /* weight on left, right of this tree node */ - float *fwt; /* final weight assigned to this node */ - int i; - - /* Sanity check first - */ - if (nseq == 1) { wgt[0] = 1.0; return; } - - /* I use a simple fractional difference matrix derived by - * pairwise identity. Perhaps I should include a Poisson - * distance correction. - */ - MakeDiffMx(aseq, nseq, &dmx); - if (! Cluster(dmx, nseq, CLUSTER_MIN, &tree)) Die("Cluster() failed"); - - /* Allocations - */ - lwt = MallocOrDie (sizeof(float) * (2 * nseq - 1)); - rwt = MallocOrDie (sizeof(float) * (2 * nseq - 1)); - fwt = MallocOrDie (sizeof(float) * (2 * nseq - 1)); - - /* lwt and rwt are the total branch weight to the left and - * right of a node or sequence. They are 0..2N-2. 0..N-1 are - * the sequences; these have weight 0. N..2N-2 are the actual - * tree nodes. - */ - for (i = 0; i < nseq; i++) - lwt[i] = rwt[i] = 0.0; - /* recursively calculate rwt, lwt, starting - at node nseq (the root) */ - upweight(tree, nseq, lwt, rwt, nseq); - - /* recursively distribute weight across the - tree */ - fwt[nseq] = nseq; - downweight(tree, nseq, lwt, rwt, fwt, nseq); - /* collect the weights */ - for (i = 0; i < nseq; i++) - wgt[i] = fwt[i]; - - FMX2Free(dmx); - FreePhylo(tree, nseq); - free(lwt); free(rwt); free(fwt); -} - -static void -upweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt, int node) -{ - int ld,rd; - - ld = tree[node-nseq].left; - if (ld >= nseq) upweight(tree, nseq, lwt, rwt, ld); - rd = tree[node-nseq].right; - if (rd >= nseq) upweight(tree, nseq, lwt, rwt, rd); - lwt[node] = lwt[ld] + rwt[ld] + tree[node-nseq].lblen; - rwt[node] = lwt[rd] + rwt[rd] + tree[node-nseq].rblen; -} - - -static void -downweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt, float *fwt, int node) -{ - int ld,rd; - float lnum, rnum; - - ld = 
tree[node-nseq].left; - rd = tree[node-nseq].right; - if (lwt[node] + rwt[node] > 0.0) - { - fwt[ld] = fwt[node] * (lwt[node] / (lwt[node] + rwt[node])); - fwt[rd] = fwt[node] * (rwt[node] / (lwt[node] + rwt[node])); - } - else - { - lnum = (ld >= nseq) ? tree[ld-nseq].incnum : 1.0; - rnum = (rd >= nseq) ? tree[rd-nseq].incnum : 1.0; - fwt[ld] = fwt[node] * lnum / (lnum + rnum); - fwt[rd] = fwt[node] * rnum / (lnum + rnum); - } - - if (ld >= nseq) downweight(tree, nseq, lwt, rwt, fwt, ld); - if (rd >= nseq) downweight(tree, nseq, lwt, rwt, fwt, rd); -} - - - - -/* Function: VoronoiWeights() - * - * Purpose: Calculate weights using the scheme of Sibbald & - * Argos (JMB 216:813-818 1990). The scheme is - * slightly modified because the original algorithm - * actually doesn't work on gapped alignments. - * The sequences are assumed to be protein. - * - * Args: aseq - array of (0..nseq-1) aligned sequences - * nseq - number of sequences - * alen - length of alignment - * wgt - allocated [0..nseq-1] array of weights to be returned - * - * Return: void - * wgt is filled in. 
- */ -void -VoronoiWeights(char **aseq, int nseq, int alen, float *wgt) -{ - float **dmx; /* distance (difference) matrix */ - float *halfmin; /* 1/2 minimum distance to other seqs */ - char **psym; /* symbols seen in each column */ - int *nsym; /* # syms seen in each column */ - int symseen[27]; /* flags for observed syms */ - char *randseq; /* randomly generated sequence */ - int acol; /* pos in aligned columns */ - int idx; /* index in sequences */ - int symidx; /* 0..25 index for symbol */ - int i; /* generic counter */ - float min; /* minimum distance */ - float dist; /* distance between random and real */ - float challenge, champion; /* for resolving ties */ - int itscale; /* how many iterations per seq */ - int iteration; - int best; /* index of nearest real sequence */ - - /* Sanity check first - */ - if (nseq == 1) { wgt[0] = 1.0; return; } - - itscale = 50; - - /* Precalculate 1/2 minimum distance to other - * sequences for each sequence - */ - if (! simple_diffmx(aseq, nseq, &dmx)) - Die("simple_diffmx() failed"); - halfmin = MallocOrDie (sizeof(float) * nseq); - for (idx = 0; idx < nseq; idx++) - { - for (min = 1.0, i = 0; i < nseq; i++) - { - if (i == idx) continue; - if (dmx[idx][i] < min) min = dmx[idx][i]; - } - halfmin[idx] = min / 2.0; - } - Free2DArray((void **) dmx, nseq); - - /* Set up the random sequence generating model. - */ - psym = MallocOrDie (alen * sizeof(char *)); - nsym = MallocOrDie (alen * sizeof(int)); - for (acol = 0; acol < alen; acol++) - psym[acol] = MallocOrDie (27 * sizeof(char)); - -/* #ifdef ORIGINAL_SIBBALD_ALGORITHM_IS_BROKEN */ - for (acol = 0; acol < alen; acol++) - { - memset(symseen, 0, sizeof(int) * 27); - for (idx = 0; idx < nseq; idx++) - if (! 
isgap(aseq[idx][acol])) - { - if (isupper((int) aseq[idx][acol])) - symidx = aseq[idx][acol] - 'A'; - else - symidx = aseq[idx][acol] - 'a'; - if (symidx >= 0 && symidx < 26) - symseen[symidx] = 1; - } - else - symseen[26] = 1; /* a gap */ - - for (nsym[acol] = 0, i = 0; i < 26; i++) - if (symseen[i]) - { - psym[acol][nsym[acol]] = 'A'+i; - nsym[acol]++; - } - if (symseen[26]) { psym[acol][nsym[acol]] = ' '; nsym[acol]++; } - } -/* #endif ORIGINAL_SIBBALD_ALGORITHM_IS_BROKEN */ - - /* Note: the original Sibbald&Argos algorithm calls for - * bounding the sampled space using a template-like random - * sequence generator. However, this leads to one minor - * and one major problem. The minor problem is that - * exceptional amino acids in a column can have a - * significant effect by altering the amount of sampled - * sequence space; the larger the data set, the worse - * this problem becomes. The major problem is that - * there is no reasonable way to deal with gaps. - * Gapped sequences simply inhabit a different dimensionality - * and it's pretty painful to imagine calculating Voronoi - * volumes when the N in your N-space is varying. - * Note that all the examples shown by Sibbald and Argos - * are *ungapped* examples. - * - * The best way I've found to circumvent this problem is - * just not to bound the sampled space; count gaps as - * symbols and generate completely random sequences. - */ -#ifdef ALL_SEQUENCE_SPACE - for (acol = 0; acol < alen; acol++) - { - strcpy(psym[acol], "ACDEFGHIKLMNPQRSTVWY "); - nsym[acol] = 21; - } -#endif - - /* Sibbald and Argos algorithm: - * 1) assign all seqs weight 0. - * 2) generate a "random" sequence - * 3) calculate distance to every other sequence - * (if we get a distance < 1/2 minimum distance - * to other real seqs, we can stop) - * 4) if unique closest sequence, increment its weight 1. - * if multiple closest seq, choose one randomly - * 5) repeat 2-4 for lots of iterations - * 6) normalize all weights to sum to nseq. 
- */ - randseq = MallocOrDie ((alen+1) * sizeof(char)); - - best = 42.; /* solely to silence GCC uninit warnings. */ - FSet(wgt, nseq, 0.0); - for (iteration = 0; iteration < itscale * nseq; iteration++) - { - for (acol = 0; acol < alen; acol++) - randseq[acol] = (nsym[acol] == 0) ? ' ' : psym[acol][CHOOSE(nsym[acol])]; - randseq[acol] = '\0'; - - champion = sre_random(); - for (min = 1.0, idx = 0; idx < nseq; idx++) - { - dist = simple_distance(aseq[idx], randseq); - if (dist < halfmin[idx]) - { - best = idx; - break; - } - if (dist < min) - { champion = sre_random(); best = idx; min = dist; } - else if (dist == min) - { - challenge = sre_random(); - if (challenge > champion) - { champion = challenge; best = idx; min = dist; } - } - } - wgt[best] += 1.0; - } - - for (idx = 0; idx < nseq; idx++) - wgt[idx] = wgt[idx] / (float) itscale; - - free(randseq); - free(nsym); - free(halfmin); - Free2DArray((void **) psym, alen); -} - - -/* Function: simple_distance() - * - * Purpose: For two identical-length null-terminated strings, return - * the fractional difference between them. (0..1) - * (Gaps don't count toward anything.) - */ -static float -simple_distance(char *s1, char *s2) -{ - int diff = 0; - int valid = 0; - - for (; *s1 != '\0'; s1++, s2++) - { - if (isgap(*s1) || isgap(*s2)) continue; - if (*s1 != *s2) diff++; - valid++; - } - return (valid > 0 ? ((float) diff / (float) valid) : 0.0); -} - -/* Function: simple_diffmx() - * - * Purpose: Given a set of flushed, aligned sequences, construct - * an NxN fractional difference matrix using the - * simple_distance rule. - * - * Args: aseqs - flushed, aligned sequences - * num - number of aseqs - * ret_dmx - RETURN: difference matrix (caller must free) - * - * Return: 1 on success, 0 on failure. 
- */ -static int -simple_diffmx(char **aseqs, - int num, - float ***ret_dmx) -{ - float **dmx; /* RETURN: distance matrix */ - int i,j; /* counters over sequences */ - - /* Allocate - */ - if ((dmx = (float **) malloc (sizeof(float *) * num)) == NULL) - Die("malloc failed"); - for (i = 0; i < num; i++) - if ((dmx[i] = (float *) malloc (sizeof(float) * num)) == NULL) - Die("malloc failed"); - - /* Calculate distances, symmetric matrix - */ - for (i = 0; i < num; i++) - for (j = i; j < num; j++) - dmx[i][j] = dmx[j][i] = simple_distance(aseqs[i], aseqs[j]); - - /* Return - */ - *ret_dmx = dmx; - return 1; -} - - - -/* Function: BlosumWeights() - * Date: SRE, Fri Jul 16 17:33:59 1999 (St. Louis) - * - * Purpose: Assign weights to a set of aligned sequences - * using the BLOSUM rule: - * - do single linkage clustering at some pairwise identity - * - in each cluster, give each sequence 1/clustsize - * total weight. - * - * The clusters have no pairwise link >= maxid. - * - * O(N) in memory. Probably ~O(NlogN) in time; O(N^2) - * in worst case, which is no links between sequences - * (e.g., values of maxid near 1.0). - * - * Args: aseqs - alignment - * nseq - number of seqs in alignment - * alen - # of columns in alignment - * maxid - fractional identity (e.g. 0.62 for BLOSUM62) - * wgt - [0..nseq-1] array of weights to be returned - */ -void -BlosumWeights(char **aseqs, int nseq, int alen, float maxid, float *wgt) -{ - int *c, nc; - int *nmem; /* number of seqs in each cluster */ - int i; /* loop counter */ - - SingleLinkCluster(aseqs, nseq, alen, maxid, &c, &nc); - - FSet(wgt, nseq, 1.0); - nmem = MallocOrDie(sizeof(int) * nc); - - for (i = 0; i < nc; i++) nmem[i] = 0; - for (i = 0; i < nseq; i++) nmem[c[i]]++; - for (i = 0; i < nseq; i++) wgt[i] = 1. / (float) nmem[c[i]]; - - free(nmem); - free(c); - return; -} - - -/* Function: PositionBasedWeights() - * Date: SRE, Fri Jul 16 17:47:22 1999 [St. 
Louis] - * - * Purpose: Implementation of Henikoff and Henikoff position-based - * weights (JMB 243:574-578, 1994) [Henikoff94b]. - * - * A significant advantage of this approach that Steve and Jorja - * don't point out is that it is O(N) in memory, unlike - * many other approaches like GSC weights or Voronoi. - * - * A potential disadvantage that they don't point out - * is that in the theoretical limit of infinite sequences - * in the alignment, weights go flat: eventually every - * column has at least one representative of each of 20 aa (or 4 nt) - * in it. - * - * They also don't give a rule for how to handle gaps. - * The rule used here seems the obvious and sensible one - * (ignore them). This means that longer sequences - * initially get more weight; hence a "double - * normalization" in which the weights are first divided - * by sequence length (to compensate for that effect), - * then normalized to sum to nseq. - * - * Limitations: - * Implemented in a way that's alphabet-independent: - * it uses the 26 upper case letters as "residues". - * Any alphabetic character in aseq is interpreted as - * a unique "residue" (case insensitively; lower case - * mapped to upper case). All other characters are - * interpreted as gaps. - * - * This way, we don't have to pass around any alphabet - * type info (DNA vs. RNA vs. protein) and don't have - * to deal with remapping IUPAC degenerate codes - * probabilistically. However, on the down side, - * a sequence with a lot of degenerate IUPAC characters - * will get an artifactually high PB weight. - * - * Args: aseq - sequence alignment to weight - * nseq - number of sequences in alignment - * alen - length of alignment - * wgt - RETURN: weights filled in (pre-allocated 0..nseq-1) - * - * Returns: (void) - * wgt is allocated (0..nseq-1) by caller, and filled in here. 
- */ -void -PositionBasedWeights(char **aseq, int nseq, int alen, float *wgt) -{ - int rescount[26]; /* count of A-Z residues in a column */ - int nres; /* number of different residues in col */ - int idx, pos; /* indices into aseq */ - int x; - float norm; - - FSet(wgt, nseq, 0.0); - for (pos = 0; pos < alen; pos++) - { - for (x = 0; x < 26; x++) rescount[x] = 0; - for (idx = 0; idx < nseq; idx++) - if (isalpha(aseq[idx][pos])) - rescount[toupper(aseq[idx][pos]) - 'A'] ++; - - nres = 0; - for (x = 0; x < 26; x++) - if (rescount[x] > 0) nres++; - - for (idx = 0; idx < nseq; idx++) - if (isalpha(aseq[idx][pos])) - wgt[idx] += 1. / (float) (nres * rescount[toupper(aseq[idx][pos]) - 'A']); - } - - for (idx = 0; idx < nseq; idx++) - wgt[idx] /= (float) DealignedLength(aseq[idx]); - norm = (float) nseq / FSum(wgt, nseq); - FScale(wgt, nseq, norm); - return; -} - - - - -/* Function: FilterAlignment() - * Date: SRE, Wed Jun 30 09:19:30 1999 [St. Louis] - * - * Purpose: Constructs a new alignment by removing near-identical - * sequences from a given alignment (where identity is - * calculated *based on the alignment*). - * Does not affect the given alignment. - * Keeps earlier sequence, discards later one. - * - * Usually called as an ad hoc sequence "weighting" mechanism. - * - * Limitations: - * Unparsed Stockholm markup is not propagated into the - * new alignment. - * - * Args: msa -- original alignment - * cutoff -- fraction identity cutoff. 0.8 removes sequences > 80% id. - * ret_new -- RETURN: new MSA, usually w/ fewer sequences - * - * Return: (void) - * ret_new must be free'd by caller: MSAFree(). 
- */ -void -FilterAlignment(MSA *msa, float cutoff, MSA **ret_new) -{ - int nnew; /* number of seqs in new alignment */ - int *list; - int *useme; - float ident; - int i,j; - int remove; - - /* find which seqs to keep (list) */ - /* diff matrix; allow ragged ends */ - list = MallocOrDie (sizeof(int) * msa->nseq); - useme = MallocOrDie (sizeof(int) * msa->nseq); - for (i = 0; i < msa->nseq; i++) useme[i] = FALSE; - - nnew = 0; - for (i = 0; i < msa->nseq; i++) - { - remove = FALSE; - for (j = 0; j < nnew; j++) - { - ident = PairwiseIdentity(msa->aseq[i], msa->aseq[list[j]]); - if (ident > cutoff) - { - remove = TRUE; - printf("removing %12s -- fractional identity %.2f to %s\n", - msa->sqname[i], ident, - msa->sqname[list[j]]); - break; - } - } - if (remove == FALSE) { - list[nnew++] = i; - useme[i] = TRUE; - } - } - - MSASmallerAlignment(msa, useme, ret_new); - free(list); - free(useme); - return; -} - - -/* Function: SampleAlignment() - * Date: SRE, Wed Jun 30 10:13:56 1999 [St. Louis] - * - * Purpose: Constructs a new, smaller alignment by sampling a given - * number of sequences at random. Does not change the - * alignment nor the order of the sequences. - * - * If you ask for a sample that is larger than nseqs, - * it silently returns the original alignment. - * - * Not really a weighting method, but this is as good - * a place as any to keep it, since it's similar in - * construction to FilterAlignment(). - * - * Args: msa -- original alignment - * sample -- number of sequences in new alignment (0 < sample <= nseq) - * ret_new -- RETURN: new MSA - * - * Return: (void) - * ret_new must be free'd by caller: MSAFree(). 
- */ -void -SampleAlignment(MSA *msa, int sample, MSA **ret_new) -{ - int *list; /* array for random selection w/o replace */ - int *useme; /* array of flags 0..nseq-1: TRUE to use */ - int i, idx; - int len; - - /* Allocations - */ - list = (int *) MallocOrDie (sizeof(int) * msa->nseq); - useme = (int *) MallocOrDie (sizeof(int) * msa->nseq); - for (i = 0; i < msa->nseq; i++) - { - list[i] = i; - useme[i] = FALSE; - } - - /* Sanity check. - */ - if (sample >= msa->nseq) sample = msa->nseq; - - /* random selection w/o replacement */ - for (len = msa->nseq, i = 0; i < sample; i++) - { - idx = CHOOSE(len); - printf("chose %d: %s\n", list[idx], msa->sqname[list[idx]]); - useme[list[idx]] = TRUE; - list[idx] = list[--len]; - } - - MSASmallerAlignment(msa, useme, ret_new); - free(list); - free(useme); - return; -} - - -/* Function: SingleLinkCluster() - * Date: SRE, Fri Jul 16 15:02:57 1999 [St. Louis] - * - * Purpose: Perform simple single link clustering of seqs in a - * sequence alignment. A pairwise identity threshold - * defines whether two sequences are linked or not. - * - * Important: runs in O(N) memory, unlike standard - * graph decomposition algorithms that use O(N^2) - * adjacency matrices or adjacency lists. Requires - * O(N^2) time in worst case (which is when you have - * no links at all), O(NlogN) in "average" - * case, and O(N) in best case (when there is just - * one cluster in a completely connected graph. - * - * (Developed because hmmbuild could no longer deal - * with GP120, a 16,013 sequence alignment.) - * - * Limitations: - * CASE-SENSITIVE. Assumes aseq have been put into - * either all lower or all upper case; or at least, - * within a column, there's no mixed case. - * - * Algorithm: - * I don't know if this algorithm is published. I - * haven't seen it in graph theory books, but that might - * be because it's so obvious that nobody's bothered. 
- * - * In brief, we're going to do a breadth-first search - * of the graph, and we're going to calculate links - * on the fly rather than precalculating them into - * some sort of standard adjacency structure. - * - * While working, we keep two stacks of maximum length N: - * a : list of vertices that are still unconnected. - * b : list of vertices that we've connected to - * in our current breadth level, but we haven't - * yet tested for other connections to a. - * The current length (number of elements in) a and b are - * kept in na, nb. - * - * We store our results in an array of length N: - * c : assigns each vertex to a component. for example - * c[4] = 1 means that vertex 4 is in component 1. - * nc is the number of components. Components - * are numbered from 0 to nc-1. We return c and nc - * to our caller. - * - * The algorithm is: - * - * Initialisation: - * a <-- all the vertices - * na <-- N - * b <-- empty set - * nb <-- 0 - * nc <-- 0 - * - * Then: - * while (a is not empty) - * pop a vertex off a, push onto b - * while (b is not empty) - * pop vertex v off b - * assign c[v] = nc - * for each vertex w in a: - * compare v,w. If w is linked to v, remove w - * from a, push onto b. - * nc++ - * q.e.d. :) - * - * Args: aseq - aligned sequences - * nseq - number of sequences in aseq - * alen - alignment length - * maxid - fractional identity threshold 0..1. if id >= maxid, seqs linked - * ret_c - RETURN: 0..nseq-1 assignments of seqs to components (clusters) - * ret_nc - RETURN: number of components - * - * Returns: void. - * ret_c is allocated here. 
Caller free's with free(*ret_c) - */ -void -SingleLinkCluster(char **aseq, int nseq, int alen, float maxid, - int **ret_c, int *ret_nc) -{ - int *a, na; /* stack of available vertices */ - int *b, nb; /* stack of working vertices */ - int *c; /* array of results */ - int nc; /* total number of components */ - int v,w; /* index of a working vertices */ - int i; /* loop counter */ - - /* allocations and initializations - */ - a = MallocOrDie (sizeof(int) * nseq); - b = MallocOrDie (sizeof(int) * nseq); - c = MallocOrDie (sizeof(int) * nseq); - for (i = 0; i < nseq; i++) a[i] = i; - na = nseq; - nb = 0; - nc = 0; - - /* Main algorithm - */ - while (na > 0) - { - v = a[na-1]; na--; /* pop a vertex off a, */ - b[nb] = v; nb++; /* and push onto b */ - while (nb > 0) - { - v = b[nb-1]; nb--; /* pop vertex off b */ - c[v] = nc; /* assign it to component nc */ - for (i = na-1; i >= 0; i--)/* backwards, becase of deletion/swapping we do*/ - if (simple_distance(aseq[v], aseq[a[i]]) < 1. - maxid) /* linked? */ - { - w = a[i]; a[i] = a[na-1]; na--; /* delete w from a (note swap) */ - b[nb] = w; nb++; /* push w onto b */ - } - } - nc++; - } - - /* Cleanup and return - */ - free(a); - free(b); - *ret_c = c; - *ret_nc = nc; - return; -} diff --git a/forester/archive/RIO/others/hmmer/squid/weight_main.c b/forester/archive/RIO/others/hmmer/squid/weight_main.c deleted file mode 100644 index 6bc3d65..0000000 --- a/forester/archive/RIO/others/hmmer/squid/weight_main.c +++ /dev/null @@ -1,187 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- *****************************************************************/ - -/* weight_main.c - * SRE, Thu Mar 3 13:43:39 1994 - * - * Calculate weights for a sequence alignment. - * CVS $Id: weight_main.c,v 1.1.1.1 2005/03/22 08:34:30 cmzmasek Exp $ - */ - -#include -#include -#include -#include - -#include "squid.h" -#include "msa.h" - -static char banner[] = "weight - calculate sequence weights for an alignment"; - -static char usage[] = "\ -Usage: weight [-options] \n\ - Available options:\n\ - -b : use BLOSUM weighting scheme at fractional identity\n\ - -f : filter out seqs w/ fractional ident > [0-1]\n\ - -h : help; print version and usage info\n\ - -o : save weight-annotated alignment in \n\ - -p : use position based weight scheme (Henikoff & Henikoff)\n\ - -s : sample sequences at random into a new alignment\n\ - -v : use Voronoi weight scheme (Sibbald & Argos) \n\ -"; - -static char experts[] = "\ - Expert options:\n\ - --informat : specify alignment file format \n\ - allowed formats: SELEX, MSF, Clustal, a2m, PHYLIP\n\ - --quiet : suppress verbose banner\n\ -"; - -static struct opt_s OPTIONS[] = { - { "-b", TRUE, sqdARG_FLOAT }, - { "-f", TRUE, sqdARG_FLOAT }, - { "-h", TRUE, sqdARG_NONE }, - { "-o", TRUE, sqdARG_STRING }, - { "-p", TRUE, sqdARG_NONE }, - { "-s", TRUE, sqdARG_INT }, - { "-v", TRUE, sqdARG_NONE }, - { "--informat", FALSE, sqdARG_STRING }, - { "--quiet", FALSE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *seqfile; /* file containing aligned seqs */ - MSAFILE *afp; /* pointer to open alignment file */ - MSA *msa; /* multiple sequence alignment */ - int fmt; /* expected format of alignment file */ - int idx; - char *outfile; /* output file for weighted alignment */ - FILE *ofp; /* open outfile */ - - int do_voronoi; /* use Sibbald/Argos Voronoi scheme */ - int do_blosum; /* use BLOSUM weighting scheme */ - int do_pbased; /* use position-based weights */ - int 
do_filter; /* use filtering scheme */ - float idlevel; /* identity level to filter at, [0-1] */ - int samplesize; /* if >0, don't weight, random sample */ - int be_quiet; /* TRUE to suppress banner */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - /*********************************************** - * Parse command line - ***********************************************/ - - fmt = MSAFILE_UNKNOWN; /* autodetect file format by default */ - outfile = NULL; - do_blosum = FALSE; - do_voronoi = FALSE; - do_pbased = FALSE; - do_filter = FALSE; - samplesize = 0; - be_quiet = FALSE; - idlevel = 0.; /* just to suppress gcc uninit warnings */ - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "-b") == 0) - { do_blosum = TRUE; idlevel = atof(optarg); } - else if (strcmp(optname, "-f") == 0) - { do_filter = TRUE; idlevel = atof(optarg); } - else if (strcmp(optname, "-o") == 0) outfile = optarg; - else if (strcmp(optname, "-p") == 0) do_pbased = TRUE; - else if (strcmp(optname, "-s") == 0) samplesize = atoi(optarg); - else if (strcmp(optname, "-v") == 0) do_voronoi = TRUE; - else if (strcmp(optname, "--quiet") == 0) be_quiet = TRUE; - else if (strcmp(optname, "--informat") == 0) { - fmt = String2SeqfileFormat(optarg); - if (fmt == MSAFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - if (! 
IsAlignmentFormat(fmt)) - Die("%s is an unaligned format, can't read as an alignment", optarg); - } - else if (strcmp(optname, "-h") == 0) - { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - - if (argc -optind != 1) - Die("Wrong number of arguments specified on command line\n%s\n", usage); - seqfile = argv[optind]; - - if (outfile == NULL) - ofp = stdout; - else if ((ofp = fopen(outfile, "w")) == NULL) - Die("Failed to open alignment output file %s", outfile); - - if (do_voronoi + do_pbased + do_blosum + do_filter + samplesize > 1) - Die("Choose only one weighting scheme, please.\n%s\n", usage); - - if (do_voronoi || samplesize > 0) - sre_srandom(time(0)); - - if (! be_quiet) - Banner(stdout, banner); - - /*********************************************** - * Open the input alignment file and start... - * be prepared to deal with multiple entries in Stockholm files - ***********************************************/ - - if ((afp = MSAFileOpen(seqfile, fmt, NULL)) == NULL) - Die("Alignment file %s could not be opened for reading", seqfile); - - while ((msa = MSAFileRead(afp)) != NULL) - { - for (idx = 0; idx < msa->nseq; idx++) - s2upper(msa->aseq[idx]); - - if (do_filter || samplesize > 0) - { - MSA *new; - - if (do_filter) - FilterAlignment(msa, idlevel, &new); - else if (samplesize > 0) - SampleAlignment(msa, samplesize, &new); - - if (new != NULL) { - WriteStockholm(ofp, new); - MSAFree(msa); - MSAFree(new); - } - } - else - { - if (do_voronoi) VoronoiWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt); - else if (do_blosum) BlosumWeights(msa->aseq, msa->nseq, msa->alen, idlevel, msa->wgt); - else if (do_pbased) PositionBasedWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt); - else GSCWeights (msa->aseq, msa->nseq, msa->alen, msa->wgt); - - msa->flags |= MSA_SET_WGT; - WriteStockholm(ofp, msa); - MSAFree(msa); - } - } - MSAFileClose(afp); - fclose(ofp); - return EXIT_SUCCESS; -} - diff --git 
a/forester/archive/RIO/others/hmmer/src/Makefile.in b/forester/archive/RIO/others/hmmer/src/Makefile.in deleted file mode 100644 index 8113e57..0000000 --- a/forester/archive/RIO/others/hmmer/src/Makefile.in +++ /dev/null @@ -1,128 +0,0 @@ -############################################################ -# Makefile for HMMER src directory -# CVS $Id: Makefile.in,v 1.1.1.1 2005/03/22 08:34:05 cmzmasek Exp $ -########### -# HMMER - Biological sequence analysis with profile HMMs -# Copyright (C) 1992-1999 Washington University School of Medicine -# All Rights Reserved -# -# This source code is distributed under the terms of the -# GNU General Public License. See the files COPYING and LICENSE -# for details. -########### - -## your compiler and compiler flags -# -CC = @CC@ -CFLAGS = @CFLAGS@ - -## other defined flags. -# DEFS contains stuff that autoconf -# decides on. MDEFS contains stuff that we added to -# the configure script tests. LIBS contains system -# libraries that the configure script decides we need. -# -MDEFS = @MDEFS@ @DEFS@ -LIBS = @LIBS@ -lm - -## archiving command, and ranlib command if you need one. -# In general, you shouldn't need to change these, and they're -# only used for building the testsuite anyway... e.g. we -# make a "libhmmer.a" library for building the testsuite. 
-# -AR = ar rcv -RANLIB = @RANLIB@ - -# Configuration for optional pthreads multiprocessor support -# -PTHREAD_LIBS = @PTHREAD_LIBS@ -PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ - - -# Configuration for optional PVM functionality -# -PVMFLAG = @PVMFLAG@ -PVMLIBDIR = @PVMLIBDIR@ -PVMINCDIR = @PVMINCDIR@ -PVMLIBS = @PVMLIBS@ -PVMPROGS = @PVMPROGS@ - -SHELL = /bin/sh -MYLIBS = -lsquid -MYLIBDIR = -L../squid -MYINCDIR = -I../squid - -PROGS = hmmalign\ - hmmbuild\ - hmmcalibrate\ - hmmconvert\ - hmmemit\ - hmmfetch\ - hmmindex\ - hmmpfam\ - hmmsearch\ - ${PVMPROGS} - -OBJS = alphabet.o\ - core_algorithms.o\ - debug.o\ - display.o\ - emit.o\ - emulation.o\ - histogram.o\ - hmmio.o\ - mathsupport.o\ - masks.o\ - misc.o\ - modelmakers.o\ - plan7.o\ - plan9.o\ - postprob.o\ - prior.o\ - pvm.o\ - threads.o\ - tophits.o\ - trace.o - -HDRS = config.h\ - funcs.h\ - globals.h\ - postprob.h\ - structs.h - -.c.o: - $(CC) $(CFLAGS) $(MDEFS) $(PTHREAD_CFLAGS) $(PVMFLAG) $(MYINCDIR) $(PVMINCDIR) -c $< - -################################################################# -## Targets defining how to make HMMER executables. -## -all: $(PROGS) - -$(PROGS): @EXEC_DEPENDENCY@ $(OBJS) - $(CC) $(CFLAGS) $(PTHREAD_CFLAGS) $(MDEFS) $(MYLIBDIR) $(PVMLIBDIR) -o $@ $@.o $(OBJS) $(PVMLIBS) $(MYLIBS) $(PTHREAD_LIBS) $(LIBS) - - -################################################################# -## Targets used in making HMMER module for testsuite compilation. -## -module: libhmmer.a - -libhmmer.a: $(OBJS) - $(AR) libhmmer.a $(OBJS) - $(RANLIB) libhmmer.a - chmod 644 libhmmer.a - - -################################################################# -## Miscellaneous targets. 
-## -distclean: - make clean - -rm -f Makefile version.h - -clean: - -rm -f *.o *~ Makefile.bak core $(PROGS) TAGS gmon.out libhmmer.a - -TAGS: - etags -t *.c *.h Makefile.in - diff --git a/forester/archive/RIO/others/hmmer/src/alphabet.c b/forester/archive/RIO/others/hmmer/src/alphabet.c deleted file mode 100644 index a431207..0000000 --- a/forester/archive/RIO/others/hmmer/src/alphabet.c +++ /dev/null @@ -1,426 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* alphabet.c - * Configuration of the global symbol alphabet information. - * RCS $Id: alphabet.c,v 1.1.1.1 2005/03/22 08:34:08 cmzmasek Exp $ - */ - -#include -#include -#include -#ifdef HMMER_THREADS -#include -#endif /* HMMER_THREADS */ - -#include "config.h" -#include "structs.h" -#include "funcs.h" -#include "squid.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -static void set_degenerate(char iupac, char *syms); - - -/* Function: DetermineAlphabet() - * - * Purpose: From a set of sequences (raw or aligned), make a good - * guess whether they're Nucleic, Amino, or something - * else, and set alphabet accordingly. - * - * If Alphabet_type is already set, that means our - * autodetection was overridden from the command line, - * and we just set the other globals accordingly. - */ -void -DetermineAlphabet(char **rseqs, int nseq) -{ - int idx; - int other, nucleic, amino; - int type; - - /* Autodetection of alphabet type. 
- */ - type = hmmNOTSETYET; - other = nucleic = amino = 0; - for (idx = 0; idx < nseq; idx++) { - switch (Seqtype(rseqs[idx])) { - case kRNA: nucleic++; break; - case kDNA: nucleic++; break; - case kAmino: amino++; break; - case kOtherSeq: other++; break; - default: Die("No such alphabet type"); - } - } - - if (nucleic == nseq) type = hmmNUCLEIC; - else if (amino == nseq) type = hmmAMINO; - else if (nucleic > amino && nucleic > other) { - Warn("Looks like nucleic acid sequence, hope that's right"); - type = hmmNUCLEIC; - } - else if (amino > nucleic && amino > other) { - Warn("Looks like amino acid sequence, hope that's right"); - type = hmmAMINO; - } - else Die("Sorry, I can't tell if that's protein or DNA"); - - /* Now set up the alphabet. - */ - SetAlphabet(type); -} - - -/* Function: SetAlphabet() - * - * Purpose: Set the alphabet globals, given an alphabet type - * of either hmmAMINO or hmmNUCLEIC. - */ -void -SetAlphabet(int type) -{ - int x; -#ifdef HMMER_THREADS - pthread_mutex_t alphabet_lock; /* alphabet is global; must protect to be threadsafe */ - int rtn; /* return code from pthreads */ - - if ((rtn = pthread_mutex_init(&alphabet_lock, NULL)) != 0) - Die("pthread_mutex_init FAILED; %s\n", strerror(rtn)); - if ((rtn = pthread_mutex_lock(&alphabet_lock)) != 0) - Die("pthread_mutex_lock FAILED: %s\n", strerror(rtn)); -#endif - - /* Because the alphabet information is global, we must - * be careful to make this a thread-safe function. The mutex - * (above) takes care of that. But, indeed, it's also - * just good sense (and more efficient) to simply never - * allow resetting the alphabet. If type is Alphabet_type, - * silently return; else die with an alphabet mismatch - * warning. 
- */ - if (Alphabet_type != hmmNOTSETYET) - { - if (type != Alphabet_type) - Die("An alphabet type conflict occurred.\nYou probably mixed a DNA seq file with a protein model, or vice versa."); - -#ifdef HMMER_THREADS - if ((rtn = pthread_mutex_unlock(&alphabet_lock)) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); -#endif - return; - } - - switch(type) { /* Alphabet is not a string - careful! */ - case hmmAMINO: - Alphabet_type = type; - strncpy(Alphabet, "ACDEFGHIKLMNPQRSTVWYBZX", 23); - Alphabet_size = 20; - Alphabet_iupac = 23; - for (x = 0; x < Alphabet_iupac; x++) { - memset(Degenerate[x], 0, Alphabet_size); - } - for (x = 0; x < Alphabet_size; x++) { - Degenerate[x][x] = 1; - DegenCount[x] = 1; - } - set_degenerate('B', "ND"); - set_degenerate('Z', "QE"); - set_degenerate('X', "ACDEFGHIKLMNPQRSTVWY"); - break; - case hmmNUCLEIC: - Alphabet_type = type; - strncpy(Alphabet, "ACGTUNRYMKSWHBVDX", 17); - Alphabet_size = 4; - Alphabet_iupac = 17; - for (x = 0; x < Alphabet_iupac; x++) { - memset(Degenerate[x], 0, Alphabet_size); - } - for (x = 0; x < Alphabet_size; x++) { - Degenerate[x][x] = 1; - DegenCount[x] = 1; - } - set_degenerate('U', "T"); - set_degenerate('N', "ACGT"); - set_degenerate('X', "ACGT"); - set_degenerate('R', "AG"); - set_degenerate('Y', "CT"); - set_degenerate('M', "AC"); - set_degenerate('K', "GT"); - set_degenerate('S', "CG"); - set_degenerate('W', "AT"); - set_degenerate('H', "ACT"); - set_degenerate('B', "CGT"); - set_degenerate('V', "ACG"); - set_degenerate('D', "AGT"); - break; - default: Die("No support for non-nucleic or protein alphabets"); - } - -#ifdef HMMER_THREADS - if ((rtn = pthread_mutex_unlock(&alphabet_lock)) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); -#endif -} - -/* Function: SymbolIndex() - * - * Purpose: Convert a symbol to its index in Alphabet[]. - * Bogus characters are converted to 'X'. - * More robust than the SYMIDX() macro but - * presumably slower. 
- */ -int -SymbolIndex(char sym) -{ - char *s; - return ((s = strchr(Alphabet, (char) toupper((int) sym))) == NULL) ? - Alphabet_iupac-1 : s - Alphabet; -} - - -/* Function: DigitizeSequence() - * - * Purpose: Internal representation of a sequence in HMMER is - * as a char array. 1..L are the indices - * of seq symbols in Alphabet[]. 0,L+1 are sentinel - * bytes, set to be Alphabet_iupac -- i.e. one more - * than the maximum allowed index. - * - * Assumes that 'X', the fully degenerate character, - * is the last character in the allowed alphabet. - * - * Args: seq - sequence to be digitized (0..L-1) - * L - length of sequence - * - * Return: digitized sequence, dsq. - * dsq is allocated here and must be free'd by caller. - */ -char * -DigitizeSequence(char *seq, int L) -{ - char *dsq; - int i; - - dsq = MallocOrDie (sizeof(char) * (L+2)); - dsq[0] = dsq[L+1] = (char) Alphabet_iupac; - for (i = 1; i <= L; i++) - dsq[i] = SymbolIndex(seq[i-1]); - return dsq; -} - - -/* Function: DedigitizeSequence() - * Date: SRE, Tue Dec 16 10:39:19 1997 [StL] - * - * Purpose: Returns a 0..L-1 character string, converting the - * dsq back to the real alphabet. - */ -char * -DedigitizeSequence(char *dsq, int L) -{ - char *seq; - int i; - - seq = MallocOrDie(sizeof(char) * (L+1)); - for (i = 0; i < L; i++) - seq[i] = Alphabet[(int) dsq[i+1]]; - seq[L] = '\0'; - return seq; -} - - -/* Function: DigitizeAlignment() - * - * Purpose: Given an alignment, return digitized unaligned - * sequence array. (Tracebacks are always relative - * to digitized unaligned seqs, even if they are - * faked from an existing alignment in modelmakers.c.) - * - * Args: msa - alignment to digitize - * ret_dsqs - RETURN: array of digitized unaligned sequences - * - * Return: (void) - * dsqs is alloced here. Free2DArray(dseqs, nseq). 
- */ -void -DigitizeAlignment(MSA *msa, char ***ret_dsqs) -{ - char **dsq; - int idx; /* counter for sequences */ - int dpos; /* position in digitized seq */ - int apos; /* position in aligned seq */ - - dsq = (char **) MallocOrDie (sizeof(char *) * msa->nseq); - for (idx = 0; idx < msa->nseq; idx++) { - dsq[idx] = (char *) MallocOrDie (sizeof(char) * (msa->alen+2)); - - dsq[idx][0] = (char) Alphabet_iupac; /* sentinel byte at start */ - - for (apos = 0, dpos = 1; apos < msa->alen; apos++) { - if (! isgap(msa->aseq[idx][apos])) /* skip gaps */ - dsq[idx][dpos++] = SymbolIndex(msa->aseq[idx][apos]); - } - dsq[idx][dpos] = (char) Alphabet_iupac; /* sentinel byte at end */ - } - *ret_dsqs = dsq; -} - - -/* Function: P7CountSymbol() - * - * Purpose: Given a possibly degenerate symbol code, increment - * a symbol counter array (generally an emission - * probability vector in counts form) appropriately. - * - * Args: counters: vector to count into. [0..Alphabet_size-1] - * symidx: symbol index to count: [0..Alphabet_iupac-1] - * wt: weight to use for the count; often 1.0 - * - * Return: (void) - */ -void -P7CountSymbol(float *counters, char symidx, float wt) -{ - int x; - - if (symidx < Alphabet_size) - counters[(int) symidx] += wt; - else - for (x = 0; x < Alphabet_size; x++) { - if (Degenerate[(int) symidx][x]) - counters[x] += wt / (float) DegenCount[(int) symidx]; - } -} - - -/* Function: DefaultGeneticCode() - * - * Purpose: Configure aacode, mapping triplets to amino acids. - * Triplet index: AAA = 0, AAC = 1, ... UUU = 63. - * AA index: alphabetical: A=0,C=1... Y=19 - * Stop codon: -1. - * Uses the stdcode1[] global translation table from SQUID. 
- * - * Args: aacode - preallocated 0.63 array for genetic code - * - * Return: (void) - */ -void -DefaultGeneticCode(int *aacode) -{ - int x; - - for (x = 0; x < 64; x++) { - if (*(stdcode1[x]) == '*') aacode[x] = -1; - else aacode[x] = SYMIDX(*(stdcode1[x])); - } -} - - -/* Function: DefaultCodonBias() - * - * Purpose: Configure a codonbias table, mapping triplets to - * probability of using the triplet for the amino acid - * it represents: P(triplet | aa). - * The default is to assume codons are used equiprobably. - * - * Args: codebias: 0..63 array of P(triplet|aa), preallocated. - * - * Return: (void) - */ -void -DefaultCodonBias(float *codebias) -{ - codebias[0] = 1./2.; /* AAA Lys 2 */ - codebias[1] = 1./2.; /* AAC Asn 2 */ - codebias[2] = 1./2.; /* AAG Lys 2 */ - codebias[3] = 1./2.; /* AAU Asn 2 */ - codebias[4] = 1./4.; /* ACA Thr 4 */ - codebias[5] = 1./4.; /* ACC Thr 4 */ - codebias[6] = 1./4.; /* ACG Thr 4 */ - codebias[7] = 1./4.; /* ACU Thr 4 */ - codebias[8] = 1./6.; /* AGA Ser 6 */ - codebias[9] = 1./6.; /* AGC Arg 6 */ - codebias[10] = 1./6.; /* AGG Ser 6 */ - codebias[11] = 1./6.; /* AGU Arg 6 */ - codebias[12] = 1./3.; /* AUA Ile 3 */ - codebias[13] = 1./3.; /* AUC Ile 3 */ - codebias[14] = 1.; /* AUG Met 1 */ - codebias[15] = 1./3.; /* AUU Ile 3 */ - codebias[16] = 1./2.; /* CAA Gln 2 */ - codebias[17] = 1./2.; /* CAC His 2 */ - codebias[18] = 1./2.; /* CAG Gln 2 */ - codebias[19] = 1./2.; /* CAU His 2 */ - codebias[20] = 1./4.; /* CCA Pro 4 */ - codebias[21] = 1./4.; /* CCC Pro 4 */ - codebias[22] = 1./4.; /* CCG Pro 4 */ - codebias[23] = 1./4.; /* CCU Pro 4 */ - codebias[24] = 1./6.; /* CGA Arg 6 */ - codebias[25] = 1./6.; /* CGC Arg 6 */ - codebias[26] = 1./6.; /* CGG Arg 6 */ - codebias[27] = 1./6.; /* CGU Arg 6 */ - codebias[28] = 1./6.; /* CUA Leu 6 */ - codebias[29] = 1./6.; /* CUC Leu 6 */ - codebias[30] = 1./6.; /* CUG Leu 6 */ - codebias[31] = 1./6.; /* CUU Leu 6 */ - codebias[32] = 1./2.; /* GAA Glu 2 */ - codebias[33] = 1./2.; /* 
GAC Asp 2 */ - codebias[34] = 1./2.; /* GAG Glu 2 */ - codebias[35] = 1./2.; /* GAU Asp 2 */ - codebias[36] = 1./4.; /* GCA Ala 4 */ - codebias[37] = 1./4.; /* GCC Ala 4 */ - codebias[38] = 1./4.; /* GCG Ala 4 */ - codebias[39] = 1./4.; /* GCU Ala 4 */ - codebias[40] = 1./4.; /* GGA Gly 4 */ - codebias[41] = 1./4.; /* GGC Gly 4 */ - codebias[42] = 1./4.; /* GGG Gly 4 */ - codebias[43] = 1./4.; /* GGU Gly 4 */ - codebias[44] = 1./4.; /* GUA Val 4 */ - codebias[45] = 1./4.; /* GUC Val 4 */ - codebias[46] = 1./4.; /* GUG Val 4 */ - codebias[47] = 1./4.; /* GUU Val 4 */ - codebias[48] = 0.; /* UAA och - */ - codebias[49] = 1./2.; /* UAC Tyr 2 */ - codebias[50] = 0.; /* UAG amb - */ - codebias[51] = 1./2.; /* UAU Tyr 2 */ - codebias[52] = 1./6.; /* UCA Ser 6 */ - codebias[53] = 1./6.; /* UCC Ser 6 */ - codebias[54] = 1./6.; /* UCG Ser 6 */ - codebias[55] = 1./6.; /* UCU Ser 6 */ - codebias[56] = 0.; /* UGA opa - */ - codebias[57] = 1./2.; /* UGC Cys 2 */ - codebias[58] = 1.; /* UGG Trp 1 */ - codebias[59] = 1./2.; /* UGU Cys 2 */ - codebias[60] = 1./6.; /* UUA Leu 6 */ - codebias[61] = 1./2.; /* UUC Phe 2 */ - codebias[62] = 1./6.; /* UUG Leu 6 */ - codebias[63] = 1./2.; /* UUU Phe 2 */ -} - - - -/* Function: set_degenerate() - * - * Purpose: convenience function for setting up - * Degenerate[][] global for the alphabet. - */ -static void -set_degenerate(char iupac, char *syms) -{ - DegenCount[strchr(Alphabet,iupac)-Alphabet] = strlen(syms); - while (*syms) { - Degenerate[strchr(Alphabet,iupac)-Alphabet] - [strchr(Alphabet,*syms)-Alphabet] = 1; - syms++; - } -} diff --git a/forester/archive/RIO/others/hmmer/src/camJul97.c b/forester/archive/RIO/others/hmmer/src/camJul97.c deleted file mode 100644 index e9b364f..0000000 --- a/forester/archive/RIO/others/hmmer/src/camJul97.c +++ /dev/null @@ -1,747 +0,0 @@ -/* Source code from Cambridge visit July 1997 - * - * Position-specific matrices. 
- */ - -#include -#include -#include -#include -#include -#include - -#include "funcs.h" -#include "config.h" -#include "structs.h" -#include "squid.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -/* Function: MakeStarHMM() - * - * Purpose: Given an HMM with counts, create an HMM according - * to the star rule. In star models we typically expect - * that the counts have been collected using BLOSUM style - * weights. - * - * Args: hmm - HMM structure containing counts data - * mx - Star vectors, mx[q][x] - * pq - vector prior P(q) - * nq - number of vectors - * pri - Dirichlet priors for other parameters - * - * Return: (void) - * hmm is converted to probabilities. - */ -void -MakeStarHMM(struct plan7_s *hmm, float **mx, float *pq, int nq, struct p7prior_s *pri) -{ - int k; /* counter over model position */ - int x; /* counter over symbol/transition */ - float *pxa; /* P(x | a) : our parameter estimate */ - float *pqa; /* P(q | a) for all q */ - int q; /* counters over vectors q */ - int ai; /* counter over symbols */ - - /* Match emissions: Star rule implementation. - */ - pxa = (float *) MallocOrDie(sizeof(float) * Alphabet_size); - pqa = (float *) MallocOrDie(sizeof(float) * nq); - for (k = 1; k <= hmm->M; k++) - { - /* calculate log P(q | a) unnormalized (i.e. + log P(a))*/ - for (q = 0; q < nq; q++) - { - pqa[q] = log(pq[q]); - for (ai = 0; ai < Alphabet_size; ai++) - pqa[q] += hmm->mat[k][ai] * log(mx[q][ai]); - } - /* calculate log P(x | a) unnormalized (i.e + log P(a))*/ - for (x = 0; x < Alphabet_size; x++) - { - pxa[x] = pqa[0] + log(mx[0][x]); - for (q = 1; q < nq; q++) - pxa[x] = LogSum(pxa[x], (pqa[q] + log(mx[q][x]))); - } - /* normalize now to get P(x|a) and store */ - LogNorm(pxa, Alphabet_size); - FCopy(hmm->mat[k], pxa, Alphabet_size); - } - - - /* Everything else is done according to P7PriorifyHMM() - */ - /* Model-dependent transitions are handled simply; Laplace. 
- */ - FSet(hmm->begin+2, hmm->M-1, 0.); /* wipe internal BM entries */ - FSet(hmm->end+1, hmm->M-1, 0.); /* wipe internal ME exits */ - hmm->tbd1 += 1.0; - hmm->begin[1] += 1.0; - - /* Main model transitions and insert emissions - */ - for (k = 1; k < hmm->M; k++) - { - P7PriorifyTransitionVector(hmm->t[k], pri); - P7PriorifyEmissionVector(hmm->ins[k], pri, pri->inum, pri->iq, pri->i, NULL); - } - - Plan7Renormalize(hmm); - free(pxa); - free(pqa); - return; -} - - - - -#ifdef SRE_REMOVED -/* Function: MakeIslandHMM() - * - * Purpose: Given a sequence alignment of i = 1..nseq sequences, - * with columns j = 1..alen; and a sequence index idx - * to build the island from. Return a Plan7 island HMM in - * probability form. - * - * Args: aseqs - alignment - * ainfo - alignment info - * idx - index of which sequence to build island from - * null - random sequence model [0..Alphabet_size-1] - * mx - probability matrices mx[q][root b][x] - * bpri - priors on root distributions bpri[q][root b] - * qpri - prior probability distribution over matrices - * nmx - number of joint probability matrices - * - * Return: a new Plan7 HMM - */ -struct plan7_s * -MakeIslandHMM(char **aseqs, AINFO *ainfo, int idx, - float null[MAXABET], float ***mx, float **bpri, - float *qpri, int nmx) -{ - struct plan7_s *hmm; /* RETURN: Plan7 HMM */ - int j; /* column position index */ - int k; /* model position index */ - int q; /* counter for matrices */ - int x; /* counter for symbols */ - float *mat; /* a match emission probability vector */ - float **probq; /* posterior P(q | column) */ - int sym; /* index of a symbol in alphabet */ - float max; - int qmax; - float **pxaq; /* P(x | a,q) vectors, [q][x] */ - int b; /* counter over root symbols */ - - /* Allocate a model which is the length of the - * raw sequence. 
- */ - hmm = AllocPlan7(DealignedLength(aseqs[idx])); - if (ainfo->sqinfo[idx].flags & SQINFO_NAME) - Plan7SetName(hmm, ainfo->sqinfo[idx].name); - if (ainfo->sqinfo[idx].flags & SQINFO_DESC) - Plan7SetDescription(hmm, ainfo->sqinfo[idx].desc); - Plan7SetNullModel(hmm, null, 350./351.); /* p1 made up; shouldn't matter*/ - - mat = (float *) MallocOrDie( sizeof(float) * Alphabet_size); - pxaq = FMX2Alloc(nmx, Alphabet_size); - - /* Calculate the posterior probability distribution - * probq (= P(q | col)) over nmx different matrices - * at each column j -- probq[0..alen-1][0..nmx-1]; - * currently does not use the prior on q, but does a - * winner-take-all rule. - */ - probq = FMX2Alloc(ainfo->alen, nmx); - calc_probq(aseqs, ainfo, mx, bpri, qpri, nmx, probq); - - /* Debugging - */ - print_probq(stdout, probq, ainfo->alen, nmx); - - for (k = 1, j = 0; j < ainfo->alen; j++) - { - if (isgap(aseqs[idx][j])) continue; - - if (strchr(Alphabet, aseqs[idx][j]) != NULL) - sym = SYMIDX(aseqs[idx][j]); - else - Die("MakeIslandHMM() can't handle ambiguous query symbols yet"); - - - /* Calculate P(x | a, q) emission vectors for all matrices q - */ - for (q = 0; q < nmx; q++) - { - for (x = 0; x < Alphabet_size; x++) - { - pxaq[q][x] = 0.0; - for (b = 0; b < 20; b++) - pxaq[q][x] += mx[q][b][x] * mx[q][b][sym] * bpri[q][b]; - } - FNorm(pxaq[q], Alphabet_size); - } - - /* Sum P(x | a, q) emission vectors over matrices q: - * P(x | a, col) = \sum_q P(x | a, q, col) P(q | a, col) - * = \sum_q P(x | a, q) P(q | col) - */ - for (x = 0; x < Alphabet_size; x++) - { - hmm->mat[k][x] = 0.; - for (q = 0; q < nmx; q++) - hmm->mat[k][x] += probq[j][q] * pxaq[q][x]; - if (k < hmm->M) - hmm->ins[k][x] = null[x]; - } - - /* Reference annotation on columns: most probable matrix - */ - max = -FLT_MAX; - for (q = 0; q < nmx; q++) - if (probq[j][q] > max) { qmax = q; max = probq[j][q]; } - hmm->rf[k] = 'a'+(char)qmax; /* q > 9, so convert to char a-z*/ - - /* Consensus annotation on columns: 
original sequence. - */ - hmm->cs[k] = aseqs[idx][j]; - - k++; - } - - /* State transitions are set subjectively - */ - hmm->tbd1 = 0.02; - for (k = 1; k < hmm->M; k++) - { - hmm->t[k][TMM] = 0.97; - hmm->t[k][TMI] = 0.02; - hmm->t[k][TMD] = 0.01; - hmm->t[k][TIM] = 0.20; - hmm->t[k][TII] = 0.80; - hmm->t[k][TDM] = 0.90; - hmm->t[k][TDD] = 0.10; - } - - hmm->flags |= PLAN7_HASPROB | PLAN7_RF | PLAN7_CS; - - FMX2Free(pxaq); - FMX2Free(probq); - free(mat); - return hmm; -} -#endif - - -/* Function: ReadGJMMatrices() - * - * Purpose: Read GJM's file format for star-based mixture matrices. - * Very first line is nq. - * First line of a set is P(q), the prior of the matrix. - * Second line contains P(b|q), the prior of the root symbols, - * _in arbitrary order_ (the root distribution is not over AA's!) - * Third line is blank. - * Next 20 lines give a 20x20 matrix of conditional probabilities; - * rows = root symbols b; cols = leaf symbols x; - * mx[row][col] = P(x | b). - * - * Instead of storing as matrices, store as q x r vectors. - * - * Return: (void) - * mx, pq, nq are returned via passed pointers. - * Caller must free FMX2Free(mx) - * Caller must free(pq). 
- */ -void -ReadGJMMatrices(FILE *fp, float ***ret_mx, float **ret_pq, int *ret_nq) -{ - float **mx; /* conditional p's [0..nq-1][0..19] */ - float *pq; /* priors on vectors, [0..nq-1] */ - int nq, nr; /* number of matrices, rows */ - char buf[2048]; - float tmppq; /* prior for matrix */ - int q,r; /* counter for matrices, rows */ - int x; /* counter for symbols */ - char *s; /* tmp pointer into buf */ - - - /* allocations */ - if (fgets(buf, 2048, fp) == NULL) Die("read failed"); - nr = 20; - nq = atoi(buf); - mx = FMX2Alloc(nq*nr, 20); - pq = (float *) MallocOrDie (nq*nr * sizeof(float)); - - /* parse matrices */ - for (q = 0; q < nq; q++) - { - if (fgets(buf, 2048, fp) == NULL) Die("parse failed"); - tmppq = atof(buf); - - if (fgets(buf, 2048, fp) == NULL) Die("parse failed"); - s = strtok(buf, "\n\t "); - for (r = 0; r < nr; r++) - { - pq[q*nr + r] = atof(s) * tmppq; - s = strtok(NULL, "\n\t "); - } - if (fgets(buf, 2048, fp) == NULL) Die("parse failed"); - - for (r = 0; r < 20; r++) - { - if (fgets(buf, 2048, fp) == NULL) Die("parse failed"); - s = strtok(buf, "\n\t "); - for (x = 0; x < 20; x++) - { - mx[q*nr+r][x] = atof(s); - s = strtok(NULL, "\n\t "); - } - } - /* two blank lines */ - if (fgets(buf, 2048, fp) == NULL) Die("parse failed"); - if (fgets(buf, 2048, fp) == NULL) Die("parse failed"); - } - - *ret_mx = mx; - *ret_pq = pq; - *ret_nq = nq*nr; - return; -} - - -#ifdef SRE_REMOVED -/* Function: OldReadGJMMatrices() - * - * Purpose: Read GJM's file format for joint probability matrix sets. - * - * Return: (void) - * mx, qprior, nmx are returned via passed pointers. - * Caller must free mx: each matrix by FMX2Free(), then free(mx). - * Caller must also free(qprior). 
- */ -void -OldReadGJMMatrices(FILE *fp, float ****ret_mx, float **ret_qprior, int *ret_nmx) -{ - float ***mx; /* joint prob matrix [0..nmx-1][0..19][0..19] */ - float *qprior; /* priors on matrices, [0..nmx-1] */ - int nmx; /* number of matrices */ - char buf[2048]; - int q; /* counter for matrices */ - int idx; /* index for this matrix seen in file */ - int r,c; /* counter for row, column */ - char *s; /* tmp pointer into buf */ - - /* pass one: count matrices */ - nmx = 0; - while (fgets(buf, 2048, fp) != NULL) - if (Strparse("use [0-9]+ = .+", buf, 0) == 0) - nmx++; - rewind(fp); - /* allocations */ - qprior = (float *) MallocOrDie (20 * sizeof(float)); - mx = (float ***) MallocOrDie (nmx * sizeof(float **)); - for (q = 0; q < nmx; q++) - mx[q] = FMX2Alloc(20, 20); - - /* pass two: parse matrices */ - q = 0; - while (fgets(buf, 2048, fp) != NULL) - { - if (Strparse("use ([0-9]+) = (.+)", buf, 2) != 0) - continue; - idx = atoi(sqd_parse[1]); - qprior[q] = atof(sqd_parse[2]); - - /* skip two lines in his new format */ - if (fgets(buf, 2048, fp) == NULL) Die("ReadGJMMatrices(): parse failed"); - if (fgets(buf, 2048, fp) == NULL) Die("ReadGJMMatrices(): parse failed"); - - for (r = 0; r < 20; r++) - { - if (fgets(buf, 2048, fp) == NULL) - Die("ReadGJMMatrices(): parse failed"); - s = strtok(buf, "\n\t "); - for (c = 0; c < 20; c++) - { - mx[q][r][c] = atof(s); - s = strtok(NULL, "\n\t "); - } - } - q++; - } - - *ret_mx = mx; - *ret_qprior = qprior; - *ret_nmx = nmx; - return; -} - -/* Function: OldPrintGJMMatrix() - * - * Purpose: (debugging, basically): print out Graeme's - * joint probability matrices in log odds integer form. - * - */ -void -OldPrintGJMMatrix(FILE *fp, float **jmx, float *rnd, int N) -{ - int r, c; - - fprintf(fp, " "); - for (c = 0; c < N; c++) - fprintf(fp, " %c ", Alphabet[c]); - fprintf(fp, "\n"); - - for (r = 0; r < N; r++) - { - fprintf(fp, "%c ", Alphabet[r]); - for (c = 0; c < N; c++) - fprintf(fp, "%3d ", - (int) (10. 
* sreLOG2(jmx[r][c] / (rnd[r] * rnd[c])))); - fprintf(fp, "\n"); - } -} -#endif /* SRE_REMOVED*/ - -/* Function: Joint2SubstitutionMatrix() - * - * Purpose: Convert a joint probability matrix to a substitution - * matrix. - * - * Convention here for substitution matrices is - * smx[r][c] = r->c = P(c|r). - * - * We obtain the substitution matrix from the following logic: - * P(rc) = P(c|r) P(r); - * P(r) = \sum_c P(rc); - * thus P(c|r) = P(rc) / \sum_c P(rc) - * - * Args: jmx - NxN P(rc) joint probability matrix - * smx - NxN P(c|r) substitution matrix, alloced in caller - * N - size of matrices; typically Alphabet_size - * - * Return: (void) - * smx is filled in. - */ -void -Joint2SubstitutionMatrix(float **jmx, float **smx, int N) -{ - float pr; /* P(r) = \sum_c P(rc) */ - int r,c; /* counters for rows, columns */ - - for (r = 0; r < N; r++) - { - for (pr = 0., c = 0; c < N; c++) - pr += jmx[r][c]; - for (c = 0; c < N; c++) - smx[r][c] = jmx[r][c] / pr; - } -} - - -#ifdef SRE_REMOVED -/* Function: BlosumWeights() - * - * Purpose: Assign weights to a set of aligned sequences - * using the BLOSUM rule: - * - do single linkage clustering at some pairwise identity - * - in each cluster, give each sequence 1/clustsize - * total weight. - * - * Args: aseqs - alignment - * N - number of seqs in alignment - * maxid - fractional identity (e.g. 
0.62 for BLOSUM62) - * clust - [0..nseq-1] vector of cluster assignments, filled here (or NULL) - * ret_nc - total number of clusters found (or pass NULL) - */ -void -BlosumWeights(char **aseqs, AINFO *ainfo, float maxid, int *clust,int *ret_nc) -{ - float **dmx; /* difference matrix */ - struct phylo_s *tree; /* UPGMA tree */ - float mindiff; /* minimum distance between clusters */ - int c; /* counter for clusters */ - struct intstack_s *stack; - int node; - int i; - - mindiff = 1.0 - maxid; - /* first we do a difference matrix */ - MakeDiffMx(aseqs, ainfo->nseq, &dmx); - /* then we build a tree */ - Cluster(dmx, ainfo->nseq, CLUSTER_MIN, &tree); - - /* Find clusters below mindiff. - * The rule is: - * -traverse the tree - * -if the parent is > mindiff and current < mindiff, then - * make current node a cluster. - */ - for (i = 0; i < ainfo->nseq; i++) - { - ainfo->sqinfo[i].weight = 1.0; - ainfo->sqinfo[i].flags |= SQINFO_WGT; - } - - stack = InitIntStack(); - PushIntStack(stack, 0); /* push root on stack to start */ - c = 0; - while (PopIntStack(stack, &node)) - { - if ((node == 0 || tree[tree[node].parent-ainfo->nseq].diff > mindiff) && - tree[node].diff < mindiff) - { /* we're at a cluster */ - for (i = 0; i < ainfo->nseq; i++) - if (tree[node].is_in[i]) - { - ainfo->sqinfo[i].weight = 1.0 / (float) tree[node].incnum; - if (clust != NULL) clust[i] = c; - } - c++; - } - else /* we're not a cluster, keep traversing */ - { - if (tree[node].right >= ainfo->nseq) - PushIntStack(stack, tree[node].right - ainfo->nseq); - else - { - c++; - if (clust != NULL) clust[tree[node].right] = c; /* single seq, wgt 1.0 */ - } - - if (tree[node].left >= ainfo->nseq) - PushIntStack(stack, tree[node].left - ainfo->nseq); - else - { - c++; - if (clust != NULL) clust[tree[node].left] = c; - } - } - } - FreeIntStack(stack); - FreePhylo(tree, ainfo->nseq); - FMX2Free(dmx); - if (ret_nc != NULL) *ret_nc = c; - return; -} -#endif - - -#ifdef SRE_REMOVED -/* Function: calc_probq() - * - 
* Purpose: Calculate the posterior probability distribution - * P(q | a_j) for every column j in the alignment - * and every matrix choice q. - * - * Probabilistic, based on a star topology. - * Uses a BLOSUM-like rule to cluster the sequences in - * the alignment into groups with some seq identity (62%). - * Finds the consensus (majority rule) residue in - * each cluster as the representative. - * Then P(q | col) comes by Bayes: - * = (P(col | q) P(q) / Z - * where the likelihood - * P(col | q) = \sum_b [\prod_i P(a_i | q,b)] P(b | q) - * log P(col | q) = \logsum_b P(b|q) + \sum_i \log(P(a_i | q,b)) - * - * Args: aseqs - alignment - * ainfo - optional info for alignment - * mx - conditional probability matrices [0..nmx-1][root b][x] - * bprior- root priors [0..nmx-1][root b] - * qprior- prior prob distribution over matrices - * nmx - number of matrices - * probq - RETURN: posterior probabilities, [0..alen-1][0..nmx-1] - * alloc'ed in called, filled in here. - * - * Return: (void) - * probq is filled in. 
- */ -static void -calc_probq(char **aseqs, AINFO *ainfo, float ***mx, float **bprior, - float *qprior, int nmx, float **probq) -{ - int q; /* counter over matrices */ - int a1; /* counter over sequences */ - int j; /* counter over columns */ - int *clust; /* assignment of seqs to clusters 0..nseq-1 */ - int nclust; /* number of clusters */ - float *wgt; /* weights on seqs, 0..nseq-1 */ - int *sym; /* symbol indices in a column */ - float obs[MAXABET]; /* number of symbols observed in a column */ - int i, x; - float maxc; - float ngap; - float bterm[20]; /* intermediate in calculation, over root b's */ - int b; /* counter over root symbols */ - - /* Use the BLOSUM rule to calculate weights and clusters - * for sequences in the alignment - */ - wgt = (float *) MallocOrDie (sizeof(float) * ainfo->nseq); - clust = (int *) MallocOrDie (sizeof(int) * ainfo->nseq); - BlosumWeights(aseqs, ainfo, 0.62, clust, wgt, &nclust); - - /* Use the BLOSUM rule to calculate a "likelihood" function - * P(column | q) for each column. - */ - sym = (int *) MallocOrDie (sizeof(int) * nclust); - for (j = 0; j < ainfo->alen; j++) - { - /* Find majority rule symbols in this col */ - for (i = 0; i < nclust; i++) - { - FSet(obs, Alphabet_size, 0.); - ngap = 0.; - for (a1 = 0; a1 < ainfo->nseq; a1++) - if (clust[a1] == i) - if (isgap(aseqs[a1][j])) ngap += 0.; - else P7CountSymbol(obs, SymbolIndex(aseqs[a1][j]), 1.0); - - maxc = -1.; - for (x = 0; x < Alphabet_size; x++) - if (obs[x] > maxc) { maxc = obs[x]; sym[i] = x; } - /* either if no symbols observed, or more gaps than syms: */ - if (ngap >= maxc) sym[i] = -1; - } - /* Calculate log likelihood + log prior */ - for (q = 0; q < nmx; q++) - { - for (b = 0; b < 20; b++) - { - bterm[b] = bprior[q][b]; - for (i = 0; i < nclust; i++) - if (sym[i] >= 0) - bterm[b] += log(mx[q][b][sym[i]]); - } - probq[j][q] = log(qprior[q]) + FLogSum(bterm, 20); - } - LogNorm(probq[j], nmx); /* normalize -> gives posterior. 
*/ - } - free(sym); - free(wgt); - free(clust); -} - - -/* Function: old_calc_probq() OBSOLETE VERSION - * - * Purpose: Calculate the posterior probability distribution - * P(q | a_j) for every column j in the alignment - * and every matrix choice q. - * - * Non-probabilistic. Uses a BLOSUM-like rule to - * find the single best matrix for a column, then - * assigns it a posterior of 1.0. - * - * This was version 1: a competitive learning rule, - * posterior either 1.0 or 0.0. - * - * Args: aseqs - alignment - * ainfo - optional info for alignment - * jmx - *joint* probability matrices [0..nmx-1][0..19][0..19] - * qprior- prior prob distribution over matrices [UNUSED] - * nmx - number of matrices - * probq - RETURN: posterior probabilities, [0..alen-1][0..nmx-1] - * alloc'ed in called, filled in here. - * - * Return: (void) - * probq is filled in. - */ -static void -old_calc_probq(char **aseqs, AINFO *ainfo, float ***jmx, float *qprior, - int nmx, float **probq) -{ - int q; /* counter over matrices */ - int a1, a2; /* counters over sequences */ - int j; /* counter over columns */ - float x; /* BLOSUM-style objective function */ - float maxx; /* maximum x so far */ - int maxq; /* maximum q so far */ - int *clust; /* assignment of seqs to clusters 0..nseq-1 */ - int nclust; /* number of clusters */ - float *wgt; /* weights on seqs, 0..nseq-1 */ - int *sym; /* symbol indices in a column */ - - - /* Use the BLOSUM rule to calculate weights and clusters - * for sequences in the alignment - */ - wgt = (float *) MallocOrDie (sizeof(float) * ainfo->nseq); - clust = (int *) MallocOrDie (sizeof(int) * ainfo->nseq); - BlosumWeights(aseqs, ainfo, 0.62, clust, wgt, &nclust); - - /* Use the BLOSUM rule to calculate a "likelihood" function - * P(column | q) for each column. 
- */ - sym = (int *) MallocOrDie (sizeof(int) * ainfo->nseq); - for (j = 0; j < ainfo->alen; j++) - { - for (a1 = 0; a1 < ainfo->nseq; a1++) - if (!isgap(aseqs[a1][j]) && - strchr(Alphabet, aseqs[a1][j]) != NULL) - { - sym[a1] = SYMIDX(aseqs[a1][j]); - if (sym[a1] >= Alphabet_size) sym[a1] = -1; /* no degenerates */ - } - else sym[a1] = -1; - - maxx = -FLT_MAX; - for (q = 0; q < nmx; q++) - { - x = 0.; - for (a1 = 0; a1 < ainfo->nseq; a1++) - for (a2 = 0; a2 < ainfo->nseq; a2++) - if (sym[a1] >= 0 && sym[a2] >= 0 && clust[a1] != clust[a2]) - x += wgt[a1] * wgt[a2] * log(jmx[q][sym[a1]][sym[a2]]); - -#ifdef SRE_REMOVED - printf("%% col %3d mx %c x = %f\n", - j+1, 'a'+(char)q, x); -#endif - - if (x > maxx) - { - maxx = x; - maxq = q; - } - } - FSet(probq[j], nmx, 0.0); - probq[j][maxq] = 1.0; /* winner-take-all rule */ - } - - free(sym); - free(wgt); - free(clust); -} - - -/* Function: print_probq() - * - * Purpose: Debugging output. - * probq is the posterior probability P(q | column) of - * a matrix q given an observed alignment column. - * Indexed probq[0..alen-1][0..nmx-1]. 
- */ -static void -print_probq(FILE *fp, float **probq, int alen, int nmx) -{ - int c; /* counter for columns */ - int q; /* counter for matrices */ - - fputs("### probq debugging output\n", fp); - fputs(" ", fp); - for (q = 0; q < nmx; q++) - fprintf(fp, " %c ", 'a'+(char)q); - fputs("\n", fp); - - for (c = 0; c < alen; c++) - { - fprintf(fp, "%4d ", c); - for (q = 0; q < nmx; q++) - fprintf(fp, "%5.3f ", probq[c][q]); - fputs("\n", fp); - } -} -#endif diff --git a/forester/archive/RIO/others/hmmer/src/config.h b/forester/archive/RIO/others/hmmer/src/config.h deleted file mode 100644 index fb89df2..0000000 --- a/forester/archive/RIO/others/hmmer/src/config.h +++ /dev/null @@ -1,52 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* config.h - * - * Configurable compile-time parameters in HMMER. - */ - -#ifndef CONFIGH_INCLUDED -#define CONFIGH_INCLUDED - -/* RAMLIMIT determines the point at which we switch from fast, - * full dynamic programming to slow, linear-memory divide and conquer - * dynamic programming algorithms. It is the minimum amount of available - * RAM on the systems the package will run on. It can be overridden - * from the Makefile. - * By default, we assume we have 32 Mb RAM available (per thread). - */ -#ifndef RAMLIMIT -#define RAMLIMIT 32 -#endif - -/* HMMER_NCPU determines the number of threads/processors that - * a threads version will parallelize across. This can be overridden - * by -DHMMER_NCPU=x in the Makefile, and by a setenv HMMER_NCPU x - * in the environment, and usually by a command line option. 
- * Usually we detect the number of processors dynamically, but - * on some systems (FreeBSD and Linux, notably), we can't. On - * these systems we assume 2 processors by default. That assumption - * can be overridden here if HMMER_NCPU is uncommented. - */ -/* #define HMMER_NCPU 4 */ - -#define INTSCALE 1000.0 /* scaling constant for floats to integer scores */ -#define MAXABET 20 /* maximum size of alphabet (4 or 20) */ -#define MAXCODE 23 /* maximum degenerate alphabet size (17 or 23) */ -#define MAXDCHLET 200 /* maximum # Dirichlet components in mixture prior */ -#define NINPUTS 4 /* number of inputs into structural prior */ -#define INFTY 987654321 /* infinity for purposes of integer DP cells */ -#define NXRAY 4 /* number of structural inputs */ -#define LOGSUM_TBL 20000 /* controls precision of ILogsum() */ -#define ALILENGTH 50 /* length of displayed alignment lines */ - -#endif /*CONFIGH_INCLUDED*/ - diff --git a/forester/archive/RIO/others/hmmer/src/core_algorithms.c b/forester/archive/RIO/others/hmmer/src/core_algorithms.c deleted file mode 100644 index b4fc349..0000000 --- a/forester/archive/RIO/others/hmmer/src/core_algorithms.c +++ /dev/null @@ -1,2445 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* core_algorithms.c - * SRE, Mon Nov 11 15:58:52 1996 - * CVS $Id: core_algorithms.c,v 1.1.1.1 2005/03/22 08:34:11 cmzmasek Exp $ - * - * Simple and robust "research" implementations of Forward, Backward, - * and Viterbi for Plan7. 
- */ - -#include "structs.h" -#include "config.h" -#include "funcs.h" -#include "squid.h" - -#include -#include - -static float get_wee_midpt(struct plan7_s *hmm, char *dsq, int L, - int k1, char t1, int s1, - int k3, char t3, int s3, - int *ret_k2, char *ret_t2, int *ret_s2); - - -/* Function: AllocPlan7Matrix() - * - * Purpose: Allocate a dynamic programming matrix for standard Forward, - * Backward, or Viterbi, with scores kept as scaled log-odds - * integers. Keeps 2D arrays compact in RAM in an attempt - * to maximize cache hits. Sets up individual ptrs to the - * four matrix components as a convenience. - * - * Args: rows - number of rows to allocate; typically L+1 - * M - size of model - * xmx, mmx, imx, dmx - * - RETURN: ptrs to four mx components as a convenience - * - * Return: mx - * mx is allocated here. Caller frees with FreeDPMatrix(mx). - */ - -struct dpmatrix_s * -AllocPlan7Matrix(int rows, int M, int ***xmx, int ***mmx, int ***imx, int ***dmx) -{ - struct dpmatrix_s *mx; - int i; - - mx = (struct dpmatrix_s *) MallocOrDie (sizeof(struct dpmatrix_s)); - mx->xmx = (int **) MallocOrDie (sizeof(int *) * rows); - mx->mmx = (int **) MallocOrDie (sizeof(int *) * rows); - mx->imx = (int **) MallocOrDie (sizeof(int *) * rows); - mx->dmx = (int **) MallocOrDie (sizeof(int *) * rows); - mx->xmx[0] = (int *) MallocOrDie (sizeof(int) * (rows*5)); - mx->mmx[0] = (int *) MallocOrDie (sizeof(int) * (rows*(M+2))); - mx->imx[0] = (int *) MallocOrDie (sizeof(int) * (rows*(M+2))); - mx->dmx[0] = (int *) MallocOrDie (sizeof(int) * (rows*(M+2))); - for (i = 1; i < rows; i++) - { - mx->xmx[i] = mx->xmx[0] + (i*5); - mx->mmx[i] = mx->mmx[0] + (i*(M+2)); - mx->imx[i] = mx->imx[0] + (i*(M+2)); - mx->dmx[i] = mx->dmx[0] + (i*(M+2)); - } - - if (xmx != NULL) *xmx = mx->xmx; - if (mmx != NULL) *mmx = mx->mmx; - if (imx != NULL) *imx = mx->imx; - if (dmx != NULL) *dmx = mx->dmx; - return mx; -} - -/* Function: FreePlan7Matrix() - * - * Purpose: Free a dynamic programming matrix 
allocated by AllocPlan7Matrix(). - * - * Return: (void) - */ -void -FreePlan7Matrix(struct dpmatrix_s *mx) -{ - free (mx->xmx[0]); - free (mx->mmx[0]); - free (mx->imx[0]); - free (mx->dmx[0]); - free (mx->xmx); - free (mx->mmx); - free (mx->imx); - free (mx->dmx); - free (mx); -} - -/* Function: AllocShadowMatrix() - * - * Purpose: Allocate a dynamic programming traceback pointer matrix for - * a Viterbi algorithm. - * - * Args: rows - number of rows to allocate; typically L+1 - * M - size of model - * xtb, mtb, itb, dtb - * - RETURN: ptrs to four mx components as a convenience - * - * Return: mx - * mx is allocated here. Caller frees with FreeDPMatrix(mx). - */ - -struct dpshadow_s * -AllocShadowMatrix(int rows, int M, char ***xtb, char ***mtb, char ***itb, char ***dtb) -{ - struct dpshadow_s *tb; - int i; - - tb = (struct dpshadow_s *) MallocOrDie (sizeof(struct dpshadow_s)); - tb->xtb = (char **) MallocOrDie (sizeof(char *) * rows); - tb->mtb = (char **) MallocOrDie (sizeof(char *) * rows); - tb->itb = (char **) MallocOrDie (sizeof(char *) * rows); - tb->dtb = (char **) MallocOrDie (sizeof(char *) * rows); - tb->esrc = (int *) MallocOrDie (sizeof(int) * rows); - tb->xtb[0] = (char *) MallocOrDie (sizeof(char) * (rows*5)); - tb->mtb[0] = (char *) MallocOrDie (sizeof(char) * (rows*(M+2))); - tb->itb[0] = (char *) MallocOrDie (sizeof(char) * (rows*(M+2))); - tb->dtb[0] = (char *) MallocOrDie (sizeof(char) * (rows*(M+2))); - for (i = 1; i < rows; i++) - { - tb->xtb[i] = tb->xtb[0] + (i*5); - tb->mtb[i] = tb->mtb[0] + (i*(M+2)); - tb->itb[i] = tb->itb[0] + (i*(M+2)); - tb->dtb[i] = tb->dtb[0] + (i*(M+2)); - } - - if (xtb != NULL) *xtb = tb->xtb; - if (mtb != NULL) *mtb = tb->mtb; - if (itb != NULL) *itb = tb->itb; - if (dtb != NULL) *dtb = tb->dtb; - return tb; -} - -/* Function: FreeShadowMatrix() - * - * Purpose: Free a dynamic programming matrix allocated by AllocShadowMatrix(). 
- * - * Return: (void) - */ -void -FreeShadowMatrix(struct dpshadow_s *tb) -{ - free (tb->xtb[0]); - free (tb->mtb[0]); - free (tb->itb[0]); - free (tb->dtb[0]); - free (tb->esrc); - free (tb->xtb); - free (tb->mtb); - free (tb->itb); - free (tb->dtb); - free (tb); -} - -/* Function: P7ViterbiSize() - * Date: SRE, Fri Mar 6 15:13:20 1998 [St. Louis] - * - * Purpose: Returns the ballpark predicted memory requirement for a - * P7Viterbi() alignment, in MB. - * - * Currently L must fit in an int (< 2 GB), but we have - * to deal with LM > 2 GB - e.g. watch out for overflow, do - * the whole calculation in floating point. Bug here detected - * in 2.1.1 by David Harper, Sanger Centre. - * - * Args: L - length of sequence - * M - length of HMM - * - * Returns: # of MB - */ -int -P7ViterbiSize(int L, int M) -{ - float Mbytes; - - /* We're excessively precise here, but it doesn't cost - * us anything to be pedantic. The four terms are: - * 1. the matrix structure itself; - * 2. the O(NM) main matrix (this dominates!) - * 3. ptrs into the rows of the matrix - * 4. storage for 5 special states. (xmx) - */ - Mbytes = (float) sizeof(struct dpmatrix_s); - Mbytes += 3. * (float) (L+1) * (float) (M+2) * (float) sizeof(int); - Mbytes += 4. * (float) (L+1) * (float) sizeof(int *); - Mbytes += 5. * (float) (L+1) * (float) sizeof(int); - Mbytes /= 1048576.; - return (int) Mbytes; -} - -/* Function: P7SmallViterbiSize() - * Date: SRE, Fri Mar 6 15:20:04 1998 [St. Louis] - * - * Purpose: Returns the ballpark predicted memory requirement for - * a P7SmallViterbi() alignment, in MB. - * - * P7SmallViterbi() is a wrapper, calling both P7ParsingViterbi() - * and P7WeeViterbi(). P7ParsingViterbi() typically dominates - * the memory requirement, so the value returned - * is the P7ParsingViterbi() number. - * - * We don't (yet) worry about overflow issues like we did with - * P7ViterbiSize(). We'll have many other 32-bit int issues in the - * code if we overflow here. 
- * - * Args: L - length of sequence - * M - length of HMM - * - * Returns: # of MB - */ -int -P7SmallViterbiSize(int L, int M) -{ - return ((2 * sizeof(struct dpmatrix_s) + - 12 * (M+2) * sizeof(int) + /* 2 matrices w/ 2 rows */ - 16 * sizeof(int *) + /* ptrs into rows of matrix */ - 20 * sizeof(int) + /* 5 special states */ - 2 * (L+1) * sizeof(int)) /* traceback indices */ - / 1000000); -} - - -/* Function: P7WeeViterbiSize() - * Date: SRE, Fri Mar 6 15:40:42 1998 [St. Louis] - * - * Purpose: Returns the ballpark predicted memory requirement for - * a P7WeeViterbi() alignment, in MB. - * - * Args: L - length of sequence - * M - length of HMM - * - * Returns: # of MB - */ -int -P7WeeViterbiSize(int L, int M) -{ - return ((2 * sizeof(struct dpmatrix_s) + - 12 * (M+2) * sizeof(int) + /* 2 matrices w/ 2 rows */ - 16 * sizeof(int *) + /* ptrs into rows of matrix */ - 20 * sizeof(int) + /* 5 special states */ - 2 * (L+2) * sizeof(int) + /* stacks for starts/ends (overkill) */ - (L+2) * sizeof(int) + /* k assignments to seq positions */ - (L+2) * sizeof(char)) /* state assignments to seq pos */ - / 1000000); -} - - -/* Function: P7Forward() - * - * Purpose: The Forward dynamic programming algorithm. - * The scaling issue is dealt with by working in log space - * and calling ILogsum(); this is a slow but robust approach. - * - * Args: dsq - sequence in digitized form - * L - length of dsq - * hmm - the model - * ret_mx - RETURN: dp matrix; pass NULL if it's not wanted - * - * Return: log P(S|M)/P(S|R), as a bit score. - */ -float -P7Forward(char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s **ret_mx) -{ - struct dpmatrix_s *mx; - int **xmx; - int **mmx; - int **imx; - int **dmx; - int i,k; - int sc; - - /* Allocate a DP matrix with 0..L rows, 0..M-1 columns. - */ - mx = AllocPlan7Matrix(L+1, hmm->M, &xmx, &mmx, &imx, &dmx); - - /* Initialization of the zero row. 
- * Note that xmx[i][stN] = 0 by definition for all i, - * and xmx[i][stT] = xmx[i][stC], so neither stN nor stT need - * to be calculated in DP matrices. - */ - xmx[0][XMN] = 0; /* S->N, p=1 */ - xmx[0][XMB] = hmm->xsc[XTN][MOVE]; /* S->N->B, no N-tail */ - xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; /* need seq to get here */ - for (k = 0; k <= hmm->M; k++) - mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; /* need seq to get here */ - - /* Recursion. Done as a pull. - * Note some slightly wasteful boundary conditions: - * tsc[0] = -INFTY for all eight transitions (no node 0) - * D_M and I_M are wastefully calculated (they don't exist) - */ - for (i = 1; i <= L; i++) - { - mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY; - for (k = 1; k < hmm->M; k++) - { - mmx[i][k] = ILogsum(ILogsum(mmx[i-1][k-1] + hmm->tsc[k-1][TMM], - imx[i-1][k-1] + hmm->tsc[k-1][TIM]), - ILogsum(xmx[i-1][XMB] + hmm->bsc[k], - dmx[i-1][k-1] + hmm->tsc[k-1][TDM])); - mmx[i][k] += hmm->msc[(int) dsq[i]][k]; - - dmx[i][k] = ILogsum(mmx[i][k-1] + hmm->tsc[k-1][TMD], - dmx[i][k-1] + hmm->tsc[k-1][TDD]); - imx[i][k] = ILogsum(mmx[i-1][k] + hmm->tsc[k][TMI], - imx[i-1][k] + hmm->tsc[k][TII]); - imx[i][k] += hmm->isc[(int) dsq[i]][k]; - } - mmx[i][hmm->M] = ILogsum(ILogsum(mmx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TMM], - imx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TIM]), - ILogsum(xmx[i-1][XMB] + hmm->bsc[hmm->M-1], - dmx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TDM])); - mmx[i][hmm->M] += hmm->msc[(int) dsq[i]][hmm->M]; - - /* Now the special states. 
- * remember, C and J emissions are zero score by definition - */ - xmx[i][XMN] = xmx[i-1][XMN] + hmm->xsc[XTN][LOOP]; - - xmx[i][XME] = -INFTY; - for (k = 1; k <= hmm->M; k++) - xmx[i][XME] = ILogsum(xmx[i][XME], mmx[i][k] + hmm->esc[k]); - - xmx[i][XMJ] = ILogsum(xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP], - xmx[i][XME] + hmm->xsc[XTE][LOOP]); - - xmx[i][XMB] = ILogsum(xmx[i][XMN] + hmm->xsc[XTN][MOVE], - xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]); - - xmx[i][XMC] = ILogsum(xmx[i-1][XMC] + hmm->xsc[XTC][LOOP], - xmx[i][XME] + hmm->xsc[XTE][MOVE]); - } - - sc = xmx[L][XMC] + hmm->xsc[XTC][MOVE]; - - if (ret_mx != NULL) *ret_mx = mx; - else FreePlan7Matrix(mx); - - return Scorify(sc); /* the total Forward score. */ -} - - -/* Function: P7Viterbi() - * - * Purpose: The Viterbi dynamic programming algorithm. - * Identical to Forward() except that max's - * replace sum's. - * - * Args: dsq - sequence in digitized form - * L - length of dsq - * hmm - the model - * ret_tr - RETURN: traceback; pass NULL if it's not wanted - * - * Return: log P(S|M)/P(S|R), as a bit score - */ -float -P7Viterbi(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr) -{ - struct dpmatrix_s *mx; - struct p7trace_s *tr; - int **xmx; - int **mmx; - int **imx; - int **dmx; - int i,k; - int sc; - - /* Allocate a DP matrix with 0..L rows, 0..M-1 columns. - */ - mx = AllocPlan7Matrix(L+1, hmm->M, &xmx, &mmx, &imx, &dmx); - - /* Initialization of the zero row. - */ - xmx[0][XMN] = 0; /* S->N, p=1 */ - xmx[0][XMB] = hmm->xsc[XTN][MOVE]; /* S->N->B, no N-tail */ - xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; /* need seq to get here */ - for (k = 0; k <= hmm->M; k++) - mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; /* need seq to get here */ - - /* Recursion. Done as a pull. 
- * Note some slightly wasteful boundary conditions: - * tsc[0] = -INFTY for all eight transitions (no node 0) - * D_M and I_M are wastefully calculated (they don't exist) - */ - for (i = 1; i <= L; i++) { - mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY; - - for (k = 1; k <= hmm->M; k++) { - /* match state */ - mmx[i][k] = -INFTY; - if ((sc = mmx[i-1][k-1] + hmm->tsc[k-1][TMM]) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = imx[i-1][k-1] + hmm->tsc[k-1][TIM]) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = xmx[i-1][XMB] + hmm->bsc[k]) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = dmx[i-1][k-1] + hmm->tsc[k-1][TDM]) > mmx[i][k]) - mmx[i][k] = sc; - if (hmm->msc[(int) dsq[i]][k] != -INFTY) mmx[i][k] += hmm->msc[(int) dsq[i]][k]; - else mmx[i][k] = -INFTY; - - /* delete state */ - dmx[i][k] = -INFTY; - if ((sc = mmx[i][k-1] + hmm->tsc[k-1][TMD]) > dmx[i][k]) - dmx[i][k] = sc; - if ((sc = dmx[i][k-1] + hmm->tsc[k-1][TDD]) > dmx[i][k]) - dmx[i][k] = sc; - - /* insert state */ - if (k < hmm->M) { - imx[i][k] = -INFTY; - if ((sc = mmx[i-1][k] + hmm->tsc[k][TMI]) > imx[i][k]) - imx[i][k] = sc; - if ((sc = imx[i-1][k] + hmm->tsc[k][TII]) > imx[i][k]) - imx[i][k] = sc; - if (hmm->isc[(int)dsq[i]][k] != -INFTY) imx[i][k] += hmm->isc[(int) dsq[i]][k]; - else imx[i][k] = -INFTY; - } - } - - /* Now the special states. Order is important here. 
- * remember, C and J emissions are zero score by definition, - */ - /* N state */ - xmx[i][XMN] = -INFTY; - if ((sc = xmx[i-1][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY) - xmx[i][XMN] = sc; - - /* E state */ - xmx[i][XME] = -INFTY; - for (k = 1; k <= hmm->M; k++) - if ((sc = mmx[i][k] + hmm->esc[k]) > xmx[i][XME]) - xmx[i][XME] = sc; - /* J state */ - xmx[i][XMJ] = -INFTY; - if ((sc = xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP]) > -INFTY) - xmx[i][XMJ] = sc; - if ((sc = xmx[i][XME] + hmm->xsc[XTE][LOOP]) > xmx[i][XMJ]) - xmx[i][XMJ] = sc; - - /* B state */ - xmx[i][XMB] = -INFTY; - if ((sc = xmx[i][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY) - xmx[i][XMB] = sc; - if ((sc = xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]) > xmx[i][XMB]) - xmx[i][XMB] = sc; - - /* C state */ - xmx[i][XMC] = -INFTY; - if ((sc = xmx[i-1][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY) - xmx[i][XMC] = sc; - if ((sc = xmx[i][XME] + hmm->xsc[XTE][MOVE]) > xmx[i][XMC]) - xmx[i][XMC] = sc; - } - /* T state (not stored) */ - sc = xmx[L][XMC] + hmm->xsc[XTC][MOVE]; - - if (ret_tr != NULL) { - P7ViterbiTrace(hmm, dsq, L, mx, &tr); - *ret_tr = tr; - } - - FreePlan7Matrix(mx); - return Scorify(sc); /* the total Viterbi score. */ -} - - -/* Function: P7ViterbiTrace() - * Date: SRE, Sat Aug 23 10:30:11 1997 (St. Louis Lambert Field) - * - * Purpose: Traceback of a Viterbi matrix: i.e. retrieval - * of optimum alignment. - * - * Args: hmm - hmm, log odds form, used to make mx - * dsq - sequence aligned to (digital form) 1..N - * N - length of seq - * mx - the matrix to trace back in, N x hmm->M - * ret_tr - RETURN: traceback. - * - * Return: (void) - * ret_tr is allocated here. Free using P7FreeTrace(). 
- */ -void -P7ViterbiTrace(struct plan7_s *hmm, char *dsq, int N, - struct dpmatrix_s *mx, struct p7trace_s **ret_tr) -{ - struct p7trace_s *tr; - int curralloc; /* current allocated length of trace */ - int tpos; /* position in trace */ - int i; /* position in seq (1..N) */ - int k; /* position in model (1..M) */ - int **xmx, **mmx, **imx, **dmx; - int sc; /* temp var for pre-emission score */ - - /* Overallocate for the trace. - * S-N-B- ... - E-C-T : 6 states + N is minimum trace; - * add N more as buffer. - */ - curralloc = N * 2 + 6; - P7AllocTrace(curralloc, &tr); - - xmx = mx->xmx; - mmx = mx->mmx; - imx = mx->imx; - dmx = mx->dmx; - - /* Initialization of trace - * We do it back to front; ReverseTrace() is called later. - */ - tr->statetype[0] = STT; - tr->nodeidx[0] = 0; - tr->pos[0] = 0; - tr->statetype[1] = STC; - tr->nodeidx[1] = 0; - tr->pos[1] = 0; - tpos = 2; - i = N; /* current i (seq pos) we're trying to assign */ - - /* Traceback - */ - while (tr->statetype[tpos-1] != STS) { - switch (tr->statetype[tpos-1]) { - case STM: /* M connects from i-1,k-1, or B */ - sc = mmx[i+1][k+1] - hmm->msc[(int) dsq[i+1]][k+1]; - if (sc == xmx[i][XMB] + hmm->bsc[k+1]) - { - /* Check for wing unfolding */ - if (Prob2Score(hmm->begin[k+1], hmm->p1) + 1 * INTSCALE <= hmm->bsc[k+1]) - while (k > 0) - { - tr->statetype[tpos] = STD; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = 0; - tpos++; - if (tpos == curralloc) - { /* grow trace if necessary */ - curralloc += N; - P7ReallocTrace(tr, curralloc); - } - } - - tr->statetype[tpos] = STB; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - } - else if (sc == mmx[i][k] + hmm->tsc[k][TMM]) - { - tr->statetype[tpos] = STM; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = i--; - } - else if (sc == imx[i][k] + hmm->tsc[k][TIM]) - { - tr->statetype[tpos] = STI; - tr->nodeidx[tpos] = k; - tr->pos[tpos] = i--; - } - else if (sc == dmx[i][k] + hmm->tsc[k][TDM]) - { - tr->statetype[tpos] = STD; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = 0; 
- } - else Die("traceback failed"); - break; - - case STD: /* D connects from M,D */ - if (dmx[i][k+1] == mmx[i][k] + hmm->tsc[k][TMD]) - { - tr->statetype[tpos] = STM; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = i--; - } - else if (dmx[i][k+1] == dmx[i][k] + hmm->tsc[k][TDD]) - { - tr->statetype[tpos] = STD; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = 0; - } - else Die("traceback failed"); - break; - - case STI: /* I connects from M,I */ - sc = imx[i+1][k] - hmm->isc[(int) dsq[i+1]][k]; - if (sc == mmx[i][k] + hmm->tsc[k][TMI]) - { - tr->statetype[tpos] = STM; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = i--; - } - else if (sc == imx[i][k] + hmm->tsc[k][TII]) - { - tr->statetype[tpos] = STI; - tr->nodeidx[tpos] = k; - tr->pos[tpos] = i--; - } - else Die("traceback failed"); - break; - - case STN: /* N connects from S, N */ - if (i == 0 && xmx[i][XMN] == 0) - { - tr->statetype[tpos] = STS; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - } - else if (i > 0 && xmx[i+1][XMN] == xmx[i][XMN] + hmm->xsc[XTN][LOOP]) - { - tr->statetype[tpos] = STN; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; /* note convention adherence: */ - tr->pos[tpos-1] = i--; /* first N doesn't emit */ - } - else Die("traceback failed"); - break; - - case STB: /* B connects from N, J */ - if (xmx[i][XMB] == xmx[i][XMN] + hmm->xsc[XTN][MOVE]) - { - tr->statetype[tpos] = STN; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - } - else if (xmx[i][XMB] == xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]) - { - tr->statetype[tpos] = STJ; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - } - else Die("traceback failed"); - break; - - case STE: /* E connects from any M state. k set here */ - for (k = hmm->M; k >= 1; k--) - if (xmx[i][XME] == mmx[i][k] + hmm->esc[k]) - { - /* check for wing unfolding */ - if (Prob2Score(hmm->end[k], 1.) 
+ 1*INTSCALE <= hmm->esc[k]) - { - int dk; /* need a tmp k while moving thru delete wing */ - for (dk = hmm->M; dk > k; dk--) - { - tr->statetype[tpos] = STD; - tr->nodeidx[tpos] = dk; - tr->pos[tpos] = 0; - tpos++; - if (tpos == curralloc) - { /* grow trace if necessary */ - curralloc += N; - P7ReallocTrace(tr, curralloc); - } - } - } - - tr->statetype[tpos] = STM; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = i--; - break; - } - if (k < 0) Die("traceback failed"); - break; - - case STC: /* C comes from C, E */ - if (xmx[i][XMC] == xmx[i-1][XMC] + hmm->xsc[XTC][LOOP]) - { - tr->statetype[tpos] = STC; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; /* note convention adherence: */ - tr->pos[tpos-1] = i--; /* first C doesn't emit */ - } - else if (xmx[i][XMC] == xmx[i][XME] + hmm->xsc[XTE][MOVE]) - { - tr->statetype[tpos] = STE; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; /* E is a nonemitter */ - } - else Die("Traceback failed."); - break; - - case STJ: /* J connects from E, J */ - if (xmx[i][XMJ] == xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP]) - { - tr->statetype[tpos] = STJ; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; /* note convention adherence: */ - tr->pos[tpos-1] = i--; /* first J doesn't emit */ - } - else if (xmx[i][XMJ] == xmx[i][XME] + hmm->xsc[XTE][LOOP]) - { - tr->statetype[tpos] = STE; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; /* E is a nonemitter */ - } - else Die("Traceback failed."); - break; - - default: - Die("traceback failed"); - - } /* end switch over statetype[tpos-1] */ - - tpos++; - if (tpos == curralloc) - { /* grow trace if necessary */ - curralloc += N; - P7ReallocTrace(tr, curralloc); - } - - } /* end traceback, at S state; tpos == tlen now */ - tr->tlen = tpos; - P7ReverseTrace(tr); - *ret_tr = tr; -} - - -/* Function: P7SmallViterbi() - * Date: SRE, Fri Mar 6 15:29:41 1998 [St. Louis] - * - * Purpose: Wrapper function, for linear memory alignment - * with same arguments as P7Viterbi(). 
- * - * Calls P7ParsingViterbi to break the sequence - * into fragments. Then, based on size of fragments, - * calls either P7Viterbi() or P7WeeViterbi() to - * get traces for them. Finally, assembles all these - * traces together to produce an overall optimal - * trace for the sequence. - * - * If the trace isn't needed for some reason, - * all we do is call P7ParsingViterbi. - * - * Args: dsq - sequence in digitized form - * L - length of dsq - * hmm - the model - * ret_tr - RETURN: traceback; pass NULL if it's not wanted - * - * Returns: Score of optimal alignment in bits. - */ -float -P7SmallViterbi(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr) -{ - struct p7trace_s *ctr; /* collapsed trace of optimal parse */ - struct p7trace_s *tr; /* full trace of optimal alignment */ - struct p7trace_s **tarr; /* trace array */ - int ndom; /* number of subsequences */ - int i; /* counter over domains */ - int pos; /* position in sequence */ - int tpos; /* position in trace */ - int tlen; /* length of full trace */ - int sqlen; /* length of a subsequence */ - int totlen; /* length of L matched by model (as opposed to N/C/J) */ - float sc; /* score of optimal alignment */ - int t2; /* position in a subtrace */ - - /* Step 1. Call P7ParsingViterbi to calculate an optimal parse - * of the sequence into single-hit subsequences; this parse - * is returned in a "collapsed" trace - */ - sc = P7ParsingViterbi(dsq, L, hmm, &ctr); - - /* If we don't want full trace, we're done */ - if (ret_tr == NULL) - { - P7FreeTrace(ctr); - return sc; - } - - /* Step 2. Call either P7Viterbi or P7WeeViterbi on each subsequence - * to recover a full traceback of each, collecting them in - * an array. 
- */ - ndom = ctr->tlen/2 - 1; - tarr = MallocOrDie(sizeof(struct p7trace_s *) * ndom); - tlen = totlen = 0; - for (i = 0; i < ndom; i++) - { - sqlen = ctr->pos[i*2+2] - ctr->pos[i*2+1]; /* length of subseq */ - - if (P7ViterbiSize(sqlen, hmm->M) > RAMLIMIT) - P7WeeViterbi(dsq + ctr->pos[i*2+1], sqlen, hmm, &(tarr[i])); - else - P7Viterbi(dsq + ctr->pos[i*2+1], sqlen, hmm, &(tarr[i])); - - tlen += tarr[i]->tlen - 4; /* not counting S->N,...,C->T */ - totlen += sqlen; - } - - /* Step 3. Compose the subtraces into one big final trace. - * This is wasteful because we're going to TraceDecompose() - * it again in both hmmsearch and hmmpfam to look at - * individual domains; but we do it anyway so the P7SmallViterbi - * interface looks exactly like the P7Viterbi interface. Maybe - * long traces shouldn't include all the N/J/C states anyway, - * since they're unambiguously implied. - */ - - /* Calculate total trace len and alloc; - * nonemitting SNCT + nonemitting J's + emitting NJC - */ - tlen += 4 + (ndom-1) + (L-totlen); - P7AllocTrace(tlen, &tr); - tr->tlen = tlen; - - /* Add N-terminal trace framework - */ - tr->statetype[0] = STS; - tr->nodeidx[0] = 0; - tr->pos[0] = 0; - tr->statetype[1] = STN; - tr->nodeidx[1] = 0; - tr->pos[1] = 0; - tpos = 2; - /* add implied N's */ - for (pos = 1; pos <= ctr->pos[1]; pos++) - { - tr->statetype[tpos] = STN; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = pos; - tpos++; - } - - /* Add each subseq trace in, with its appropriate - * sequence offset derived from the collapsed trace - */ - for (i = 0; i < ndom; i++) - { /* skip SN, CT framework at ends */ - for (t2 = 2; t2 < tarr[i]->tlen-2; t2++) - { - tr->statetype[tpos] = tarr[i]->statetype[t2]; - tr->nodeidx[tpos] = tarr[i]->nodeidx[t2]; - if (tarr[i]->pos[t2] > 0) - tr->pos[tpos] = tarr[i]->pos[t2] + ctr->pos[i*2+1]; - else - tr->pos[tpos] = 0; - tpos++; - } - /* add nonemitting J or C */ - tr->statetype[tpos] = (i == ndom-1) ? 
STC : STJ; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - tpos++; - /* add implied emitting J's */ - if (i != ndom-1) - for (pos = ctr->pos[i*2+2]+1; pos <= ctr->pos[(i+1)*2+1]; pos++) - { - tr->statetype[tpos] = STJ; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = pos; - tpos++; - } - } - - /* add implied C's */ - for (pos = ctr->pos[ndom*2]+1; pos <= L; pos++) - { - tr->statetype[tpos] = STC; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = pos; - tpos++; - } - /* add terminal T */ - tr->statetype[tpos] = STT; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - tpos++; - - for (i = 0; i < ndom; i++) P7FreeTrace(tarr[i]); - free(tarr); - P7FreeTrace(ctr); - - *ret_tr = tr; - return sc; -} - - - - -/* Function: P7ParsingViterbi() - * Date: SRE, Wed Mar 4 14:07:31 1998 [St. Louis] - * - * Purpose: The "hmmfs" linear-memory algorithm for finding - * the optimal alignment of a very long sequence to - * a looping, multihit (e.g. Plan7) model, parsing it into - * a series of nonoverlapping subsequences that match - * the model once. Other algorithms (e.g. P7Viterbi() - * or P7WeeViterbi()) are applied subsequently to - * these subsequences to recover complete alignments. - * - * The hmmfs algorithm appears briefly in [Durbin98], - * but is otherwise unpublished. - * - * The traceback structure returned is special: a - * "collapsed" trace S->B->E->...->B->E->T, where - * stateidx is unused and pos is used to indicate the - * position of B and E in the sequence. The matched - * subsequence is B_pos+1...E_pos. The number of - * matches in the trace is (tlen/2)-1. - * - * Args: dsq - sequence in digitized form - * L - length of dsq - * hmm - the model (log odds scores ready) - * ret_tr - RETURN: a collapsed traceback. - * - * Returns: Score of the optimal Viterbi alignment, in bits. 
- */ -float -P7ParsingViterbi(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr) -{ - struct dpmatrix_s *mx; /* two rows of score matrix */ - struct dpmatrix_s *tmx; /* two rows of misused score matrix: traceback ptrs */ - struct p7trace_s *tr; /* RETURN: collapsed traceback */ - int **xmx, **mmx, **dmx, **imx; /* convenience ptrs to score matrix */ - int **xtr, **mtr, **dtr, **itr; /* convenience ptrs to traceback pointers */ - int *btr, *etr; /* O(L) trace ptrs for B, E state pts in seq */ - int sc; /* integer score of optimal alignment */ - int i,k,tpos; /* index for seq, model, trace position */ - int cur, prv; /* indices for rolling dp matrix */ - int curralloc; /* size of allocation for tr */ - - - /* Alloc a DP matrix and traceback pointers, two rows each, O(M). - * Alloc two O(L) arrays to trace back through the sequence thru B and E. - */ - mx = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx); - tmx = AllocPlan7Matrix(2, hmm->M, &xtr, &mtr, &itr, &dtr); - btr = MallocOrDie(sizeof(int) * (L+1)); - etr = MallocOrDie(sizeof(int) * (L+1)); - - /* Initialization of the zero row. - */ - xmx[0][XMN] = 0; /* S->N, p=1 */ - xmx[0][XMB] = hmm->xsc[XTN][MOVE]; /* S->N->B, no N-tail */ - btr[0] = 0; - xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; /* need seq to get here */ - etr[0] = -1; - for (k = 0; k <= hmm->M; k++) - mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; /* need seq to get here */ - - /* Recursion. Done as a pull. Rolling index trick. Trace ptr propagation trick. - * Note some slightly wasteful boundary conditions: - * tsc[0] = -INFTY for all eight transitions (no node 0) - * D_M and I_M are wastefully calculated (they don't exist) - * - * Notes on traceback pointer propagation. - * - In the path B->E, we propagate the i that B was aligned to in the optimal - * alignment, via mtr, dtr, and itr. - * - When we reach an E, we record the i of the B it started from in etr. 
- * - In a looping path E->J...->B or terminal path E->C...->T, we propagate - * the i that E was aligned to in the optimal alignment via xtr[][XMC] - * and xtr[][XMJ]. - * - When we enter B, we record the i of the best previous E, or 0 if there - * isn't one, in btr. - */ - for (i = 1; i <= L; i++) { - cur = i % 2; - prv = !cur; - - mmx[cur][0] = imx[cur][0] = dmx[cur][0] = -INFTY; - - for (k = 1; k <= hmm->M; k++) { - /* match state */ - mmx[cur][k] = -INFTY; - if ((sc = mmx[prv][k-1] + hmm->tsc[k-1][TMM]) > -INFTY) - { mmx[cur][k] = sc; mtr[cur][k] = mtr[prv][k-1]; } - if ((sc = imx[prv][k-1] + hmm->tsc[k-1][TIM]) > mmx[cur][k]) - { mmx[cur][k] = sc; mtr[cur][k] = itr[prv][k-1]; } - if ((sc = xmx[prv][XMB] + hmm->bsc[k]) > mmx[cur][k]) - { mmx[cur][k] = sc; mtr[cur][k] = i-1; } - if ((sc = dmx[prv][k-1] + hmm->tsc[k-1][TDM]) > mmx[cur][k]) - { mmx[cur][k] = sc; mtr[cur][k] = dtr[prv][k-1]; } - if (hmm->msc[(int) dsq[i]][k] != -INFTY) - mmx[cur][k] += hmm->msc[(int) dsq[i]][k]; - else - mmx[cur][k] = -INFTY; - - /* delete state */ - dmx[cur][k] = -INFTY; - if ((sc = mmx[cur][k-1] + hmm->tsc[k-1][TMD]) > -INFTY) - { dmx[cur][k] = sc; dtr[cur][k] = mtr[cur][k-1]; } - if ((sc = dmx[cur][k-1] + hmm->tsc[k-1][TDD]) > dmx[cur][k]) - { dmx[cur][k] = sc; dtr[cur][k] = dtr[cur][k-1]; } - - /* insert state */ - if (k < hmm->M) { - imx[cur][k] = -INFTY; - if ((sc = mmx[prv][k] + hmm->tsc[k][TMI]) > -INFTY) - { imx[cur][k] = sc; itr[cur][k] = mtr[prv][k]; } - if ((sc = imx[prv][k] + hmm->tsc[k][TII]) > imx[cur][k]) - { imx[cur][k] = sc; itr[cur][k] = itr[prv][k]; } - if (hmm->isc[(int) dsq[i]][k] != -INFTY) - imx[cur][k] += hmm->isc[(int) dsq[i]][k]; - else - imx[cur][k] = -INFTY; - } - } - - /* Now the special states. Order is important here. 
- * remember, C and J emissions are zero score by definition, - */ - /* N state */ - xmx[cur][XMN] = -INFTY; - if ((sc = xmx[prv][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY) - xmx[cur][XMN] = sc; - /* E state */ - xmx[cur][XME] = -INFTY; - for (k = 1; k <= hmm->M; k++) - if ((sc = mmx[cur][k] + hmm->esc[k]) > xmx[cur][XME]) - { xmx[cur][XME] = sc; etr[i] = mtr[cur][k]; } - /* J state */ - xmx[cur][XMJ] = -INFTY; - if ((sc = xmx[prv][XMJ] + hmm->xsc[XTJ][LOOP]) > -INFTY) - { xmx[cur][XMJ] = sc; xtr[cur][XMJ] = xtr[prv][XMJ]; } - if ((sc = xmx[cur][XME] + hmm->xsc[XTE][LOOP]) > xmx[cur][XMJ]) - { xmx[cur][XMJ] = sc; xtr[cur][XMJ] = i; } - /* B state */ - xmx[cur][XMB] = -INFTY; - if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY) - { xmx[cur][XMB] = sc; btr[i] = 0; } - if ((sc = xmx[cur][XMJ] + hmm->xsc[XTJ][MOVE]) > xmx[cur][XMB]) - { xmx[cur][XMB] = sc; btr[i] = xtr[cur][XMJ]; } - /* C state */ - xmx[cur][XMC] = -INFTY; - if ((sc = xmx[prv][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY) - { xmx[cur][XMC] = sc; xtr[cur][XMC] = xtr[prv][XMC]; } - if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > xmx[cur][XMC]) - { xmx[cur][XMC] = sc; xtr[cur][XMC] = i; } - } - /* T state (not stored) */ - sc = xmx[cur][XMC] + hmm->xsc[XTC][MOVE]; - - /***************************************************************** - * Collapsed traceback stage. - * xtr[L%2][XMC] contains the position j of the previous E - * etr[j] contains the position i of the previous B - * btr[i] contains the position j of the previous E, or 0 - * continue until btr[i] = 0. - *****************************************************************/ - - curralloc = 2; /* minimum: no hits */ - P7AllocTrace(curralloc, &tr); - - /* Init of collapsed trace. Back to front; we ReverseTrace() later. 
- */ - tpos = 0; - tr->statetype[tpos] = STT; - tr->pos[tpos] = 0; - i = xtr[L%2][XMC]; - while (i > 0) - { - curralloc += 2; - P7ReallocTrace(tr, curralloc); - - tpos++; - tr->statetype[tpos] = STE; - tr->pos[tpos] = i; - i = etr[i]; - - tpos++; - tr->statetype[tpos] = STB; - tr->pos[tpos] = i; - i = btr[i]; - } - - tpos++; - tr->statetype[tpos] = STS; - tr->pos[tpos] = 0; - tr->tlen = tpos + 1; - P7ReverseTrace(tr); - - FreePlan7Matrix(mx); - FreePlan7Matrix(tmx); - free(btr); - free(etr); - - *ret_tr = tr; - return Scorify(sc); -} - -/* Function: P7WeeViterbi() - * Date: SRE, Wed Mar 4 08:24:04 1998 [St. Louis] - * - * Purpose: Hirschberg/Myers/Miller linear memory alignment. - * See [Hirschberg75,MyM-88a] for the idea of the algorithm. - * Adapted to HMM implementation. - * - * Requires that you /know/ that there's only - * one hit to the model in the sequence: either - * because you're forcing single-hit, or you've - * previously called P7ParsingViterbi to parse - * the sequence into single-hit segments. The reason - * for this is that a cyclic model (a la Plan7) - * defeats the nice divide and conquer trick. - * (I think some trickery with propagated trace pointers - * could get around this but haven't explored it.) - * This is implemented by ignoring transitions - * to/from J state. - * - * Args: dsq - sequence in digitized form - * L - length of dsq - * hmm - the model - * ret_tr - RETURN: traceback. - * - * Returns: Score of the optimal Viterbi alignment. 
- */ -float -P7WeeViterbi(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr) -{ - struct p7trace_s *tr; /* RETURN: traceback */ - int *kassign; /* 0..L+1, alignment of seq positions to model nodes */ - char *tassign; /* 0..L+1, alignment of seq positions to state types */ - int *endlist; /* stack of end points on sequence to work on */ - int *startlist; /* stack of start points on sequence to work on */ - int lpos; /* position in endlist, startlist */ - int k1, k2, k3; /* start, mid, end in model */ - char t1, t2, t3; /* start, mid, end in state type */ - int s1, s2, s3; /* start, mid, end in sequence */ - float sc; /* score of segment optimal alignment */ - float ret_sc; /* optimal score over complete seq */ - int tlen; /* length needed for trace */ - int i, k, tpos; /* index in sequence, model, trace */ - - - /* Initialize. - */ - kassign = MallocOrDie (sizeof(int) * (L+1)); - tassign = MallocOrDie (sizeof(char)* (L+1)); - endlist = MallocOrDie (sizeof(int) * (L+1)); - startlist = MallocOrDie (sizeof(int) * (L+1)); - - lpos = 0; - startlist[lpos] = 1; - endlist[lpos] = L; - kassign[1] = 1; - kassign[L] = hmm->M; - tassign[1] = STS; /* temporary boundary condition! will become N or M */ - tassign[L] = STT; /* temporary boundary condition! will become M or C */ - - /* Recursive divide-and-conquer alignment. 
- */ - while (lpos >= 0) - { - /* Pop a segment off the stack */ - s1 = startlist[lpos]; - k1 = kassign[s1]; - t1 = tassign[s1]; - s3 = endlist[lpos]; - k3 = kassign[s3]; - t3 = tassign[s3]; - lpos--; - /* find optimal midpoint of segment */ - sc = get_wee_midpt(hmm, dsq, L, k1, t1, s1, k3, t3, s3, &k2, &t2, &s2); - kassign[s2] = k2; - tassign[s2] = t2; - /* score is valid on first pass */ - if (t1 == STS && t3 == STT) ret_sc = sc; - - /* push N-terminal segment on stack */ - if (t2 != STN && (s2 - s1 > 1 || (s2 - s1 == 1 && t1 == STS))) - { - lpos++; - startlist[lpos] = s1; - endlist[lpos] = s2; - } - /* push C-terminal segment on stack */ - if (t2 != STC && (s3 - s2 > 1 || (s3 - s2 == 1 && t3 == STT))) - { - lpos++; - startlist[lpos] = s2; - endlist[lpos] = s3; - } - - if (t2 == STN) - { /* if we see STN midpoint, we know the whole N-term is STN */ - for (; s2 >= s1; s2--) { - kassign[s2] = 1; - tassign[s2] = STN; - } - } - if (t2 == STC) - { /* if we see STC midpoint, we know whole C-term is STC */ - for (; s2 <= s3; s2++) { - kassign[s2] = hmm->M; - tassign[s2] = STC; - } - } - } - - /***************************************************************** - * Construct a traceback structure from kassign/tassign by interpolating - * necessary states. - * Trace allocation is as follows. We clearly need L emitting states. - * We also need nonemitting states as follows: - * STS,STN,STB,STE,STC,STT = 6 - * STD: count k2-k1-1 in kassign M->M's - * Also, count N->M's and M->C's (potential wing unfoldings)... - * ...and be careful to check wing unfoldings when there aren't - * any emitting N or C flanks! 
(bugfix, 2.1.1b) - *****************************************************************/ - - tlen = L + 6; - for (i = 1; i < L; i++) - { - if (tassign[i] == STM && tassign[i+1] == STM) - tlen += kassign[i+1] - kassign[i] - 1; - if (tassign[i] == STN && tassign[i+1] == STM) - tlen += kassign[i+1] - 1; - if (tassign[i] == STM && tassign[i+1] == STC) - tlen += hmm->M - kassign[i]; - } - if (tassign[1] == STM) tlen += kassign[1] - 1; - if (tassign[L] == STM) tlen += hmm->M - kassign[L]; - P7AllocTrace(tlen, &tr); - - tr->statetype[0] = STS; - tr->nodeidx[0] = 0; - tr->pos[0] = 0; - tr->statetype[1] = STN; - tr->nodeidx[1] = 0; - tr->pos[1] = 0; - tpos = 2; - - for (i = 1; i <= L; i++) - { - switch(tassign[i]) { - case STM: - /* check for first match state */ - if (tr->statetype[tpos-1] == STN) { - tr->statetype[tpos] = STB; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - tpos++; - /* check for wing unfolding */ - if (Prob2Score(hmm->begin[kassign[i]], hmm->p1) + INTSCALE <= hmm->bsc[kassign[i]]) - for (k = 1; k < kassign[i]; k++) { - tr->statetype[tpos] = STD; - tr->nodeidx[tpos] = k; - tr->pos[tpos] = 0; - tpos++; - } - } - /* do the match state itself */ - tr->statetype[tpos] = STM; - tr->nodeidx[tpos] = kassign[i]; - tr->pos[tpos] = i; - tpos++; - /* do any deletes necessary 'til next match */ - if (i < L && tassign[i+1] == STM && kassign[i+1] - kassign[i] > 1) - for (k = kassign[i] + 1; k < kassign[i+1]; k++) - { - tr->statetype[tpos] = STD; - tr->nodeidx[tpos] = k; - tr->pos[tpos] = 0; - tpos++; - } - /* check for last match state */ - if (i == L || tassign[i+1] == STC) { - /* check for wing unfolding */ - if (Prob2Score(hmm->end[kassign[i-1]], 1.) 
+ INTSCALE <= hmm->esc[kassign[i-1]]) - for (k = kassign[i]+1; k <= hmm->M; k++) - { - tr->statetype[tpos] = STD; - tr->nodeidx[tpos] = k; - tr->pos[tpos] = 0; - tpos++; - } - /* add on the end state */ - tr->statetype[tpos] = STE; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - tpos++; - /* and a nonemitting C state */ - tr->statetype[tpos] = STC; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - tpos++; - } - break; - - case STI: - tr->statetype[tpos] = STI; - tr->nodeidx[tpos] = kassign[i]; - tr->pos[tpos] = i; - tpos++; - break; - - case STN: - tr->statetype[tpos] = STN; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = i; - tpos++; - break; - - case STC: - tr->statetype[tpos] = STC; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = i; - tpos++; - break; - - default: Die("Bogus state %s", Statetype(tassign[i])); - } - } - /* terminate the trace */ - tr->statetype[tpos] = STT; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - tr->tlen = tpos+1; - - *ret_tr = tr; - - free(kassign); - free(tassign); - free(startlist); - free(endlist); - return ret_sc; -} - - -/* Function: Plan7ESTViterbi() - * - * Purpose: Frameshift-tolerant alignment of protein model to cDNA EST. - * - * - */ -float -Plan7ESTViterbi(char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s **ret_mx) -{ - struct dpmatrix_s *mx; - int **xmx; - int **mmx; - int **imx; - int **dmx; - int i,k; - int sc; - int codon; - - /* Allocate a DP matrix with 0..L rows, 0..M+1 columns. - */ - mx = AllocPlan7Matrix(L+1, hmm->M, &xmx, &mmx, &imx, &dmx); - - /* Initialization of the zero row (DNA sequence of length 0) - * Note that xmx[i][stN] = 0 by definition for all i, - * and xmx[i][stT] = xmx[i][stC], so neither stN nor stT need - * to be calculated in DP matrices. 
- */ - xmx[0][XMN] = 0; /* S->N, p=1 */ - xmx[0][XMB] = hmm->xsc[XTN][MOVE]; /* S->N->B, no N-tail */ - xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; /* need seq to get here */ - for (k = 0; k <= hmm->M; k++) - mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; /* need seq to get here */ - - /* Initialization of the first row (DNA sequence of length 1); - * only N state can make this nucleotide. - */ - xmx[1][XMN] = xmx[0][XMN] + hmm->xsc[XTN][LOOP]; - xmx[1][XMB] = xmx[1][XMN] + hmm->xsc[XTN][MOVE]; - xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; /* need 2 nt to get here */ - for (k = 0; k <= hmm->M; k++) - mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; /* need 2 nt to get into model */ - - /* Recursion. Done as a pull. - * Note some slightly wasteful boundary conditions: - * tsc[0] = -INFTY for all eight transitions (no node 0) - * D_M and I_M are wastefully calculated (they don't exist) - */ - for (i = 2; i <= L; i++) { - mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY; - - /* crude calculation of lookup value for codon */ - if (i > 2) { - if (dsq[i-2] < 4 && dsq[i-1] < 4 && dsq[i] < 4) - codon = dsq[i-2] * 16 + dsq[i-1] * 4 + dsq[i]; - else - codon = 64; /* ambiguous codon; punt */ - } - - for (k = 1; k <= hmm->M; k++) { - /* match state */ - if (i > 2) { - mmx[i][k] = mmx[i-3][k-1] + hmm->tsc[k-1][TMM]; - if ((sc = imx[i-3][k-1] + hmm->tsc[k-1][TIM]) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = xmx[i-3][XMB] + hmm->bsc[k]) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = dmx[i-3][k-1] + hmm->tsc[k-1][TDM]) > mmx[i][k]) - mmx[i][k] = sc; - mmx[i][k] += hmm->dnam[codon][k]; - } - /* -1 frameshifts into match state */ - if ((sc = mmx[i-2][k-1] + hmm->tsc[k-1][TMM] + hmm->dna2) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = imx[i-2][k-1] + hmm->tsc[k-1][TIM] + hmm->dna2) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = xmx[i-2][XMB] + hmm->bsc[k] + hmm->dna2) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = dmx[i-2][k-1] + hmm->tsc[k-1][TDM] + hmm->dna2) > mmx[i][k]) - mmx[i][k] = sc; - - /* +1 
frameshifts into match state */ - if (i > 3) { - if ((sc = mmx[i-4][k-1] + hmm->tsc[k-1][TMM] + hmm->dna4) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = imx[i-4][k-1] + hmm->tsc[k-1][TIM] + hmm->dna4) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = xmx[i-4][XMB] + hmm->bsc[k] + hmm->dna4) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = dmx[i-4][k-1] + hmm->tsc[k-1][TDM] + hmm->dna4) > mmx[i][k]) - mmx[i][k] = sc; - } - /* delete state */ - dmx[i][k] = mmx[i][k-1] + hmm->tsc[k-1][TMD]; - if ((sc = dmx[i][k-1] + hmm->tsc[k-1][TDD]) > dmx[i][k]) - dmx[i][k] = sc; - - /* insert state */ - if (i > 2) { - imx[i][k] = mmx[i-3][k] + hmm->tsc[k][TMI]; - if ((sc = imx[i-3][k] + hmm->tsc[k][TII]) > imx[i][k]) - imx[i][k] = sc; - imx[i][k] += hmm->dnai[codon][k]; - } - - /* -1 frameshifts into insert state */ - if ((sc = mmx[i-2][k] + hmm->tsc[k][TMI] + hmm->dna2) > imx[i][k]) - imx[i][k] = sc; - if ((sc = imx[i-2][k] + hmm->tsc[k][TII] + hmm->dna2) > imx[i][k]) - imx[i][k] = sc; - - /* +1 frameshifts into insert state */ - if (i > 4) { - if ((sc = mmx[i-4][k] + hmm->tsc[k][TMI] + hmm->dna4) > imx[i][k]) - imx[i][k] = sc; - if ((sc = imx[i-4][k] + hmm->tsc[k][TII] + hmm->dna4) > imx[i][k]) - imx[i][k] = sc; - } - } - /* Now the special states. Order is important here. 
- * remember, C and J emissions are zero score by definition, - */ - /* N state: +1 nucleotide */ - xmx[i][XMN] = xmx[i-1][XMN] + hmm->xsc[XTN][LOOP]; - /* E state: collect from M's, and last D */ - xmx[i][XME] = dmx[i][hmm->M]; /* transition prob from last D = 1.0 */ - for (k = 1; k <= hmm->M; k++) - if ((sc = mmx[i][k] + hmm->esc[k]) > xmx[i][XME]) - xmx[i][XME] = sc; - /* J state: +1 nucleotide */ - xmx[i][XMJ] = xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP]; - if ((sc = xmx[i][XME] + hmm->xsc[XTE][LOOP]) > xmx[i][XMJ]) - xmx[i][XMJ] = sc; - /* B state: collect from N,J */ - xmx[i][XMB] = xmx[i][XMN] + hmm->xsc[XTN][MOVE]; - if ((sc = xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]) > xmx[i][XMB]) - xmx[i][XMB] = sc; - /* C state: +1 nucleotide */ - xmx[i][XMC] = xmx[i-1][XMC] + hmm->xsc[XTC][LOOP]; - if ((sc = xmx[i][XME] + hmm->xsc[XTE][MOVE]) > xmx[i][XMC]) - xmx[i][XMC] = sc; - } - - sc = xmx[L][XMC] + hmm->xsc[XTC][MOVE]; - - if (ret_mx != NULL) *ret_mx = mx; - else FreePlan7Matrix(mx); - - return Scorify(sc); /* the total Viterbi score. */ -} - - - -/* Function: get_wee_midpt() - * Date: SRE, Wed Mar 4 08:27:11 1998 [St. Louis] - * - * Purpose: The heart of the divide and conquer algorithm - * for P7WeeViterbi(). This function is called - * recursively to find successive optimal midpoints - * in the alignment matrix. See P7WeeViterbi() for - * further comments on the assumptions of this algorithm. 
- * - * Args: hmm - the model, set up for integer scores - * dsq - the sequence, digitized - * L - length of the sequence - * k1 - model node to start with, 1..M - * t1 - state type to start with, STM | STI | STN | STC; STS to start - * s1 - sequence position to start with, 1..L; 1 to start - * k3 - model node to end with, 1..M - * t3 - state type to end with, STM | STI | STN | STC; STT to start - * s3 - sequence position to end with, 1..L; L to start - * ret_k2 - RETURN: optimal midpoint, node position in model - * ret_t2 - RETURN: optimal midpoint, state type - * ret_s2 - RETURN: optimal midpoint, sequence position - * - * Returns: score of optimal alignment, in bits. - */ -static float -get_wee_midpt(struct plan7_s *hmm, char *dsq, int L, - int k1, char t1, int s1, - int k3, char t3, int s3, - int *ret_k2, char *ret_t2, int *ret_s2) -{ - struct dpmatrix_s *fwd; - struct dpmatrix_s *bck; - int **xmx; /* convenience ptr into special states */ - int **mmx; /* convenience ptr into match states */ - int **imx; /* convenience ptr into insert states */ - int **dmx; /* convenience ptr into delete states */ - int k2; - char t2; - int s2; - int cur, prv, nxt; /* current, previous, next row index (0 or 1)*/ - int i,k; /* indices for seq, model */ - int sc; /* integer score */ - int max; /* maximum integer score */ - int start; /* s1 to start at (need, for STS special case) */ - - - /* Choose our midpoint. - * Special cases: s1, s3 adjacent and t1 == STS: s2 = s1 - * s1, s3 adjacent and t3 == STT: s2 = s3 - * (where we must replace STS, STT eventually) - */ - s2 = s1 + (s3-s1) / 2; - if (s3-s1 == 1 && t1 == STS) s2 = s1; - if (s3-s1 == 1 && t3 == STT) s2 = s3; - - /* STS is a special case. STS aligns to row zero by convention, - * but we'll be passed s1=1, t1=STS. We have to init on row - * zero then start DP on row 1. - */ - start = (t1 == STS) ? 0 : s1; - - /* Allocate our forward two rows. - * Initialize row zero. 
- */ - fwd = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx); - cur = start%2; - xmx[cur][XMN] = xmx[cur][XMB] = -INFTY; - xmx[cur][XME] = xmx[cur][XMC] = -INFTY; - for (k = k1; k <= k3; k++) - mmx[cur][k] = imx[cur][k] = dmx[cur][k] = -INFTY; - - /* Where to put our zero for our start point... - * (only possible to start on an emitting state; J disallowed) - */ - switch (t1) { - case STM: mmx[cur][k1] = 0; break; - case STI: imx[cur][k1] = 0; break; - case STN: xmx[cur][XMN] = 0; break; - case STC: xmx[cur][XMC] = 0; break; - case STS: xmx[cur][XMN] = 0; break; - default: Die("you can't init get_wee_midpt with a %s\n", Statetype(t1)); - } - - /* Still initializing. - * Deal with pulling horizontal matrix moves in initial row. - * These are any transitions to nonemitters: - * STM-> E, D - * STI-> none - * STN-> B - * STC-> (T, but we never observe this in the forward pass of a d&c) - * STE-> C - * STS-> (N, already implied by setting xmx[cur][XMN] = 0) - * STB-> M - */ - if (t1 == STM) - { - for (k = k1+1; k <= k3; k++) - { /* transits into STD */ - dmx[cur][k] = -INFTY; - if ((sc = mmx[cur][k-1] + hmm->tsc[k-1][TMD]) > -INFTY) - dmx[cur][k] = sc; - if ((sc = dmx[cur][k-1] + hmm->tsc[k-1][TDD]) > dmx[cur][k]) - dmx[cur][k] = sc; - } - /* transit into STE */ - xmx[cur][XME] = -INFTY; - if ((sc = mmx[cur][k1] + hmm->esc[k1]) > -INFTY) - xmx[cur][XME] = sc; - } - /* transit into STB from STN */ - xmx[cur][XMB] = -INFTY; - if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY) - xmx[cur][XMB] = sc; - /* transit into STC from STE */ - xmx[cur][XMC] = -INFTY; - if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > -INFTY) - xmx[cur][XMC] = sc; - - /* Done initializing. - * Start recursive DP; sweep forward to chosen s2 midpoint. Done as a pull. - */ - for (i = start+1; i <= s2; i++) { - cur = i % 2; - prv = !cur; - - mmx[cur][k1] = imx[cur][k1] = dmx[cur][k1] = -INFTY; - - /* Insert state in column k1, and B->M transition in k1. 
- */ - if (k1 < hmm->M) { - imx[cur][k1] = -INFTY; - if ((sc = mmx[prv][k1] + hmm->tsc[k1][TMI]) > -INFTY) - imx[cur][k1] = sc; - if ((sc = imx[prv][k1] + hmm->tsc[k1][TII]) > imx[cur][k1]) - imx[cur][k1] = sc; - if (hmm->isc[(int) dsq[i]][k1] != -INFTY) - imx[cur][k1] += hmm->isc[(int) dsq[i]][k1]; - else - imx[cur][k1] = -INFTY; - } - if ((sc = xmx[prv][XMB] + hmm->bsc[k1]) > -INFTY) - mmx[cur][k1] = sc; - if (hmm->msc[(int) dsq[i]][k1] != -INFTY) - mmx[cur][k1] += hmm->msc[(int) dsq[i]][k1]; - else - mmx[cur][k1] = -INFTY; - - /* Main chunk of recursion across model positions - */ - for (k = k1+1; k <= k3; k++) { - /* match state */ - mmx[cur][k] = -INFTY; - if ((sc = mmx[prv][k-1] + hmm->tsc[k-1][TMM]) > -INFTY) - mmx[cur][k] = sc; - if ((sc = imx[prv][k-1] + hmm->tsc[k-1][TIM]) > mmx[cur][k]) - mmx[cur][k] = sc; - if ((sc = xmx[prv][XMB] + hmm->bsc[k]) > mmx[cur][k]) - mmx[cur][k] = sc; - if ((sc = dmx[prv][k-1] + hmm->tsc[k-1][TDM]) > mmx[cur][k]) - mmx[cur][k] = sc; - if (hmm->msc[(int) dsq[i]][k] != -INFTY) - mmx[cur][k] += hmm->msc[(int) dsq[i]][k]; - else - mmx[cur][k] = -INFTY; - - /* delete state */ - dmx[cur][k] = -INFTY; - if (k < hmm->M) { - if ((sc = mmx[cur][k-1] + hmm->tsc[k-1][TMD]) > -INFTY) - dmx[cur][k] = sc; - if ((sc = dmx[cur][k-1] + hmm->tsc[k-1][TDD]) > dmx[cur][k]) - dmx[cur][k] = sc; - } - - /* insert state */ - imx[cur][k] = -INFTY; - if (k < hmm->M) { - if ((sc = mmx[prv][k] + hmm->tsc[k][TMI]) > -INFTY) - imx[cur][k] = sc; - if ((sc = imx[prv][k] + hmm->tsc[k][TII]) > imx[cur][k]) - imx[cur][k] = sc; - if (hmm->isc[(int) dsq[i]][k] != -INFTY) - imx[cur][k] += hmm->isc[(int) dsq[i]][k]; - else - imx[cur][k] = -INFTY; - } - } - /* N state */ - xmx[cur][XMN] = -INFTY; - if ((sc = xmx[prv][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY) - xmx[cur][XMN] = sc; - /* E state */ - xmx[cur][XME] = -INFTY; - for (k = k1; k <= k3 && k <= hmm->M; k++) - if ((sc = mmx[cur][k] + hmm->esc[k]) > xmx[cur][XME]) - xmx[cur][XME] = sc; - /* B state */ - 
xmx[cur][XMB] = -INFTY; - if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY) - xmx[cur][XMB] = sc; - /* C state */ - xmx[cur][XMC] = -INFTY; - if ((sc = xmx[prv][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY) - xmx[cur][XMC] = sc; - if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > xmx[cur][XMC]) - xmx[cur][XMC] = sc; - } - - /* Row s2%2 in fwd matrix now contains valid scores from s1 (start) to s2, - * with J transitions disallowed (no cycles through model). - */ - - /***************************************************************** - * Backwards pass. - *****************************************************************/ - - /* Allocate our backwards two rows. Init last row. - */ - bck = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx); - nxt = s3%2; - xmx[nxt][XMN] = xmx[nxt][XMB] = -INFTY; - xmx[nxt][XME] = xmx[nxt][XMC] = -INFTY; - for (k = k1; k <= k3 + 1; k++) - mmx[nxt][k] = imx[nxt][k] = dmx[nxt][k] = -INFTY; - cur = !nxt; - mmx[cur][k3+1] = imx[cur][k3+1] = dmx[cur][k3+1] = -INFTY; - - /* Where to put the zero for our end point on last row. - */ - switch (t3) { - case STM: mmx[nxt][k3] = 0; break; - case STI: imx[nxt][k3] = 0; break; - case STN: xmx[nxt][XMN] = 0; break; - case STC: xmx[nxt][XMC] = 0; break; /* must be an emitting C */ - case STT: xmx[nxt][XMC] = hmm->xsc[XTC][MOVE]; break; /* C->T implied */ - default: Die("you can't init get_wee_midpt with a %s\n", Statetype(t3)); - } - - /* Still initializing. - * In the case t3==STT, there are a few horizontal moves possible - * on row s3, because STT isn't an emitter. All other states are - * emitters, so their connections have to be to the previous row s3-1. - */ - if (t3 == STT) - { /* E->C */ - xmx[nxt][XME] = xmx[nxt][XMC] + hmm->xsc[XTE][MOVE]; - /* M->E */ - for (k = k3; k >= k1; k--) { - mmx[nxt][k] = xmx[nxt][XME] + hmm->esc[k]; - if (s3 != s2) - mmx[nxt][k] += hmm->msc[(int)dsq[s3]][k]; - } - } - - /* Start recursive DP; sweep backwards to chosen s2 midpoint. - * Done as a pull. 
M, I scores at current row do /not/ include - * emission scores. Be careful of integer underflow. - */ - for (i = s3-1; i >= s2; i--) { - /* note i < L, so i+1 is always a legal index */ - cur = i%2; - nxt = !cur; - /* C pulls from C (T is special cased) */ - xmx[cur][XMC] = -INFTY; - if ((sc = xmx[nxt][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY) - xmx[cur][XMC] = sc; - /* B pulls from M's */ - xmx[cur][XMB] = -INFTY; - for (k = k1; k <= k3; k++) - if ((sc = mmx[nxt][k] + hmm->bsc[k]) > xmx[cur][XMB]) - xmx[cur][XMB] = sc; - /* E pulls from C (J disallowed) */ - xmx[cur][XME] = -INFTY; - if ((sc = xmx[cur][XMC] + hmm->xsc[XTE][MOVE]) > -INFTY) - xmx[cur][XME] = sc; - /* N pulls from B, N */ - xmx[cur][XMN] = -INFTY; - if ((sc = xmx[cur][XMB] + hmm->xsc[XTN][MOVE]) > -INFTY) - xmx[cur][XMN] = sc; - if ((sc = xmx[nxt][XMN] + hmm->xsc[XTN][LOOP]) > xmx[cur][XMN]) - xmx[cur][XMN] = sc; - - /* Main recursion across model - */ - for (k = k3; k >= k1; k--) { - /* special case k == M */ - if (k == hmm->M) { - mmx[cur][k] = xmx[cur][XME]; /* p=1 transition to E by definition */ - dmx[cur][k] = -INFTY; /* doesn't exist */ - imx[cur][k] = -INFTY; /* doesn't exist */ - if (i != s2) - mmx[cur][k] += hmm->msc[(int)dsq[i]][k]; - continue; - } /* below this k < M, so k+1 is a legal index */ - - /* pull into match state */ - mmx[cur][k] = -INFTY; - if ((sc = xmx[cur][XME] + hmm->esc[k]) > -INFTY) - mmx[cur][k] = sc; - if ((sc = mmx[nxt][k+1] + hmm->tsc[k][TMM]) > mmx[cur][k]) - mmx[cur][k] = sc; - if ((sc = imx[nxt][k] + hmm->tsc[k][TMI]) > mmx[cur][k]) - mmx[cur][k] = sc; - if ((sc = dmx[cur][k+1] + hmm->tsc[k][TMD]) > mmx[cur][k]) - mmx[cur][k] = sc; - if (i != s2) - mmx[cur][k] += hmm->msc[(int)dsq[i]][k]; - - /* pull into delete state */ - dmx[cur][k] = -INFTY; - if ((sc = mmx[nxt][k+1] + hmm->tsc[k][TDM]) > -INFTY) - dmx[cur][k] = sc; - if ((sc = dmx[cur][k+1] + hmm->tsc[k][TDD]) > dmx[cur][k]) - dmx[cur][k] = sc; - /* pull into insert state */ - imx[cur][k] = -INFTY; - if ((sc = 
mmx[nxt][k+1] + hmm->tsc[k][TIM]) > -INFTY) - imx[cur][k] = sc; - if ((sc = imx[nxt][k] + hmm->tsc[k][TII]) > imx[cur][k]) - imx[cur][k] = sc; - if (i != s2) - imx[cur][k] += hmm->isc[(int)dsq[i]][k]; - - } - } - - /***************************************************************** - * DP complete; we have both forward and backward passes. Now we - * look across the s2 row and find the optimal emitting state. - *****************************************************************/ - - cur = s2%2; - max = -INFTY; - for (k = k1; k <= k3; k++) - { - if ((sc = fwd->mmx[cur][k] + bck->mmx[cur][k]) > max) - { k2 = k; t2 = STM; max = sc; } - if ((sc = fwd->imx[cur][k] + bck->imx[cur][k]) > max) - { k2 = k; t2 = STI; max = sc; } - } - if ((sc = fwd->xmx[cur][XMN] + bck->xmx[cur][XMN]) > max) - { k2 = 1; t2 = STN; max = sc; } - if ((sc = fwd->xmx[cur][XMC] + bck->xmx[cur][XMC]) > max) - { k2 = hmm->M; t2 = STC; max = sc; } - - /***************************************************************** - * Garbage collection, return. - *****************************************************************/ - - FreePlan7Matrix(fwd); - FreePlan7Matrix(bck); - *ret_k2 = k2; - *ret_t2 = t2; - *ret_s2 = s2; - return Scorify(max); -} - - -/* Function: P7ViterbiAlignAlignment() - * Date: SRE, Sat Jul 4 13:39:00 1998 [St. Louis] - * - * Purpose: Align a multiple alignment to an HMM without - * changing the multiple alignment itself. - * Adapted from P7Viterbi(). - * - * Heuristic; not a guaranteed optimal alignment. - * Guaranteeing an optimal alignment appears difficult. - * [cryptic note to myself:] In paths connecting to I* metastates, - * recursion breaks down; if there is a gap in the - * previous column for a given seq, we can't determine what state the - * I* metastate corresponds to for this sequence, unless we - * look back in the DP matrix. 
The lookback would either involve - * recursing back to the previous M* metastate (giving a - * O(MN^2) algorithm instead of O(MN)) or expanding the I* - * metastate into 3^nseq separate I* metastates to keep track - * of which of three states each seq is in. Since the second - * option blows up exponentially w/ nseq, it is not attractive. - * If the first option were used, the correct algorithm would be related to - * modelmakers.c:Maxmodelmaker(), but somewhat more difficult. - * - * The heuristic approach here is to calculate a "consensus" - * sequence from the alignment, and align the consensus to the HMM. - * Some hackery is employed, weighting transitions and emissions - * to make things work (re: con and mocc arrays). - * - * Args: aseq - aligned sequences - * ainfo - info for aseqs (includes alen, nseq, wgt) - * hmm - model to align to - * - * Returns: Traceback. Caller must free with P7FreeTrace(). - * pos[] contains alignment columns, indexed 1..alen. - * statetype[] contains metastates M*, etc. as STM, etc. - */ -struct p7trace_s * -P7ViterbiAlignAlignment(MSA *msa, struct plan7_s *hmm) -{ - struct dpmatrix_s *mx; /* Viterbi calculation lattice (two rows) */ - struct dpshadow_s *tb; /* shadow matrix of traceback pointers */ - struct p7trace_s *tr; /* RETURN: traceback */ - int **xmx, **mmx, **imx, **dmx; - char **xtb, **mtb, **itb, **dtb; - float **con; /* [1..alen][0..Alphabet_size-1], consensus counts */ - float *mocc; /* fractional occupancy of a column; used to weight transitions */ - int i; /* counter for columns */ - int k; /* counter for model positions */ - int idx; /* counter for seqs */ - int sym; /* counter for alphabet symbols */ - int sc; /* temp variable for holding score */ - float denom; /* total weight of seqs; used to "normalize" counts */ - int cur, prv; - - /* The "consensus" is a counts matrix, [1..alen][0..Alphabet_size-1]. 
- * Gaps are not counted explicitly, but columns with lots of gaps get - * less total weight because they have fewer counts. - */ - /* allocation */ - con = MallocOrDie(sizeof(float *) * (msa->alen+1)); - mocc = MallocOrDie(sizeof(float) * (msa->alen+1)); - for (i = 1; i <= msa->alen; i++) { - con[i] = MallocOrDie(sizeof(float) * Alphabet_size); - FSet(con[i], Alphabet_size, 0.0); - } - mocc[0] = -9999.; - /* initialization */ - /* note: aseq is off by one, 0..alen-1 */ - /* "normalized" to have a max total count of 1 per col */ - denom = FSum(msa->wgt, msa->nseq); - for (i = 1; i <= msa->alen; i++) - { - for (idx = 0; idx < msa->nseq; idx++) - if (! isgap(msa->aseq[idx][i-1])) - P7CountSymbol(con[i], SYMIDX(msa->aseq[idx][i-1]), msa->wgt[idx]); - FScale(con[i], Alphabet_size, 1./denom); - mocc[i] = FSum(con[i], Alphabet_size); - } - - /* Allocate a DP matrix with 2 rows, 0..M columns, - * and a shadow matrix with 0,1..alen rows, 0..M columns. - */ - mx = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx); - tb = AllocShadowMatrix(msa->alen+1, hmm->M, &xtb, &mtb, &itb, &dtb); - - /* Initialization of the zero row. - */ - xmx[0][XMN] = 0; /* S->N, p=1 */ - xtb[0][XMN] = STS; - xmx[0][XMB] = hmm->xsc[XTN][MOVE]; /* S->N->B, no N-tail */ - xtb[0][XMB] = STN; - xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; /* need seq to get here */ - tb->esrc[0] = 0; - xtb[0][XMC] = xtb[0][XMJ] = STBOGUS; - for (k = 0; k <= hmm->M; k++) { - mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; /* need seq to get here */ - mtb[0][k] = itb[0][k] = dtb[0][k] = STBOGUS; - } - - /* Recursion. Done as a pull. - * Note some slightly wasteful boundary conditions: - * tsc[0] = -INFTY for all eight transitions (no node 0) - * D_M and I_M are wastefully calculated (they don't exist) - */ - for (i = 1; i <= msa->alen; i++) { - cur = i % 2; - prv = ! 
cur; - - mmx[cur][0] = imx[cur][0] = dmx[cur][0] = -INFTY; - mtb[i][0] = itb[i][0] = dtb[i][0] = STBOGUS; - - for (k = 1; k <= hmm->M; k++) { - /* match state */ - mmx[cur][k] = -INFTY; - mtb[i][k] = STBOGUS; - if (mmx[prv][k-1] > -INFTY && hmm->tsc[k-1][TMM] > -INFTY && - (sc = mmx[prv][k-1] + hmm->tsc[k-1][TMM]) > mmx[cur][k]) - { mmx[cur][k] = sc; mtb[i][k] = STM; } - if (imx[prv][k-1] > -INFTY && hmm->tsc[k-1][TIM] > -INFTY && - (sc = imx[prv][k-1] + hmm->tsc[k-1][TIM] * mocc[i-1]) > mmx[cur][k]) - { mmx[cur][k] = sc; mtb[i][k] = STI; } - if ((sc = xmx[prv][XMB] + hmm->bsc[k]) > mmx[cur][k]) - { mmx[cur][k] = sc; mtb[i][k] = STB; } - if (dmx[prv][k-1] > -INFTY && hmm->tsc[k-1][TDM] > -INFTY && - (sc = dmx[prv][k-1] + hmm->tsc[k-1][TDM]) > mmx[cur][k]) - { mmx[cur][k] = sc; mtb[i][k] = STD; } - /* average over "consensus" sequence */ - for (sym = 0; sym < Alphabet_size; sym++) - { - if (con[i][sym] > 0 && hmm->msc[sym][k] == -INFTY) { mmx[cur][k] = -INFTY; break; } - mmx[cur][k] += hmm->msc[sym][k] * con[i][sym]; - } - - /* delete state */ - dmx[cur][k] = -INFTY; - dtb[i][k] = STBOGUS; - if (mmx[cur][k-1] > -INFTY && hmm->tsc[k-1][TMD] > -INFTY && - (sc = mmx[cur][k-1] + hmm->tsc[k-1][TMD]) > dmx[cur][k]) - { dmx[cur][k] = sc; dtb[i][k] = STM; } - if (dmx[cur][k-1] > -INFTY && hmm->tsc[k-1][TDD] > -INFTY && - (sc = dmx[cur][k-1] + hmm->tsc[k-1][TDD]) > dmx[cur][k]) - { dmx[cur][k] = sc; dtb[i][k] = STD; } - - /* insert state */ - if (k < hmm->M) { - imx[cur][k] = -INFTY; - itb[i][k] = STBOGUS; - if (mmx[prv][k] > -INFTY && hmm->tsc[k][TMI] > -INFTY && - (sc = mmx[prv][k] + hmm->tsc[k][TMI] * mocc[i]) > imx[cur][k]) - { imx[cur][k] = sc; itb[i][k] = STM; } - if (imx[prv][k] > -INFTY && hmm->tsc[k][TII] > -INFTY && - (sc = imx[prv][k] + hmm->tsc[k][TII] * mocc[i-1] * mocc[i]) > imx[cur][k]) - { imx[cur][k] = sc; itb[i][k] = STI; } - /* average over "consensus" sequence */ - for (sym = 0; sym < Alphabet_size; sym++) - { - if (con[i][sym] > 0 && hmm->isc[sym][k] == 
-INFTY) { imx[cur][k] = -INFTY; break; } - imx[cur][k] += hmm->isc[sym][k] * con[i][sym]; - } - } - } - - /* Now the special states. Order is important here. - * remember, N, C, and J emissions are zero score by definition. - */ - /* N state */ - xmx[cur][XMN] = -INFTY; - xtb[i][XMN] = STBOGUS; - if (xmx[prv][XMN] > -INFTY && hmm->xsc[XTN][LOOP] > -INFTY && - (sc = xmx[prv][XMN] + hmm->xsc[XTN][LOOP] * mocc[i]) > -INFTY) - { xmx[cur][XMN] = sc; xtb[i][XMN] = STN; } - /* E state */ - xmx[cur][XME] = -INFTY; - xtb[i][XME] = STBOGUS; - for (k = 1; k <= hmm->M; k++) - if (mmx[cur][k] > -INFTY && hmm->esc[k] > -INFTY && - (sc = mmx[cur][k] + hmm->esc[k]) > xmx[cur][XME]) - { xmx[cur][XME] = sc; tb->esrc[i] = k; } - - /* we don't check J state */ - /* B state; don't connect from J */ - xmx[cur][XMB] = -INFTY; - xtb[i][XMB] = STBOGUS; - if (xmx[cur][XMN] > -INFTY && hmm->xsc[XTN][MOVE] > -INFTY && - (sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > xmx[cur][XMB]) - { xmx[cur][XMB] = sc; xtb[i][XMB] = STN; } - - /* C state */ - xmx[cur][XMC] = -INFTY; - xtb[i][XMC] = STBOGUS; - if (xmx[prv][XMC] > -INFTY && hmm->xsc[XTC][LOOP] > -INFTY && - (sc = xmx[prv][XMC] + hmm->xsc[XTC][LOOP] * mocc[i]) > -INFTY) - { xmx[cur][XMC] = sc; xtb[i][XMC] = STC; } - if (xmx[cur][XME] > -INFTY && hmm->xsc[XTE][MOVE] > -INFTY && - (sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > xmx[cur][XMC]) - { xmx[cur][XMC] = sc; xtb[i][XMC] = STE; } - } - /* T state (not stored in mx) */ - sc = xmx[msa->alen%2][XMC] + hmm->xsc[XTC][MOVE]; - - /* do the traceback */ - tr = ShadowTrace(tb, hmm, msa->alen); - /* cleanup and return */ - FreePlan7Matrix(mx); - FreeShadowMatrix(tb); - for (i = 1; i <= msa->alen; i++) - free(con[i]); - free(con); - free(mocc); - - return tr; -} - - - -/* Function: ShadowTrace() - * Date: SRE, Sun Jul 5 11:38:24 1998 [St. Louis] - * - * Purpose: Given a shadow matrix, trace it back, and return - * the trace. 
- * - * Args: tb - shadow matrix of traceback pointers - * hmm - the model (needed for figuring out wing unfolding) - * L - sequence length - * - * Returns: traceback. Caller must free w/ P7FreeTrace(). - */ -struct p7trace_s * -ShadowTrace(struct dpshadow_s *tb, struct plan7_s *hmm, int L) -{ - struct p7trace_s *tr; - int curralloc; /* current allocated length of trace */ - int tpos; /* position in trace */ - int i; /* position in seq (1..N) */ - int k; /* position in model (1..M) */ - char nxtstate; /* next state to assign in traceback */ - - /* Overallocate for the trace. - * S-N-B- ... - E-C-T : 6 states + L is minimum trace; - * add L more as buffer. - */ - curralloc = L * 2 + 6; - P7AllocTrace(curralloc, &tr); - - /* Initialization of trace - * We do it back to front; ReverseTrace() is called later. - */ - tr->statetype[0] = STT; - tr->nodeidx[0] = 0; - tr->pos[0] = 0; - tpos = 1; - i = L; /* current i (seq pos) we're trying to assign */ - k = 0; /* current k (model pos) we're trying to assign */ - nxtstate = STC; /* assign the C state first, for C->T */ - - /* Traceback - */ - while (nxtstate != STS) { - switch (nxtstate) { - case STM: - tr->statetype[tpos] = STM; - nxtstate = tb->mtb[i][k]; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = i--; - tpos++; - break; - - case STI: - tr->statetype[tpos] = STI; - nxtstate = tb->itb[i][k]; - tr->nodeidx[tpos] = k; - tr->pos[tpos] = i--; - tpos++; - break; - - case STD: - tr->statetype[tpos] = STD; - nxtstate = tb->dtb[i][k]; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = 0; - tpos++; - break; - - case STN: - tr->statetype[tpos] = STN; - nxtstate = tb->xtb[i][XMN]; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = (nxtstate == STN) ? i-- : 0; /* N->N; 2nd one emits. 
*/ - tpos++; - break; - - case STB: - /* Check for wing unfolding */ - if (Prob2Score(hmm->begin[k+1], hmm->p1) + 1 * INTSCALE <= hmm->bsc[k+1]) - while (k > 0) - { - tr->statetype[tpos] = STD; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = 0; - tpos++; - if (tpos == curralloc) - { /* grow trace if necessary */ - curralloc += L; - P7ReallocTrace(tr, curralloc); - } - } - - tr->statetype[tpos] = STB; - nxtstate = tb->xtb[i][XMB]; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - tpos++; - break; - - case STJ: - tr->statetype[tpos] = STJ; - nxtstate = tb->xtb[i][XMJ]; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = (nxtstate == STJ) ? i-- : 0; /* J->J; 2nd one emits. */ - tpos++; - break; - - case STE: - tr->statetype[tpos] = STE; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - k = tb->esrc[i]; - nxtstate = STM; - tpos++; - /* check for wing unfolding */ - if (Prob2Score(hmm->end[k], 1.) + 1*INTSCALE <= hmm->esc[k]) - { - int dk; /* need a tmp k while moving thru delete wing */ - for (dk = hmm->M; dk > k; dk--) - { - tr->statetype[tpos] = STD; - tr->nodeidx[tpos] = dk; - tr->pos[tpos] = 0; - tpos++; - if (tpos == curralloc) - { /* grow trace if necessary */ - curralloc += L; - P7ReallocTrace(tr, curralloc); - } - } - } - break; - - case STC: - tr->statetype[tpos] = STC; - nxtstate = tb->xtb[i][XMC]; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = (nxtstate == STC) ? i-- : 0; /* C->C; 2nd one emits. 
*/ - tpos++; - break; - - default: - Die("HMMER: Bad state (%s) in ShadowTrace()\n", Statetype(nxtstate)); - - } /* end switch over nxtstate */ - - if (tpos == curralloc) - { /* grow trace if necessary */ - curralloc += L; - P7ReallocTrace(tr, curralloc); - } - - } /* end traceback, just before assigning S state */ - - tr->statetype[tpos] = STS; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - tr->tlen = tpos + 1; - - P7ReverseTrace(tr); - return tr; -} - - - -/* Function: PostprocessSignificantHit() - * Date: SRE, Wed Dec 20 12:11:01 2000 [StL] - * - * Purpose: Add a significant hit to per-seq and per-domain hit - * lists, after postprocessing the scores appropriately, - * and making sure per-domain scores add up to the per-seq - * score. - * - * [doesn't really belong in core_algorithms.c, because - * it's more of a hack than an algorithm, but on the other - * hand it's now part of the core of how HMMER scores - * things. Maybe there should be a core_hacks.c.] - * - * Given: active hit lists for per-seq and per-domain - * scores (e.g. hmmpfam and hmmsearch, collating their - * results), and a new hit that's significant enough - * that it may need to be reported in final output. - * - * Breaks the traceback into individual domain traces; - * scores each one of them, then applies null2 correction - * for biased composition. Recalculates the per-seq score - * as the sum of the per-domain scores. Stores the hits - * in the lists, for eventual sorting and output by the - * caller. - * - * Notes: In principle we've got the score, and a pvalue, and a traceback - * by doing the Viterbi algorithm, right? What else is left - * to do? Well, in practice, life is more complicated, because - * of the trace-dependent null2 score correction. - * - * After a null2 score correction is carried out on - * each domain (the default) the number of detected domains - * with scores > 0 may have decreased. 
We want the - * global (per-seq) hit list to have the recalculated number of - * domains, not necessarily what Viterbi gave us. - * - * Also, since we want the global score to be the sum of - * the individual domains, but the null2 correction is - * applied to each domain individually, we have to calculate - * an adjusted global score. (To do otherwise invites - * subtle inconsistencies; xref bug 2.) - * - * We don't have final evalues, so we may put a few - * more hits into the hit lists than we end up reporting. - * The main output routine is responsible for final - * enforcement of the thresholds. - * - * This routine is NOT THREADSAFE. When multithreaded, - * with using shared ghit/dhit output buffers, calls to - * PostprocessSignificantHit() need to be protected. - * - * Args: ghit - an active list of per-seq (global) hits - * dhit - an active list of per-domain hits - * tr - the significant HMM/seq traceback we'll report on - * hmm - ptr to the HMM - * dsq - digitized sequence (1..L) - * L - length of dsq - * seqname - name of sequence (same as targname, in hmmsearch) - * seqacc - seq's accession (or NULL) - * seqdesc - seq's description (or NULL) - * do_forward - TRUE if we've already calculated final per-seq score - * sc_override - per-seq score to use if do_forward is TRUE - * do_null2 - TRUE to apply the null2 scoring correction - * thresh - contains the threshold/cutoff information. - * hmmpfam_mode - TRUE if called by hmmpfam, else assumes hmmsearch; - * affects how the lists' sort keys are set. 
- * - * Returns: (void) - */ -void -PostprocessSignificantHit(struct tophit_s *ghit, - struct tophit_s *dhit, - struct p7trace_s *tr, - struct plan7_s *hmm, - char *dsq, - int L, - char *seqname, - char *seqacc, - char *seqdesc, - int do_forward, - float sc_override, - int do_null2, - struct threshold_s *thresh, - int hmmpfam_mode) -{ - struct p7trace_s **tarr; /* array of per-domain traces */ - struct fancyali_s *ali; /* alignment of a domain */ - int ntr; /* number of domain traces from Viterbi */ - int tidx; /* index for traces (0..ntr-1) */ - int ndom; /* # of domains accepted in sequence */ - int didx; /* index for domains (1..ndom) */ - int k1, k2; /* start, stop coord in model */ - int i1, i2; /* start, stop in sequence */ - float whole_sc; /* whole sequence score = \sum domain scores */ - float *score; /* array of raw scores for each domain */ - int *usedomain; /* TRUE if this domain is accepted */ - double whole_pval; - double pvalue; - double sortkey; - - /* Break the trace into one or more individual domains. - */ - TraceDecompose(tr, &tarr, &ntr); - if (ntr == 0) Die("TraceDecompose() screwup"); /* "can't happen" (!) */ - - /* Rescore each domain, apply null2 correction if asked. - * Mark positive-scoring ones (we'll definitely report those), - * and include their score in the whole sequence score. - */ - score = MallocOrDie(sizeof(float) * ntr); - usedomain = MallocOrDie(sizeof(int) * ntr); - ndom = 0; - whole_sc = 0.; - for (tidx = 0; tidx < ntr; tidx++) - { - score[tidx] = P7TraceScore(hmm, dsq, tarr[tidx]); - if (do_null2) score[tidx] -= TraceScoreCorrection(hmm, tarr[tidx], dsq); - if (score[tidx] > 0.0) { - usedomain[tidx] = TRUE; - ndom++; - whole_sc += score[tidx]; - } else - usedomain[tidx] = FALSE; - } - - /* Make sure at least one positive scoring domain is in - * the trace. If not, invoke "weak single domain" rules: - * we will always report at least one domain per sequence, even - * if it has a negative score. 
(HMMER's Plan7 architecture can report - * one negative scoring domain but not more.) - */ - if (ndom == 0) { - tidx = FMax(score, ntr); - usedomain[tidx] = TRUE; - whole_sc = score[tidx]; - ndom = 1; - } - - /* Implement --do_forward: override the trace-dependent sum-of-domain - * whole score, use the P7Forward() score that the called passed - * us instead. This is a hack; null2 is trace-dependent and - * thus undefined for P7Forward() scoring; see commentary in hmmpfam.c. - */ - if (do_forward) whole_sc = sc_override; - - /* Go through and put all the accepted domains into the hit list. - */ - whole_pval = PValue(hmm, whole_sc); - for (tidx = 0, didx = 1; tidx < ntr; tidx++) { - if (! usedomain[tidx]) continue; - - TraceSimpleBounds(tarr[tidx], &i1, &i2, &k1, &k2); - pvalue = PValue(hmm, score[tidx]); - - if (pvalue <= thresh->domE && score[tidx] >= thresh->domT) { - ali = CreateFancyAli(tarr[tidx], hmm, dsq, seqname); - - if (hmmpfam_mode) - sortkey = -1.*(double)i1; /* hmmpfam: sort on position in seq */ - else - sortkey = score[tidx]; /* hmmsearch: sort on E (monotonic w/ sc) */ - - RegisterHit(dhit, sortkey, - pvalue, score[tidx], - whole_pval, whole_sc, - hmmpfam_mode ? hmm->name : seqname, - hmmpfam_mode ? hmm->acc : seqacc, - hmmpfam_mode ? hmm->desc : seqdesc, - i1,i2, L, - k1,k2, hmm->M, - didx,ndom,ali); - } - didx++; - } - - /* Now register the global hit, with the domain-derived score. - */ - - /* sorting: - * hmmpfam has to worry that score and E-value are not monotonic - * when multiple HMMs (with different EVD parameters) are potential - * targets. Therefore in hmmpfam_mode we apply a weird hack - * to sort primarily on E-value, but on score - * for really good hits with E=0.0... works because we can - * assume 100000. > -log(DBL_MIN). - * hmmsearch simply sorts on score (which for a single HMM, we - * know is monotonic with E-value). - */ - if (hmmpfam_mode) - sortkey = (whole_pval > 0.0) ? -1.*log(whole_pval) : 100000. 
+ whole_sc; - else - sortkey = whole_sc; - - /* Note: we've recalculated whole_sc and it may have decreased - * after the null2 correction was applied. For Pfam GA, TC, - * or NC cutoffs, we have to be sure that everything on the - * hitlist is correct (the hmmpfam output routine assumes it, - * otherwise it would have to reload each HMM to get its - * cutoffs). In all other cases, though, we don't care if - * the hit list has a bit too many things on it, because the - * output routine in hmmsearch or hmmpfam will check against - * the cutoffs. Hence we only need to check against globT - * (it may be set by GA, TC, or NC) but not globE. - * - SRE, CSHL genome mtg May 2001 - */ - if (whole_sc >= thresh->globT) { - RegisterHit(ghit, sortkey, - whole_pval, whole_sc, - 0., 0., /* no mother seq */ - hmmpfam_mode ? hmm->name : seqname, - hmmpfam_mode ? hmm->acc : seqacc, - hmmpfam_mode ? hmm->desc : seqdesc, - 0,0,0, /* seq positions */ - 0,0,0, /* HMM positions */ - 0, ndom, /* # domains info */ - NULL); /* alignment info */ - } - - /* Clean up and return. - */ - for (tidx = 0; tidx < ntr; tidx++) - P7FreeTrace(tarr[tidx]); - free(tarr); - free(score); - free(usedomain); - return; -} diff --git a/forester/archive/RIO/others/hmmer/src/debug.c b/forester/archive/RIO/others/hmmer/src/debug.c deleted file mode 100644 index 1044436..0000000 --- a/forester/archive/RIO/others/hmmer/src/debug.c +++ /dev/null @@ -1,368 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* debug.c - * Thu Nov 21 09:58:05 1996 - * - * Printing out or naming various useful things from HMMER - * innards. 
- * - * CVS $Id: debug.c,v 1.1.1.1 2005/03/22 08:33:58 cmzmasek Exp $ - */ - -#include -#include -#include -#include - -#include "structs.h" -#include "config.h" -#include "funcs.h" -#include "squid.h" - -/* Function: Statetype() - * - * Purpose: Returns the state type in text. - * Example: Statetype(S) = "S" - */ -char * -Statetype(char st) -{ - switch (st) { - case STS: return "S"; - case STN: return "N"; - case STB: return "B"; - case STM: return "M"; - case STD: return "D"; - case STI: return "I"; - case STE: return "E"; - case STJ: return "J"; - case STC: return "C"; - case STT: return "T"; - default: return "BOGUS"; - } -} - -/* Function: AlphabetType2String() - * Date: SRE, Sun Dec 24 11:33:40 2000 [St. Louis] - * - * Purpose: Returns a string "protein" for hmmAMINO, - * "nucleic acid" for hmmNUCLEIC, etc... used - * for formatting diagnostics. - * - * Args: type - Alphabet type, e.g. hmmAMINO - * - * Returns: char * - */ -char * -AlphabetType2String(int type) -{ - switch (type) { - case hmmAMINO: return "protein"; - case hmmNUCLEIC: return "nucleic acid"; - case hmmNOTSETYET: return "unknown"; - default: return "BOGUS"; - } -} - - -/* Function: P7PrintTrace() - * - * Purpose: Print out a traceback structure. - * If hmm is non-NULL, also print transition and emission scores. - * - * Args: fp - stderr or stdout, often - * tr - trace structure to print - * hmm - NULL or hmm containing scores to print - * dsq - NULL or digitized sequence trace refers to. 
- */ -void -P7PrintTrace(FILE *fp, struct p7trace_s *tr, struct plan7_s *hmm, char *dsq) -{ - int tpos; /* counter for trace position */ - int sym; - int sc; - - if (hmm == NULL) { - fprintf(fp, "st node rpos - traceback len %d\n", tr->tlen); - fprintf(fp, "-- ---- ------\n"); - for (tpos = 0; tpos < tr->tlen; tpos++) { - fprintf(fp, "%1s %4d %6d\n", - Statetype(tr->statetype[tpos]), - tr->nodeidx[tpos], - tr->pos[tpos]); - } - } else { - if (!(hmm->flags & PLAN7_HASBITS)) - Die("oi, you can't print scores from that hmm, it's not ready."); - - sc = 0; - fprintf(fp, "st node rpos transit emission - traceback len %d\n", tr->tlen); - fprintf(fp, "-- ---- ------ ------- --------\n"); - for (tpos = 0; tpos < tr->tlen; tpos++) { - if (dsq != NULL) sym = (int) dsq[tr->pos[tpos]]; - - fprintf(fp, "%1s %4d %6d %7d", - Statetype(tr->statetype[tpos]), - tr->nodeidx[tpos], - tr->pos[tpos], - (tpos < tr->tlen-1) ? - TransitionScoreLookup(hmm, tr->statetype[tpos], tr->nodeidx[tpos], - tr->statetype[tpos+1], tr->nodeidx[tpos+1]) : 0); - - if (tpos < tr->tlen-1) - sc += TransitionScoreLookup(hmm, tr->statetype[tpos], tr->nodeidx[tpos], - tr->statetype[tpos+1], tr->nodeidx[tpos+1]); - - if (dsq != NULL) { - if (tr->statetype[tpos] == STM) - { - fprintf(fp, " %8d %c", hmm->msc[sym][tr->nodeidx[tpos]], - Alphabet[sym]); - sc += hmm->msc[sym][tr->nodeidx[tpos]]; - } - else if (tr->statetype[tpos] == STI) - { - fprintf(fp, " %8d %c", hmm->isc[sym][tr->nodeidx[tpos]], - (char) tolower((int) Alphabet[sym])); - sc += hmm->isc[sym][tr->nodeidx[tpos]]; - } - else if ((tr->statetype[tpos] == STN && tr->statetype[tpos-1] == STN) || - (tr->statetype[tpos] == STC && tr->statetype[tpos-1] == STC) || - (tr->statetype[tpos] == STJ && tr->statetype[tpos-1] == STJ)) - { - fprintf(fp, " %8d %c", 0, (char) tolower((int) Alphabet[sym])); - } - } else { - fprintf(fp, " %8s %c", "-", '-'); - } - - - fputs("\n", fp); - } - fprintf(fp, " ------- --------\n"); - fprintf(fp, " total: %6d\n\n", sc); - } -} - 
-/* Function: P7PrintPrior() - * - * Purpose: Print out a Plan 7 prior structure. - */ -void -P7PrintPrior(FILE *fp, struct p7prior_s *pri) -{ - int q, x; /* counters for mixture component, element */ - - if (pri->strategy == PRI_DCHLET) fputs("Dirichlet\n", fp); - else if (pri->strategy == PRI_PAM) fputs("PAM\n", fp); - else Die("No such strategy."); - - if (Alphabet_type == hmmAMINO) fputs("Amino\n", fp); - else if (Alphabet_type == hmmNUCLEIC) fputs("Nucleic\n", fp); - - /* Transitions - */ - fprintf(fp, "\n%d\n", pri->tnum); - for (q = 0; q < pri->tnum; q++) - { - fprintf(fp, "%.4f\n", pri->tq[q]); - for (x = 0; x < 7; x++) - fprintf(fp, "%.4f ", pri->t[q][x]); - fputs("\n", fp); - } - - /* Match emissions - */ - fprintf(fp, "\n%d\n", pri->mnum); - for (q = 0; q < pri->mnum; q++) - { - fprintf(fp, "%.4f\n", pri->mq[q]); - for (x = 0; x < Alphabet_size; x++) - fprintf(fp, "%.4f ", pri->m[q][x]); - fputs("\n", fp); - } - - /* Insert emissions - */ - fprintf(fp, "\n%d\n", pri->inum); - for (q = 0; q < pri->inum; q++) - { - fprintf(fp, "%.4f\n", pri->iq[q]); - for (x = 0; x < Alphabet_size; x++) - fprintf(fp, "%.4f ", pri->i[q][x]); - fputs("\n", fp); - } -} - -/* Function: TraceVerify() - * Date: SRE, Mon Feb 2 07:48:52 1998 [St. Louis] - * - * Purpose: Check a traceback structure for internal consistency. - * Used in Shiva testsuite, for example. - * - * Args: tr - traceback to verify - * M - length of HMM - * N - length of sequence - * - * Returns: 1 if OK. 0 if not. - */ -int -TraceVerify(struct p7trace_s *tr, int M, int N) -{ - int tpos; /* position in trace */ - int k; /* current position in HMM nodes 1..M */ - int i; /* current position in seq 1..N */ - int nn, nc, nj; /* number of STN's, STC's, STJ's seen */ - int nm; /* number of STM's seen */ - - /* Basic checks on ends. 
- */ - if (tr->statetype[0] != STS) return 0; - if (tr->statetype[1] != STN) return 0; - if (tr->statetype[tr->tlen-2] != STC) return 0; - if (tr->statetype[tr->tlen-1] != STT) return 0; - if (tr->pos[1] != 0) return 0; - - /* Check for consistency throughout trace - */ - k = i = nn = nc = nj = nm = 0; - for (tpos = 0; tpos < tr->tlen; tpos++) - { - switch (tr->statetype[tpos]) { - case STS: - if (tr->nodeidx[tpos] != 0) return 0; - if (tr->pos[tpos] != 0) return 0; - if (k != 0) return 0; - if (i != 0) return 0; - if (tpos != 0) return 0; - break; - - case STN: /* first N doesn't emit. */ - if (tr->nodeidx[tpos] != 0) return 0; - if (k != 0) return 0; - if (nn > 0) - { - if (tr->pos[tpos] != i+1) return 0; - i++; - } - else - { - if (tr->pos[tpos] != 0) return 0; - if (i != 0) return 0; - } - nn++; - break; - - case STB: - if (tr->nodeidx[tpos] != 0) return 0; - if (tr->pos[tpos] != 0) return 0; - nm = 0; - break; - - case STM: /* can enter anywhere on first M */ - if (tr->pos[tpos] != i+1) return 0; - if (tr->nodeidx[tpos] < 1 || tr->nodeidx[tpos] > M) return 0; - i++; - if (nm == 0) k = tr->nodeidx[tpos]; - else { - if (tr->nodeidx[tpos] != k+1) return 0; - k++; - } - nm++; - break; - - case STI: - if (tr->pos[tpos] != i+1) return 0; - if (tr->nodeidx[tpos] != k) return 0; - if (tr->nodeidx[tpos] < 1 || tr->nodeidx[tpos] > M-1) return 0; - if (k >= M) return 0; - i++; - break; - - case STD: - if (tr->pos[tpos] != 0) return 0; - if (tr->nodeidx[tpos] != k+1) return 0; - if (tr->nodeidx[tpos] < 1 || tr->nodeidx[tpos] > M) return 0; - k++; - break; - - case STE: - if (tr->nodeidx[tpos] != 0) return 0; - if (tr->pos[tpos] != 0) return 0; - nj = 0; - break; - - case STJ: - if (tr->nodeidx[tpos] != 0) return 0; - if (nj > 0) - { - if (tr->pos[tpos] != i+1) return 0; - i++; - } - else if (tr->pos[tpos] != 0) return 0; - nj++; - break; - - case STC: - if (tr->nodeidx[tpos] != 0) return 0; - if (nc > 0) - { - if (tr->pos[tpos] != i+1) return 0; - i++; - } - else if 
(tr->pos[tpos] != 0) return 0; - nc++; - break; - - case STT: - if (tpos != tr->tlen - 1) return 0; - if (tr->nodeidx[tpos] != 0) return 0; - if (tr->pos[tpos] != 0) return 0; - if (i != N) return 0; - break; - - case STBOGUS: - default: - return 0; - } /* end switch over statetypes */ - } /* end loop over trace positions */ - - return 1; -} - - -/* Function: TraceCompare() - * Date: SRE, Wed Mar 4 17:26:49 1998 [St. Louis] - * - * Purpose: Compare two tracebacks; return 1 if they're - * identical, else 0. Written for Shiva testsuite. - * - * Args: t1 - first trace - * t2 - second trace - * - * Returns: 1 if identical; 0 elsewise - */ -int -TraceCompare(struct p7trace_s *t1, struct p7trace_s *t2) -{ - int tpos; - - if (t1->tlen != t2->tlen) return 0; - - for (tpos = 0; tpos < t1->tlen; tpos++) - { - if (t1->statetype[tpos] != t2->statetype[tpos]) return 0; - if (t1->nodeidx[tpos] != t2->nodeidx[tpos]) return 0; - if (t1->pos[tpos] != t2->pos[tpos]) return 0; - } - return 1; -} - diff --git a/forester/archive/RIO/others/hmmer/src/display.c b/forester/archive/RIO/others/hmmer/src/display.c deleted file mode 100644 index 137180a..0000000 --- a/forester/archive/RIO/others/hmmer/src/display.c +++ /dev/null @@ -1,447 +0,0 @@ -/************************************************************ - * Copyright (C) 1998 Ian Holmes - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* display.c - * Author: Ian Holmes (ihh@sanger.ac.uk, Jun 5 1998) - * Derived from core_algorithms.c (SRE, Nov 11 1996) - * Incorporated SRE, Sat Nov 6 10:09:41 1999 - * - * Functions for displaying HMMer2.0 structures. 
- * - * RCS $Id: display.c,v 1.1.1.1 2005/03/22 08:33:59 cmzmasek Exp $ - */ - -#include "structs.h" -#include "config.h" -#include "funcs.h" -#include "squid.h" - -void PrintIscore(int sc); - -void PrintTransition(char src, - int isrc, - int ksrc, - char dest, - int idest, - int kdest, - int sc, - struct p7trace_s **alignment, - int *min, - int *max, - int *on, - int A); - - -/* Function: DisplayPlan7Posteriors() - * - * Purpose: Print out posterior transition probabilities - * in modelpost format. - * NB only prints out transitions that touch - * either the Viterbi or the optimal accuracy path. - * - * Args: L - the length of the sequence - * hmm - the model - * forward - forward matrix - * backward - backward matrix - * viterbi - Viterbi trace - * optacc - optimal accuracy trace - * - * Return: void - * - */ -void DisplayPlan7Posteriors(int L, struct plan7_s *hmm, - struct dpmatrix_s *forward, - struct dpmatrix_s *backward, - struct p7trace_s *viterbi, - struct p7trace_s *optacc) -{ - struct p7trace_s* alignment[2]; - alignment[0] = viterbi; - alignment[1] = optacc; - DisplayPlan7PostAlign (L, hmm, forward, backward, alignment, 2); -} - - -/* Function: DisplayPlan7PostAlign() - * - * Purpose: Print out posterior transition probabilities - * in modelpost format, for any set of alignments. 
- * - * Args: L - the length of the sequence - * hmm - the model - * forward - forward matrix - * backward - backward matrix - * alignment - array of traces - * A - size of alignment array - * - * Return: void - * - */ -void DisplayPlan7PostAlign(int L, struct plan7_s *hmm, - struct dpmatrix_s *forward, - struct dpmatrix_s *backward, - struct p7trace_s **alignment, - int A) -{ - int sc; - int i; - int j; - int k; - int kmin; - int kmax; - int* min; - int* max; - int* on; - char state; - - sc = forward->xmx[L][XMC] + hmm->xsc[XTC][MOVE]; /* total Forward score */ - - min = (int*) calloc (A, sizeof(int)); - max = (int*) calloc (A, sizeof(int)); - on = (int*) calloc (A, sizeof(int)); - - for (i = 0; i <= L; i++) - { - for (j = 0; j < A; j++) { - while (alignment[j]->pos[min[j]] < i - 1 && min[j] < alignment[j]->tlen - 1) - min[j]++; - - while (alignment[j]->pos[max[j]] <= i + 1 && max[j] < alignment[j]->tlen - 1) - max[j]++; - } - - for (state = STM; state <= STJ; state++) - { - if (state == STM || state == STB) - { - kmin = 1; - kmax = hmm->M; - } - else if (state == STD) - { - kmin = 2; - kmax = hmm->M - 1; - } - else if (state == STI) - { - kmin = 1; - kmax = hmm->M - 1; - } - else - kmin = kmax = 0; - - for (k = kmin; k <= kmax; k++) - { - switch (state) - { - case STM: - if (iM) - PrintTransition (STM,i,k, STM,i+1,k+1, - forward->mmx[i][k] + hmm->tsc[k][TMM] + backward->mmx[i+1][k+1] - sc, - alignment, min, max, on, A); - - if (iM) - PrintTransition (STM,i,k, STI,i+1,k, - forward->mmx[i][k] + hmm->tsc[k][TMI] + backward->imx[i+1][k] - sc, - alignment, min, max, on, A); - - if (kM-1) - PrintTransition (STM,i,k, STD,i,k+1, - forward->mmx[i][k] + hmm->tsc[k][TMD] + backward->dmx[i][k+1] - sc, - alignment, min, max, on, A); - - PrintTransition (STM,i,k, STE,i,0, - forward->mmx[i][k] + hmm->esc[k] + backward->xmx[i][XME] - sc, - alignment, min, max, on, A); - break; - - case STD: - if (idmx[i][k] + hmm->tsc[k][TDM] + backward->mmx[i+1][k+1] - sc, - alignment, min, 
max, on, A); - - PrintTransition (STD,i,k, STD,i,k+1, - forward->dmx[i][k] + hmm->tsc[k][TDD] + backward->dmx[i][k+1] - sc, - alignment, min, max, on, A); - - break; - - case STI: - if (iimx[i][k] + hmm->tsc[k][TIM] + backward->mmx[i+1][k+1] - sc, - alignment, min, max, on, A); - - if (iimx[i][k] + hmm->tsc[k][TII] + backward->imx[i+1][k] - sc, - alignment, min, max, on, A); - - break; - - case STB: - if (ixmx[i][XMB] + hmm->bsc[k] + backward->mmx[i+1][k] - sc, - alignment, min, max, on, A); - break; - - default: - break; - - } - } - - switch (state) - { - case STN: - PrintTransition (STN,i,0, STB,i,0, - forward->xmx[i][XMN] + hmm->xsc[XTN][MOVE] + backward->xmx[i][XMB] - sc, - alignment, min, max, on, A); - - if (ixmx[i][XMN] + hmm->xsc[XTN][LOOP] + backward->xmx[i+1][XMN] - sc, - alignment, min, max, on, A); - break; - - case STJ: - PrintTransition (STJ,i,0, STB,i,0, - forward->xmx[i][XMJ] + hmm->xsc[XTJ][MOVE] + backward->xmx[i][XMB] - sc, - alignment, min, max, on, A); - - if (ixmx[i][XMJ] + hmm->xsc[XTJ][LOOP] + backward->xmx[i+1][XMJ] - sc, - alignment, min, max, on, A); - break; - - case STC: - PrintTransition (STC,i,0, STT,i,0, - forward->xmx[i][XMC] + hmm->xsc[XTC][MOVE] - sc, /* should be 1 */ - alignment, min, max, on, A); - - if (ixmx[i][XMC] + hmm->xsc[XTC][LOOP] + backward->xmx[i+1][XMC] - sc, - alignment, min, max, on, A); - break; - - case STE: - PrintTransition (STE,i,0, STC,i,0, - forward->xmx[i][XME] + hmm->xsc[XTE][MOVE] + backward->xmx[i][XMC] - sc, - alignment, min, max, on, A); - - PrintTransition (STE,i,0, STJ,i,0, - forward->xmx[i][XME] + hmm->xsc[XTE][LOOP] + backward->xmx[i][XMJ] - sc, - alignment, min, max, on, A); - break; - - case STS: - if (i == 0) - PrintTransition (STS,i,0, STN,i,0, - backward->xmx[i][XMN] - sc, /* should be 1 */ - alignment, min, max, on, A); - break; - - case STM: - case STD: - case STI: - case STB: - case STT: - break; - - default: - Die ("unknown state"); - - } - } - } - - free (min); - free (max); - free (on); 
- -} - - - -/* Function: DisplayPlan7Matrix() - * - * Purpose: Print out a dynamic programming matrix. - * - * Args: dsq - sequence in digitized form - * L - length of dsq - * hmm - the model - * mx - dp matrix - * - * Return: void - * - * The output of this function inverts HMMer's concept of rows and columns - * (i.e. each row represents a state, and each column, a residue); - * also, probabilities are displayed as natural logs, not bit scores. - * It should probably only be used by ihh... - * - */ -void -DisplayPlan7Matrix(char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s *mx) -{ - int i; - int k; - - printf(" * "); - for (i=1;i<=L;i++) printf(" %c ",Alphabet[dsq[i]]); - printf("\nN "); - for (i=0;i<=L;i++) PrintIscore(mx->xmx[i][XMN]); - for (k=1;k<=hmm->M;k++) { - printf("\nM%-3d ",k); - for (i=0;i<=L;i++) PrintIscore(mx->mmx[i][k]); - } - for (k=1;kM;k++) { - printf("\nI%-3d ",k); - for (i=0;i<=L;i++) PrintIscore(mx->imx[i][k]); - } - printf("\nE "); - for (i=0;i<=L;i++) PrintIscore(mx->xmx[i][XME]); - printf("\nC "); - for (i=0;i<=L;i++) PrintIscore(mx->xmx[i][XMC]); - printf("\nJ "); - for (i=0;i<=L;i++) PrintIscore(mx->xmx[i][XMJ]); - printf("\nB "); - for (i=0;i<=L;i++) PrintIscore(mx->xmx[i][XMB]); - for (k=2;kM;k++) { - printf("\nD%-3d ",k); - for (i=0;i<=L;i++) PrintIscore(mx->dmx[i][k]); - } - printf("\n\n"); -} - - -void PrintIscore(int sc) { - double dsc; - double div; - dsc = (double) sc; - div = INTSCALE / 0.693147180559945; /* == INTSCALE / log(2) */ - dsc = dsc / div; - printf("%- #11.3e",dsc); -} - - -void PrintTransition(char src, - int isrc, - int ksrc, - char dest, - int idest, - int kdest, - int sc, - struct p7trace_s **alignment, - int *min, - int *max, - int *on, - int A) -{ - char src_str[6]; /* buffer for source state label */ - char dest_str[6]; /* buffer for destination state label */ - int j; - int tpos; - int tnext; - int pos; - int next; - int near; - - near = 0; - - for (j = 0; j < A; j++) { - on[j] = 0; - for (pos = 0, 
tpos = min[j]; tpos <= max[j]; tpos++) { - - if (alignment[j]->pos[tpos] != 0) - pos = alignment[j]->pos[tpos]; - - if (src == alignment[j]->statetype[tpos] - && ksrc == alignment[j]->nodeidx[tpos] - && isrc == pos) - near = TRUE; - - if (dest == alignment[j]->statetype[tpos] - && kdest == alignment[j]->nodeidx[tpos] - && idest == pos) - near = TRUE; - - if (tpos < alignment[j]->tlen - 1) - { - tnext = tpos + 1; - - /* fold up B->D->M transitions into pseudo- B->M transitions */ - - if (alignment[j]->statetype[tpos] == STB) - while (alignment[j]->statetype[tnext] == STD && tnext < alignment[j]->tlen - 1) - tnext++; - - next = alignment[j]->pos[tnext]; - if (next == 0) - next = pos; - - if (src == alignment[j]->statetype[tpos] - && ksrc == alignment[j]->nodeidx[tpos] - && isrc == pos - && dest == alignment[j]->statetype[tnext] - && kdest == alignment[j]->nodeidx[tnext] - && idest == next) - on[j] = TRUE; - } - } - } - - if (!near) return; - - switch (src) - { - case STM: sprintf (src_str, "M%d", ksrc); break; - case STD: sprintf (src_str, "D%d", ksrc); break; - case STI: sprintf (src_str, "I%d", ksrc); break; - case STS: sprintf (src_str, "S"); break; - case STN: sprintf (src_str, "N"); break; - case STB: sprintf (src_str, "B"); break; - case STE: sprintf (src_str, "E"); break; - case STC: sprintf (src_str, "C"); break; - case STJ: sprintf (src_str, "J"); break; - case STT: sprintf (src_str, "T"); break; - default: Die ("bad transition"); - } - - switch (dest) - { - case STM: sprintf (dest_str, "M%d", kdest); break; - case STD: sprintf (dest_str, "D%d", kdest); break; - case STI: sprintf (dest_str, "I%d", kdest); break; - case STS: sprintf (dest_str, "S"); break; - case STN: sprintf (dest_str, "N"); break; - case STB: sprintf (dest_str, "B"); break; - case STE: sprintf (dest_str, "E"); break; - case STC: sprintf (dest_str, "C"); break; - case STJ: sprintf (dest_str, "J"); break; - case STT: sprintf (dest_str, "T"); break; - default: Die ("bad transition"); - } - - 
printf ("%d\t%s\t%d\t%s\t%-14.7g\t", isrc, src_str, idest, dest_str, (double) Score2Prob(sc,1.)); - - for (j = 0; j < A; j++) { - if (on[j]) printf ("*"); - if (j < A - 1) printf ("\t"); - } - - printf ("\n"); - -} - diff --git a/forester/archive/RIO/others/hmmer/src/emit.c b/forester/archive/RIO/others/hmmer/src/emit.c deleted file mode 100644 index e6b101c..0000000 --- a/forester/archive/RIO/others/hmmer/src/emit.c +++ /dev/null @@ -1,457 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* emit.c - * SRE, Sun Mar 8 12:26:58 1998 - * RCS $Id: emit.c,v 1.1.1.1 2005/03/22 08:34:04 cmzmasek Exp $ - * - * Generation of sequences/traces from an HMM. - */ - -#include "structs.h" -#include "config.h" -#include "funcs.h" -#include "squid.h" - -#include - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -/* Function: EmitSequence() - * Date: SRE, Sun Mar 8 12:28:03 1998 [St. Louis] - * - * Purpose: Given a model, sample a sequence and/or traceback. 
- * - * Args: hmm - the model - * ret_dsq - RETURN: generated digitized sequence (pass NULL if unwanted) - * ret_L - RETURN: length of generated sequence - * ret_tr - RETURN: generated trace (pass NULL if unwanted) - * - * Returns: void - */ -void -EmitSequence(struct plan7_s *hmm, char **ret_dsq, int *ret_L, struct p7trace_s **ret_tr) -{ - struct p7trace_s *tr; - char type; /* current state type */ - int k; /* current node index */ - char *dsq; /* generated sequence, digitized */ - int L; /* length of sequence */ - int alloc_tlen; /* allocated space for traceback */ - int alloc_L; /* allocated space for sequence */ - int tpos; /* position in traceback */ - int sym; /* a generated symbol index */ - float t[4]; /* little array for choosing M transition from */ - - /* Initialize; allocations - */ - P7AllocTrace(64, &tr); - alloc_tlen = 64; - dsq = MallocOrDie(sizeof(char) * 64); - alloc_L = 64; - - TraceSet(tr, 0, STS, 0, 0); - TraceSet(tr, 1, STN, 0, 0); - dsq[0] = (char) Alphabet_iupac; - L = 1; - k = 0; - type = STN; - tpos = 2; - - while (type != STT) - { - /* Deal with state transition - */ - switch (type) { - case STB: - hmm->begin[0] = hmm->tbd1; /* begin[0] hack (documented in structs.h) */ - k = FChoose(hmm->begin, hmm->M+1); - if (k == 0) { type = STD; k = 1; } else {type = STM; } - break; - - case STI: type = (FChoose(hmm->t[k]+TIM, 2) == 0) ? STM : STI; if (type == STM) k++; break; - case STN: type = (FChoose(hmm->xt[XTN], 2) == LOOP) ? STN : STB; k = 0; break; - case STE: type = (FChoose(hmm->xt[XTE], 2) == LOOP) ? STJ : STC; k = 0; break; - case STC: type = (FChoose(hmm->xt[XTC], 2) == LOOP) ? STC : STT; k = 0; break; - case STJ: type = (FChoose(hmm->xt[XTJ], 2) == LOOP) ? STJ : STB; k = 0; break; - - case STD: - if (k < hmm->M) { - type = (FChoose(hmm->t[k]+TDM, 2) == 0) ? 
STM : STD; - k++; - } else { - type = STE; - k = 0; - } - break; - - case STM: - if (k < hmm->M) { - FCopy(t, hmm->t[k], 3); - t[3] = hmm->end[k]; - switch (FChoose(t,4)) { - case 0: k++; type = STM; break; - case 1: type = STI; break; - case 2: k++; type = STD; break; - case 3: k=0; type = STE; break; - default: Die("never happens"); - } - } else { - k = 0; - type = STE; - } - break; - - case STT: - case STBOGUS: - default: - Die("can't happen."); - } - - /* Choose a symbol emission, if necessary - */ - sym = -1; - if (type == STM) sym = FChoose(hmm->mat[k], Alphabet_size); - else if (type == STI) sym = FChoose(hmm->ins[k], Alphabet_size); - else if ((type == STN && tr->statetype[tpos-1] == STN) || - (type == STC && tr->statetype[tpos-1] == STC) || - (type == STJ && tr->statetype[tpos-1] == STJ)) - sym = FChoose(hmm->null, Alphabet_size); - - /* Add to the traceback; deal with realloc if necessary - */ - TraceSet(tr, tpos, type, k, (sym != -1) ? L : 0); - tpos++; - if (tpos == alloc_tlen) { - alloc_tlen += 64; - P7ReallocTrace(tr, alloc_tlen); - } - - /* Add to the digitized seq; deal with realloc, if necessary - */ - if (sym != -1) { - dsq[L] = (char) sym; - L++; - if (L+1 == alloc_L) { /* L+1 leaves room for sentinel byte + \0 */ - alloc_L += 64; - dsq = ReallocOrDie(dsq, sizeof(char) * alloc_L); - } - } - } - - /* Finish off the trace - */ - tr->tlen = tpos; - - /* Finish off the dsq with sentinel byte and null terminator. - * Emitted Sequence length is L-1. - */ - dsq[L] = (char) Alphabet_iupac; - dsq[L+1] = '\0'; - L--; - - /* Return - */ - if (ret_dsq != NULL) *ret_dsq = dsq; else free(dsq); - if (ret_L != NULL) *ret_L = L; - if (ret_tr != NULL) *ret_tr = tr; else P7FreeTrace(tr); - return; -} - -#ifdef SRE_REMOVED -/* Function: EmitBestSequence() - * Date: SRE, Tue Nov 10 16:21:59 1998 [St. Louis] - * - * Purpose: Given a model, emit the maximum probability sequence - * from it: argmax_{seq} P(seq | model). 
- * This is a sensible HMM equivalent to a "consensus" - * sequence. - * The model should be Plan7NakedConfig()'ed; - * in particular, if we allowed B->M and M->E, - * the highest probability sequence would be - * artifactually short. (We could do the highest - * scoring sequence instead, to get around this problem, - * but the highest scoring sequence is prone to - * other artifacts -- any looping state N,C,J, or I - * with a positively scoring residue leads to - * an infinitely long "best scoring" sequence.) - * - * Args: hmm - the model - * ret_seq - RETURN: best sequence - * ret_L - RETURN: length of sequence - * ret_tr - RETURN: traceback of the model/seq alignment; or NULL. - * - * Returns: void - */ -void -EmitBestSequence(struct plan7_s *hmm, char **ret_dsq, int *ret_L, struct p7trace_s **ret_tr) -{ - char *seq; /* RETURN: best seq */ - struct p7trace_s *tr; /* RETURN: traceback */ - float *mmx, *imx, *dmx; /* log P forward scores for M,D,I */ - char *mtb, *itb, *dtb; /* traceback ptrs for M,D,I */ - int x; /* counter for symbols */ - int k; /* counter for nodes */ - float sc; /* tmp var for a log P */ - int bestsym; - int rpos; /* position in a sequence */ - int tpos; /* position in a trace */ - int tlen; /* length of the traceback */ - - /* Initial allocations. We only need a 1D matrix and its shadow; - * it's overkill to use the Plan7Matrix structures, so don't. - */ - mmx = MallocOrDie(sizeof(float) * (hmm->M+1)); - imx = MallocOrDie(sizeof(float) * (hmm->M)); - dmx = MallocOrDie(sizeof(float) * (hmm->M)); - mtb = MallocOrDie(sizeof(char) * (hmm->M+1)); - itb = MallocOrDie(sizeof(char) * (hmm->M)); - dtb = MallocOrDie(sizeof(char) * (hmm->M)); - - /* Initialization. - * We can safely assume a max probability path of S->N->B->(M1 or D1), - * so just init M1 and D1. - */ - mmx[1] = log(hmm->xt[XTN][MOVE]) + log(1.F - hmm->tbd1); - dmx[1] = - - - /* Main recursion, done as a push. - * The model is used in probability form; no wing folding needed. 
- */ - for (k = 1; k < hmm->M; k++) - { - /* Transits out of match state (init with these) - */ - mmx[k+1] = mmx[k] + log(hmm->t[k][TMM]); mtb[k+1] = STM; - dmx[k+1] = mmx[k] + log(hmm->t[k][TMD]); dtb[k+1] = STM; - if (k < hmm->M-1) - imx[k] = mmx[k] + log(hmm->t[k][TMI]); itb[k] = STM; - - /* Transits out of delete state - */ - if ((sc = dmx[k] + log(hmm->t[k][TDM])) > mmx[k+1]) - { mmx[k+1] = sc; mtb[k+1] = STD; } - if ((sc = dmx[k] + log(hmm->t[k][TDD])) > dmx[k+1]) - { dmx[k+1] = sc; dtb[k+1] = STD; } - - /* Transits out of insert state (self-loops are never good) - */ - if ((sc = imx[k] + log(hmm->t[k][TIM])) > mmx[k+1]) - { mmx[k+1] = sc; mtb[k+1] = STI; } - - /* Best emissions - */ - x = FMax(hmm->mat[k+1], Alphabet_size); - mmx[k+1] += log(hmm->mat[k+1][x]); - - if (k < hmm->M-1) { - x = FMax(hmm->ins[k+1], Alphabet_size); - imx[k+1] += log(hmm->ins[k+1][x]); - } - } -} -#endif /* SRE_REMOVED */ - - -/* Function: EmitConsensusSequence() - * Date: SRE, Wed Nov 11 11:08:59 1998 [St. Louis] - * - * Purpose: Generate a "consensus sequence". For the purposes - * of a profile HMM, this is defined as: - * - for each node: - * - if StateOccupancy() says that M is used - * with probability >= 0.5, this M is "consensus". - * Then, choose maximally likely residue. - * if P>0.5 (protein) or P>0.9 (DNA), make - * it upper case; else make it lower case. - * - if StateOccupancy() says that I - * is used with P >= 0.5, this I is "consensus"; - * use it 1/(1-TII) times (its expectation value). - * Generate an "x" from each I. - * - * The function expects that the model is config'ed - * by Plan7NakedConfig(): that is, for a single global pass - * with no N,C,J involvement. 
- * - * - * Args: hmm - the model - * ret_seq - RETURN: consensus sequence (pass NULL if unwanted) - * ret_dsq - RETURN: digitized consensus sequence (pass NULL if unwanted) - * ret_L - RETURN: length of generated sequence - * ret_tr - RETURN: generated trace (pass NULL if unwanted) - * - * Returns: void - */ -void -EmitConsensusSequence(struct plan7_s *hmm, char **ret_seq, char **ret_dsq, int *ret_L, struct p7trace_s **ret_tr) -{ - struct p7trace_s *tr; /* RETURN: traceback */ - char *dsq, *seq; /* sequence in digitized and undigitized form */ - float *mp, *ip, *dp; /* state occupancies from StateOccupancy() */ - int nmat, ndel, nins; /* number of matches, deletes, inserts used */ - int k; /* counter for nodes */ - int tpos; /* position in trace */ - int i; /* position in seq (equiv pos in dsq is i+1 */ - int x; /* symbol choice (M) or # symbols (I) */ - float mthresh; /* >= this, show symbol as upper case */ - - if (Alphabet_type == hmmAMINO) mthresh = 0.5; - else mthresh = 0.9; - - StateOccupancy(hmm, &mp, &ip, &dp); - - /* First pass: how many states do we need in the trace? - * how long will the sequence be? - */ - nmat = ndel = nins = 0; - for (k = 1; k <= hmm->M; k++) - { - if (mp[k] >= 0.5) nmat++; else ndel++; - if (k < hmm->M && ip[k] >= 0.5) - nins += (int) (1.f / (1.f - hmm->t[k][TII])); - } - - /* Allocations - */ - P7AllocTrace(6 + nmat + ndel + nins, &tr); - dsq = MallocOrDie(sizeof(char) * (nmat+nins+3)); - seq = MallocOrDie(sizeof(char) * (nmat+nins+1)); - - /* Main pass. - * Construct consensus trace, seq, and dsq. 
- */ - TraceSet(tr, 0, STS, 0, 0); - TraceSet(tr, 1, STN, 0, 0); - TraceSet(tr, 2, STB, 0, 0); - dsq[0] = Alphabet_iupac; /* guard byte */ - tpos = 3; - i = 0; - for (k = 1; k <= hmm->M; k++) - { - if (mp[k] >= 0.5) - { - x = FMax(hmm->mat[k], Alphabet_size); - TraceSet(tr, tpos, STM, k, i+1); - seq[i] = Alphabet[x]; - dsq[i+1] = x; - if (hmm->mat[k][x] < mthresh) - seq[i] = tolower((int) seq[i]); - i++; - tpos++; - } - else - { - TraceSet(tr, tpos, STD, k, 0); - tpos++; - } - - if (k < hmm->M && ip[k] >= 0.5) - { - x = (int) (1.f / (1.f - hmm->t[k][TII])); - while (x--) - { - TraceSet(tr, tpos, STI, k, i+1); - seq[i] = 'x'; - dsq[i+1] = Alphabet_iupac - 1; - i++; - tpos++; - } - } - } - TraceSet(tr, tpos, STE, 0, 0); tpos++; - TraceSet(tr, tpos, STC, 0, 0); tpos++; - TraceSet(tr, tpos, STT, 0, 0); tpos++; - dsq[i+1] = Alphabet_iupac; - - free(mp); - free(ip); - free(dp); - if (ret_seq != NULL) *ret_seq = seq; else free(seq); - if (ret_dsq != NULL) *ret_dsq = dsq; else free(dsq); - if (ret_L != NULL) *ret_L = i; - if (ret_tr != NULL) *ret_tr = tr; else P7FreeTrace(tr); -} - - - -/* Function: StateOccupancy() - * Date: SRE, Wed Nov 11 09:46:15 1998 [St. Louis] - * - * Purpose: Calculate the expected state occupancy for - * a given HMM in generated traces. - * - * Note that expected prob of getting into - * any special state in a trace is trivial: - * S,N,B,E,C,T = 1.0 - * J = E->J transition prob - * - * Args: hmm - the model - * ret_mp - RETURN: [1..M] prob's of occupying M - * ret_ip - RETURN: [1..M-1] prob's of occupying I - * ret_dp - RETURN: [1..M] prob's of occupying D - * - * Returns: void - * mp, ip, dp are malloc'ed here. Caller must free(). 
- */ -void -StateOccupancy(struct plan7_s *hmm, float **ret_mp, float **ret_ip, float **ret_dp) -{ - float *fmp, *fip, *fdp; /* forward probabilities */ - int k; /* counter for nodes */ - - /* Initial allocations - */ - fmp = MallocOrDie (sizeof(float) * (hmm->M+1)); - fip = MallocOrDie (sizeof(float) * (hmm->M)); - fdp = MallocOrDie (sizeof(float) * (hmm->M+1)); - - /* Forward pass. - */ - fdp[1] = hmm->tbd1; - fmp[1] = hmm->begin[1]; - fip[1] = fmp[1] * hmm->t[1][TMI]; - for (k = 2; k <= hmm->M; k++) - { - /* M: from M,D,I at k-1, or B; count t_II as 1.0 */ - fmp[k] = fmp[k-1] * hmm->t[k-1][TMM] + - fip[k-1] + - fdp[k-1] * hmm->t[k-1][TDM] + - hmm->begin[k]; - /* D: from M,D at k-1 */ - fdp[k] = fmp[k-1] * hmm->t[k-1][TMD] + - fdp[k-1] * hmm->t[k-1][TDD]; - /* I: from M at k; don't count II */ - if (k < hmm->M) { - fip[k] = fmp[k] * hmm->t[k][TMI]; - } - - SQD_DASSERT2((fabs(1.0f - fmp[k] - fdp[k]) < 1e-6f)); - fmp[k] /= fmp[k]+fdp[k]; /* prevent propagating fp errors */ - fdp[k] /= fmp[k]+fdp[k]; - } - /* We don't need a backward pass; all backwards P's are 1.0 - * by definition (you can always get out of a state with P=1). - * The only situation where this might not be true is for - * a TII of 1.0, when TIM = 0 -- but in that case, if there's - * a finite chance of getting into that insert state, the model - * generates infinitely long sequences, so we can consider this - * situation "perverse" and disallow it elsewhere in building - * profile HMMs. - */ - - /* Return. 
- */ - *ret_mp = fmp; - *ret_dp = fdp; - *ret_ip = fip; -} diff --git a/forester/archive/RIO/others/hmmer/src/emulation.c b/forester/archive/RIO/others/hmmer/src/emulation.c deleted file mode 100644 index 7de1833..0000000 --- a/forester/archive/RIO/others/hmmer/src/emulation.c +++ /dev/null @@ -1,242 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* emulation.c - * SRE, Wed Jan 21 07:50:01 1998 - * - * Interfaces between HMMER and other software packages. - * - * RCS $Id: emulation.c,v 1.1.1.1 2005/03/22 08:34:01 cmzmasek Exp $ - */ - -#include -#include - -#include "squid.h" -#include "config.h" -#include "structs.h" -#include "funcs.h" -#include "version.h" - - -/* Function: WriteProfile() - * Date: SRE, Wed Jan 21 07:58:09 1998 [St. Louis] - * - * Purpose: Given an HMM, write a GCG profile .prf file as - * output. Based on examination of Michael Gribskov's Fortran - * source in GCG 9.1; on reverse engineering - * by examination of GCG 9.1 output from "profilemake" - * and how the .prf file is used by "profilesearch"; - * and on the GCG 9.0 documentation. 
- * - * See notes 28 Jan 98 for detail; in brief, the conversion goes like: - * - * PROF(i,k) = match score = msc(i,k) + TMM(k-1) - * - * GAP(k) = cost per insertion = TMI(k-1) + TIM(k-1) - TMM(k-1) - TII(k-1) - * LEN(k) = cost per inserted x = TII(k-1) - * - * QGAP(k) = cost per deletion = TDM(k-1) + TMD(unknown) - TMM(k-1) - TDD(k-1) - * QLEN(k) = cost per deleted k = TDD(k-1) - * - * Note that GCG affine gaps are GAP + n * LEN; - * HMMER affine gaps count (n-1) * gap-extend, thus an - * extra TII gets taken away from GAP (and TDD from QGAP), - * since GCG will charge it. - * - * Also note how the TMM transitions, which have no equivalent - * in a profile, get smuggled in OK. - * - * Also note that GCG charges gaps using the profile position - * /after/ the gap, not preceding the gap as HMMER does. - * - * Also note the TMD(unknown) in the QGAP calculation. HMMER - * distinguishes between gap-open and gap-close, but GCG does not, - * so there is a fundamental incompatibility here. Here - * we use an upper (best-scoring, minimum-cost) bound. - * - * And finally note that GCG's implementation forces GAP=QGAP and - * LEN=QLEN. Here, we upper bound again. Compugen's implementation - * allows an "extended profile" format which distinguishes between - * the two. - * - * The upper bound approach to these scores means that a - * score given by an emulated profile is an upper bound: the HMMER - * score (for a single Smith/Waterman style local alignment) - * cannot be better than this. This is intentional, so that - * the Compugen BIC can be used for rapid prefiltering of - * the database. 
- * - * To get a close approximation of hmmsw scores, call - * profilesearch as - * profilesearch -noave -nonor -gap 10 -len 1 - * On the Compugen BIC, using extended profiles, you want: - * om -model=xsw.model -gapop=10 -gapext=1 -qgapop=10 -qgapext=1 -noave -nonor - * - * Args: fp - open FILE to write to (or stdout, possibly) - * hmm - the HMM to write - * do_xsw - TRUE to write Compugen's experimental extended profile format - * - * Returns: (void) - */ -void -WriteProfile(FILE *fp, struct plan7_s *hmm, int do_xsw) -{ - int k; /* position in model */ - int x; /* symbol index */ - int sc; /* a score to print */ - float nx; /* expected # of symbol x */ - int gap, len, qgap, qlen; /* penalties to charge */ - - P7Logoddsify(hmm, TRUE); - - /* GCG can't deal with long profiles. Their limit is 1000 - * positions. However, Compugen can. Therefore we warn, - * but don't die. - */ - if (hmm->M > 1000 && !do_xsw) - Warn("Profile %s will have more than 1000 positions. GCG won't read it; Compugen will.", - hmm->name); - - /* Header information. - * GCG will look for sequence type and length of model. - * Other than this, nothing is parsed until we get to the - * Cons line that has a ".." on it. - * Lines that begin with "!" are comments. - */ - if (Alphabet_type == hmmAMINO) fprintf(fp, "!!AA_PROFILE 1.0\n"); - else if (Alphabet_type == hmmNUCLEIC) fprintf(fp, "!!NA_PROFILE 1.0\n"); - else Die("No support for profiles with non-biological alphabets"); - - if (Alphabet_type == hmmAMINO) fprintf(fp, "(Peptide) "); - else if (Alphabet_type == hmmNUCLEIC) fprintf(fp, "(Nucleotide) "); - fprintf(fp, "HMMCONVERT v%s Length: %d %s|%s|%s\n", - RELEASE, hmm->M, hmm->name, - hmm->flags & PLAN7_ACC ? hmm->acc : "", - hmm->flags & PLAN7_DESC ? 
hmm->desc : ""); - - /* Insert some HMMER-specific commentary - */ - if (do_xsw) - { - fprintf(fp, " Profile converted from a profile HMM using HMMER v%s emulation.\n", RELEASE); - fprintf(fp, " Compugen XSW extended profile format.\n"); - fprintf(fp, " Use -model=xsw.model -nonor -noave -gapop=10 -gapext=1 -qgapop=10 -qgapext=1\n"); - fprintf(fp, " with om on the Compugen BIC to get the closest approximation to HMMER bit scores.\n"); - fprintf(fp, " WARNING: There is a loss of information in this conversion.\n"); - fprintf(fp, " Neither the scores nor even the rank order of hits will be precisely\n"); - fprintf(fp, " preserved in a comparison of HMMER hmmsearch to GCG profilesearch.\n"); - fprintf(fp, " The profile score is an approximation of the (single-hit) HMMER score.\n\n"); - } - else - { - fprintf(fp, " Profile converted from a profile HMM using HMMER v%s emulation.\n", RELEASE); - fprintf(fp, " Use -nonor -noave -gap=10 -len=1 with profilesearch and friends\n"); - fprintf(fp, " to get the closest approximation to HMMER bit scores.\n"); - fprintf(fp, " WARNING: There is a loss of information in this conversion.\n"); - fprintf(fp, " Neither the scores nor even the rank order of hits will be precisely\n"); - fprintf(fp, " preserved in a comparison of HMMER hmmsearch to GCG profilesearch.\n"); - fprintf(fp, " The profile score is an approximation of the (single-hit) HMMER score.\n\n"); - } - - - /* Do the CONS line, which gives the valid IUPAC symbols and their order - */ - fprintf(fp, "Cons"); - for (x = 0; x < Alphabet_iupac; x++) - fprintf(fp, " %c ", Alphabet[x]); - if (do_xsw) - fprintf(fp, " Gap Len QGap Qlen ..\n"); - else - fprintf(fp, " Gap Len ..\n"); - - /* Now, the profile; for each position in the HMM, write a line of profile. - */ - for (k = 1; k <= hmm->M; k++) - { - /* GCG adds some indexing as comments */ - if ((k-1)%10 == 0 && k > 10) - fprintf(fp, "! 
%d\n", k); - - /* find consensus residue by max prob */ - x = FMax(hmm->mat[k], Alphabet_size); - fprintf(fp, " %c ", Alphabet[x]); - /* generate emission score profile; - * Profiles are scaled by a factor of 100 - */ - for (x = 0; x < Alphabet_iupac; x++) - { - sc = hmm->msc[x][k]; - if (k < hmm->M) sc += hmm->tsc[k][TMM]; - sc = sc * 100 / INTSCALE; - fprintf(fp, "%5d ", sc); - } - /* Generate gap open, gap extend penalties; - note we will force profilesearch to weights of 10, 1, - and that GCG profile values are percentages - of these base penalties, 0..100.*/ - /* gap open (insertion)*/ - if (k > 1) - { - gap = -1 * (hmm->tsc[k-1][TMI] + hmm->tsc[k-1][TIM] - hmm->tsc[k-1][TMM] - hmm->tsc[k-1][TII]); - gap = gap * 100 / (10.0 * INTSCALE); - } - else gap = 100; /* doesn't matter because GAP_1 is never used */ - - /* gap extend (insertion)*/ - if (k > 1) - { - len = -1 * hmm->tsc[k-1][TII]; - len = len * 100 / (1.0 * INTSCALE); - } - else len = 100; /* again, doesn't matter because LEN_1 is never used */ - - /* gap open (deletion) */ - if (k > 1) - { - qgap = -1 * (hmm->tsc[k-1][TDM] + hmm->tsc[k-1][TMD] - hmm->tsc[k-1][TMM] - hmm->tsc[k-1][TDD]); - qgap = qgap * 100 / (10.0 * INTSCALE); - } - else qgap = 100; - /* gap extend (deletion) */ - if (k > 1) - { - qlen = -1 * hmm->tsc[k-1][TDD]; - qlen = qlen * 100 / (1.0 * INTSCALE); - } - else qlen = 100; - - - if (do_xsw) - fprintf(fp, "%5d %5d %5d %5d\n", gap, len, qgap, qlen); - else - fprintf(fp, "%5d %5d\n", gap, len); /* assume insertions >= deletions */ - } - - /* The final line of the profile is a count of the observed - * residues in the training sequences. This information is not - * available in an HMM, and I'm not sure that GCG ever uses it. - * Approximate it by calculating a /very/ rough expectation. 
- */ - fprintf(fp, " * "); - for (x = 0; x < Alphabet_size; x++) - { - nx = 0.0; - for (k = 1; k <= hmm->M; k++) - nx += hmm->mat[k][x]; - nx *= hmm->nseq; - fprintf(fp, "%5d ", (int) nx); - } - for (; x < Alphabet_iupac; x++) - fprintf(fp, "%5d ", 0); - fprintf(fp, "\n"); - return; -} - diff --git a/forester/archive/RIO/others/hmmer/src/funcs.h b/forester/archive/RIO/others/hmmer/src/funcs.h deleted file mode 100644 index 81f31fb..0000000 --- a/forester/archive/RIO/others/hmmer/src/funcs.h +++ /dev/null @@ -1,350 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* funcs.h - * RCS $Id: funcs.h,v 1.1.1.1 2005/03/22 08:34:07 cmzmasek Exp $ - * - * Declarations of external functions in HMMER. - */ - -#ifndef FUNCSH_INCLUDED -#define FUNCSH_INCLUDED - -#include "config.h" -#include "structs.h" -#include "squid.h" -#include "msa.h" - -/* alphabet.c - * Configuration of global alphabet information - */ -extern void DetermineAlphabet(char **rseqs, int nseq); -extern void SetAlphabet(int type); -extern int SymbolIndex(char sym); -extern char *DigitizeSequence(char *seq, int L); -extern char *DedigitizeSequence(char *dsq, int L); -extern void DigitizeAlignment(MSA *msa, char ***ret_dsqs); -extern void P7CountSymbol(float *counters, char sym, float wt); -extern void DefaultGeneticCode(int *aacode); -extern void DefaultCodonBias(float *codebias); - -/* from core_algorithms.c - * Clean research/demonstration versions of basic algorithms. 
- */ -extern struct dpmatrix_s *AllocPlan7Matrix(int rows, int M, int ***xmx, - int ***mmx, int ***imx, int ***dmx); -extern struct dpshadow_s *AllocShadowMatrix(int rows, int M, char ***xtb, - char ***mtb, char ***itb, char ***dtb); -extern void FreePlan7Matrix(struct dpmatrix_s *mx); -extern void FreeShadowMatrix(struct dpshadow_s *tb); -extern int P7ViterbiSize(int L, int M); -extern int P7SmallViterbiSize(int L, int M); -extern int P7WeeViterbiSize(int L, int M); -extern float P7Forward(char *dsq, int L, struct plan7_s *hmm, - struct dpmatrix_s **ret_mx); -extern float P7Viterbi(char *dsq, int L, struct plan7_s *hmm, - struct p7trace_s **ret_tr); -extern void P7ViterbiTrace(struct plan7_s *hmm, char *dsq, int L, - struct dpmatrix_s *mx, struct p7trace_s **ret_tr); -extern float P7SmallViterbi(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr); -extern float P7ParsingViterbi(char *dsq, int L, struct plan7_s *hmm, - struct p7trace_s **ret_tr); -extern float P7WeeViterbi(char *dsq, int L, struct plan7_s *hmm, - struct p7trace_s **ret_tr); -extern float Plan7ESTViterbi(char *dsq, int L, struct plan7_s *hmm, - struct dpmatrix_s **ret_mx); -extern struct p7trace_s *P7ViterbiAlignAlignment(MSA *msa, struct plan7_s *hmm); -extern struct p7trace_s *ShadowTrace(struct dpshadow_s *tb, struct plan7_s *hmm, int L); -extern void PostprocessSignificantHit(struct tophit_s *ghit, struct tophit_s *dhit, struct p7trace_s *tr, struct plan7_s *hmm, char *dsq, int L, char *seqname, char *seqacc, char *seqdesc, int do_forward, float sc_override, int do_null2, struct threshold_s *thresh, int hmmpfam_mode); - - -/* from debug.c - * Debugging output of various sorts. 
- */ -extern char *Statetype(char st); -extern char *AlphabetType2String(int type); -extern void P7PrintTrace(FILE *fp, struct p7trace_s *tr, - struct plan7_s *hmm, char *dsq); -extern void P7PrintPrior(FILE *fp, struct p7prior_s *pri); -extern int TraceCompare(struct p7trace_s *t1, struct p7trace_s *t2); -extern int TraceVerify(struct p7trace_s *tr, int M, int N); - -/* - * from display.c - * Ian Holmes' functions for displaying HMMER2 data structures, especially - * for posterior probabilities in alignments. - */ -extern void DisplayPlan7Matrix(char *dsq, int L, struct plan7_s *hmm, - struct dpmatrix_s *mx); -extern void DisplayPlan7Posteriors(int L, struct plan7_s *hmm, - struct dpmatrix_s *forward, struct dpmatrix_s *backward, - struct p7trace_s *viterbi, struct p7trace_s *optacc); -extern void DisplayPlan7PostAlign(int L, struct plan7_s *hmm, - struct dpmatrix_s *forward, struct dpmatrix_s *backward, - struct p7trace_s **alignment, int A); - - -/* from emit.c - * Generation of sequences/traces from an HMM - */ -extern void EmitSequence(struct plan7_s *hmm, char **ret_dsq, int *ret_L, struct p7trace_s **ret_tr); -extern void EmitConsensusSequence(struct plan7_s *hmm, char **ret_seq, char **ret_dsq, int *ret_L, struct p7trace_s **ret_tr); -extern void StateOccupancy(struct plan7_s *hmm, float **ret_mp, float **ret_ip, float **ret_dp); - - -/* from emulation.c - * Interfaces between HMMER and other software packages - */ -extern void WriteProfile(FILE *fp, struct plan7_s *hmm, int do_xsw); - - -/* from histogram.c - * accumulation of scores - */ -extern struct histogram_s *AllocHistogram(int min, int max, int lumpsize); -extern void FreeHistogram(struct histogram_s *h); -extern void UnfitHistogram(struct histogram_s *h); -extern void AddToHistogram(struct histogram_s *h, float sc); -extern void PrintASCIIHistogram(FILE *fp, struct histogram_s *h); -extern void PrintXMGRHistogram(FILE *fp, struct histogram_s *h); -extern void PrintXMGRDistribution(FILE *fp, struct 
histogram_s *h); -extern void PrintXMGRRegressionLine(FILE *fp, struct histogram_s *h); -extern void EVDBasicFit(struct histogram_s *h); -extern int ExtremeValueFitHistogram(struct histogram_s *h, int censor, - float high_hint); -extern void ExtremeValueSetHistogram(struct histogram_s *h, float mu, float lambda, - float low, float high, int ndegrees); -extern int GaussianFitHistogram(struct histogram_s *h, float high_hint); -extern void GaussianSetHistogram(struct histogram_s *h, float mean, float sd); -extern double EVDDensity(float x, float mu, float lambda); -extern double EVDDistribution(float x, float mu, float lambda); -extern double ExtremeValueP (float x, float mu, float lambda); -extern double ExtremeValueP2(float x, float mu, float lambda, int N); -extern double ExtremeValueE (float x, float mu, float lambda, int N); -extern float EVDrandom(float mu, float lambda); -extern int EVDMaxLikelyFit(float *x, int *y, int n, - float *ret_mu, float *ret_lambda); -extern int EVDCensoredFit(float *x, int *y, int n, int z, float c, - float *ret_mu, float *ret_lambda); -extern void Lawless416(float *x, int *y, int n, float lambda, - float *ret_f, float *ret_df); -extern void Lawless422(float *x, int *y, int n, int z, float c, - float lambda, float *ret_f, float *ret_df); - -/* from hmmio.c - * Input/output (saving/reading) of models - */ -extern HMMFILE *HMMFileOpen(char *hmmfile, char *env); -extern int HMMFileRead(HMMFILE *hmmfp, struct plan7_s **ret_hmm); -extern void HMMFileClose(HMMFILE *hmmfp); -extern int HMMFileFormat(HMMFILE *hmmfp); -extern void HMMFileRewind(HMMFILE *hmmfp); -extern int HMMFilePositionByName(HMMFILE *hmmfp, char *name); -extern int HMMFilePositionByIndex(HMMFILE *hmmfp, int idx); -extern void WriteAscHMM(FILE *fp, struct plan7_s *hmm); -extern void WriteBinHMM(FILE *fp, struct plan7_s *hmm); - -/* masks.c - * Repetitive sequence masking. 
- */ -extern int XNU(char *dsq, int len); -extern float TraceScoreCorrection(struct plan7_s *hmm, struct p7trace_s *tr, char *dsq); - -/* mathsupport.c - * Much of this code deals with Dirichlet prior mathematics. - */ -extern int Prob2Score(float p, float null); -extern float Score2Prob(int sc, float null); -extern float Scorify(int sc); -extern double PValue(struct plan7_s *hmm, float sc); -extern float LogSum(float p1, float p2); -extern int ILogsum(int p1, int p2); -extern void LogNorm(float *vec, int n); -extern float Logp_cvec(float *cvec, int n, float *alpha); -extern void SampleDirichlet(float *alpha, int n, float *p); -extern float SampleGamma(float alpha); -extern void SampleCountvector(float *p, int n, int c, float *cvec); -extern float P_PvecGivenDirichlet(float *p, int n, float *alpha); - -/* from misc.c - * Miscellaneous functions with no home - */ -extern char *Getword(FILE *fp, int type); -extern char *Getline(char *s, int n, FILE *fp); -extern int SetAutocuts(struct threshold_s *thresh, struct plan7_s *hmm); - -/* from modelmakers.c - * Model construction algorithms - */ -extern void P7Handmodelmaker(MSA *msa, char **dsq, struct plan7_s **ret_hmm, - struct p7trace_s ***ret_tr); -extern void P7Fastmodelmaker(MSA *msa, char **dsq, - float maxgap, struct plan7_s **ret_hmm, - struct p7trace_s ***ret_tr); -extern void P7Maxmodelmaker(MSA *msa, char **dsq, - float maxgap, struct p7prior_s *prior, - float *null, float null_p1, float mpri, - struct plan7_s **ret_hmm, - struct p7trace_s ***ret_tr); - -/* from plan7.c - * Plan7 HMM structure support - */ -extern struct plan7_s *AllocPlan7(int M); -extern struct plan7_s *AllocPlan7Shell(void); -extern void AllocPlan7Body(struct plan7_s *hmm, int M); -extern void FreePlan7(struct plan7_s *hmm); -extern void ZeroPlan7(struct plan7_s *hmm); -extern void Plan7SetName(struct plan7_s *hmm, char *name); -extern void Plan7SetAccession(struct plan7_s *hmm, char *acc); -extern void Plan7SetDescription(struct plan7_s 
*hmm, char *desc); -extern void Plan7ComlogAppend(struct plan7_s *hmm, int argc, char **argv); -extern void Plan7SetCtime(struct plan7_s *hmm); -extern void Plan7SetNullModel(struct plan7_s *hmm, float null[MAXABET], float p1); -extern void P7Logoddsify(struct plan7_s *hmm, int viterbi_mode); -extern void Plan7Renormalize(struct plan7_s *hmm); -extern void Plan7RenormalizeExits(struct plan7_s *hmm); -extern void Plan7NakedConfig(struct plan7_s *hmm); -extern void Plan7GlobalConfig(struct plan7_s *hmm); -extern void Plan7LSConfig(struct plan7_s *hmm); -extern void Plan7SWConfig(struct plan7_s *hmm, float pentry, float pexit); -extern void Plan7FSConfig(struct plan7_s *hmm, float pentry, float pexit); -extern void PrintPlan7Stats(FILE *fp, struct plan7_s *hmm, char **dsq, - int nseq, struct p7trace_s **tr); -extern int DegenerateSymbolScore(float *p, float *null, int ambig); -extern void Plan9toPlan7(struct plan9_s *hmm, struct plan7_s **ret_plan7); - -/* - * from plan9.c - * Backwards compatibility for the Plan 9 data structures of HMMER 1.x - */ -extern struct plan9_s *P9AllocHMM(int M); -extern void P9ZeroHMM(struct plan9_s *hmm); -extern int P9FreeHMM(struct plan9_s *hmm); -extern void P9Renormalize(struct plan9_s *hmm); -extern void P9DefaultNullModel(float *null); - -/* - * from postprob.c - * Functions for working with posterior probabilities within alignments - */ -extern float P7OptimalAccuracy(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr); -extern float P7Backward(char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s **ret_mx); -extern void P7EmitterPosterior(int L, struct plan7_s *hmm, struct dpmatrix_s *forward, - struct dpmatrix_s *backward, struct dpmatrix_s *mx); -extern float P7FillOptimalAccuracy(int L, int M, struct dpmatrix_s *posterior, - struct dpmatrix_s *mx, struct p7trace_s **ret_tr); -extern void P7OptimalAccuracyTrace(int L, int M, struct dpmatrix_s *posterior, - struct dpmatrix_s *mx, struct p7trace_s **ret_tr); 
-extern char *PostalCode(int L, struct dpmatrix_s *mx, struct p7trace_s *tr); - -/* from prior.c - * Dirichlet priors - */ -extern struct p7prior_s *P7AllocPrior(void); -extern struct p7prior_s *P7LaplacePrior(void); -extern struct p7prior_s *P7DefaultPrior(void); -extern struct p7prior_s *P7ReadPrior(char *prifile); -extern void P7FreePrior(struct p7prior_s *pri); -extern void PAMPrior(char *pamfile, struct p7prior_s *pri, float pamwgt); -extern void P7DefaultNullModel(float *null, float *ret_p1); -extern void P7ReadNullModel(char *rndfile, float *null, float *ret_p1); -extern void P7PriorifyHMM(struct plan7_s *hmm, struct p7prior_s *pri); -extern void P7PriorifyTransitionVector(float *t, struct p7prior_s *prior, - float tq[MAXDCHLET]); -extern void P7PriorifyEmissionVector(float *vec, struct p7prior_s *pri, - int num, float eq[MAXDCHLET], - float e[MAXDCHLET][MAXABET], - float *ret_mix); - - -#ifdef HMMER_PVM -/* from pvm.c - * PVM Parallel Virtual Machine implementation - */ -extern void PVMSpawnSlaves(char *slave, int **ret_tid, int *ret_nslaves); -extern void PVMConfirmSlaves(int *slave_tid, int nslaves); -extern void PVMCheckSlaves(int *slave_tid, int nslaves); -extern void PVMKillSlaves(int *slave_tid, int nslaves); -extern int PVMPackString(char *s); -extern char * PVMUnpackString(void); -extern int PVMPackTrace(struct p7trace_s *tr); -extern struct p7trace_s *PVMUnpackTrace(void); -extern int PVMPackHMM(struct plan7_s *hmm); -extern struct plan7_s * PVMUnpackHMM(void); -#endif /*HMMER_PVM*/ - -#ifdef HMMER_THREADS -/* from threads.c - * POSIX threads implementation - */ -extern int ThreadNumber(void); -#endif /*HMMER_THREADS*/ - - -/* from tophits.c - * Support for keeping/sorting top scoring hit/alignment lists - */ -extern struct tophit_s *AllocTophits(int lumpsize); -extern void GrowTophits(struct tophit_s *h); -extern void FreeTophits(struct tophit_s *h); -extern struct fancyali_s *AllocFancyAli(void); -extern void FreeFancyAli(struct fancyali_s *ali); 
-extern void RegisterHit(struct tophit_s *h, double sortkey, - double pvalue, float score, - double motherp, float mothersc, - char *name, char *acc, char *desc, - int sqfrom, int sqto, int sqlen, - int hmmfrom, int hmmto, int hmmlen, - int domidx, int ndom, - struct fancyali_s *ali); -extern void GetRankedHit(struct tophit_s *h, int rank, - double *r_pvalue, float *r_score, - double *r_motherp, float *r_mothersc, - char **r_name, char **r_acc, char **r_desc, - int *r_sqfrom, int *r_sqto, int *r_sqlen, - int *r_hmmfrom, int *r_hmmto, int *r_hmmlen, - int *r_domidx, int *r_ndom, - struct fancyali_s **r_ali); -extern int TophitsMaxName(struct tophit_s *h); -extern void FullSortTophits(struct tophit_s *h); -extern void TophitsReport(struct tophit_s *h, double E, int nseq); - -/* from trace.c - * Support for traceback (state path) structure - */ -extern void P7AllocTrace(int tlen, struct p7trace_s **ret_tr); -extern void P7ReallocTrace(struct p7trace_s *tr, int tlen); -extern void P7FreeTrace(struct p7trace_s *tr); -extern void TraceSet(struct p7trace_s *tr, int tpos, char type, int idx, int pos); -extern struct p7trace_s **MergeTraceArrays(struct p7trace_s **t1, int n1, struct p7trace_s **t2, int n2); -extern void P7ReverseTrace(struct p7trace_s *tr); -extern void P7TraceCount(struct plan7_s *hmm, char *dsq, float wt, - struct p7trace_s *tr); -extern float P7TraceScore(struct plan7_s *hmm, char *dsq, struct p7trace_s *tr); -extern MSA *P7Traces2Alignment(char **dsq, SQINFO *sqinfo, float *wgt, - int nseq, int M, - struct p7trace_s **tr, int matchonly); -extern int TransitionScoreLookup(struct plan7_s *hmm, char st1, - int k1, char st2, int k2); -extern struct fancyali_s *CreateFancyAli(struct p7trace_s *tr, struct plan7_s *hmm, - char *dsq, char *name); -extern void PrintFancyAli(FILE *fp, struct fancyali_s *ali); -extern void TraceDecompose(struct p7trace_s *otr, struct p7trace_s ***ret_tr, - int *ret_ntr); -extern int TraceDomainNumber(struct p7trace_s *tr); -extern 
void TraceSimpleBounds(struct p7trace_s *tr, int *ret_i1, int *ret_i2, - int *ret_k1, int *ret_k2); -extern struct p7trace_s *MasterTraceFromMap(int *map, int M, int alen); -extern void ImposeMasterTrace(char **aseq, int nseq, struct p7trace_s *mtr, - struct p7trace_s ***ret_tr); - - -#endif /*FUNCSH_INCLUDED*/ diff --git a/forester/archive/RIO/others/hmmer/src/globals.h b/forester/archive/RIO/others/hmmer/src/globals.h deleted file mode 100644 index 5a5ae23..0000000 --- a/forester/archive/RIO/others/hmmer/src/globals.h +++ /dev/null @@ -1,24 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* globals.h - * Mon Nov 18 13:05:03 1996 - * - * Global variable definitions. - * This file may only be included in a main() .c file. - */ - -char Alphabet[MAXCODE]; /* ACGT, for instance */ -int Alphabet_type; /* hmmNUCLEIC or hmmAMINO */ -int Alphabet_size; /* uniq alphabet size: 4 or 20 */ -int Alphabet_iupac; /* total size of alphabet + IUPAC degen. */ -char Degenerate[MAXCODE][MAXABET]; -int DegenCount[MAXCODE]; - diff --git a/forester/archive/RIO/others/hmmer/src/histogram.c b/forester/archive/RIO/others/hmmer/src/histogram.c deleted file mode 100644 index 88841fc..0000000 --- a/forester/archive/RIO/others/hmmer/src/histogram.c +++ /dev/null @@ -1,1369 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. 
See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* histogram.c - * SRE, Sat Jan 20 16:16:17 1996 - * - * Accumulation, printing, and fitting of score histograms - * from database searches. - * - * RCS $Id: histogram.c,v 1.1.1.1 2005/03/22 08:34:00 cmzmasek Exp $ - ************************************************************ - * Basic API: - * - * struct histogram_s *h; - * - * h = AllocHistogram(min_hint, max_hint, lumpsize); - * - * while (getting scores x) AddToHistogram(h, x); - * - * ExtremeValueFitHistogram(h, high_hint); - * PrintASCIIHistogram(fp, h); - * FreeHistogram(h); - */ - -#include -#include -#include -#include -#include - -#include "squid.h" -#include "config.h" -#include "structs.h" -#include "funcs.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -/* Function: AllocHistogram() - * - * Purpose: Allocate and return a histogram structure. - * min and max are your best guess. They need - * not be absolutely correct; the histogram - * will expand dynamically to accomodate scores - * that exceed these suggested bounds. The amount - * that the histogram grows by is set by "lumpsize". - * - * Args: min: minimum score (integer) - * max: maximum score (integer) - * lumpsize: when reallocating histogram, pad the reallocation - * by this much (saves excessive reallocation) - */ -struct histogram_s * -AllocHistogram(int min, int max, int lumpsize) -{ - struct histogram_s *h; - int newsize; - int i; - - newsize = max - min + 1; - - h = (struct histogram_s *) MallocOrDie(sizeof(struct histogram_s)); - h->min = min; - h->max = max; - h->total = 0; - h->lowscore = INT_MAX; - h->highscore = INT_MIN; - h->lumpsize = lumpsize; - h->histogram = (int *) MallocOrDie (sizeof(int) * newsize); - for (i = 0; i < newsize; i++) h->histogram[i] = 0; - - h->expect = NULL; - h->fit_type = HISTFIT_NONE; - - return h; -} - - -/* Function: FreeHistogram() - * - * Purpose: free a histogram structure. 
- */ -void -FreeHistogram(struct histogram_s *h) -{ - free(h->histogram); - if (h->expect != NULL) free(h->expect); - free(h); -} - -/* Function: UnfitHistogram() - * - * Purpose: Free only the theoretical fit part of a histogram. - */ -void -UnfitHistogram(struct histogram_s *h) -{ - if (h->expect != NULL) free(h->expect); - h->expect = NULL; - h->fit_type = HISTFIT_NONE; -} - - -/* Function: AddToHistogram() - * - * Purpose: Bump the appropriate counter in a histogram - * structure, given a score. The score is - * rounded off from float precision to the - * next lower integer. - */ -void -AddToHistogram(struct histogram_s *h, float sc) -{ - int score; - int moveby; - int prevsize; - int newsize; - int i; - - /* Adding to a histogram conflicts with existing fit: - * prohibit this. - */ - if (h->fit_type != HISTFIT_NONE) - Die("AddToHistogram(): Can't add to a fitted histogram\n"); - - - /* histogram bins are defined as: score >= bin value, < bin+1 - * -1.9 -> -2 -0.4 -> -1 1.9 -> 1 - * -2.1 -> -3 0.4 -> 0 2.1 -> 2 - */ - score = (int) floor(sc); - - /* Check to see if we must reallocate the histogram. - */ - if (score < h->min) - { - prevsize = h->max - h->min + 1; - moveby = (h->min - score) + h->lumpsize; - newsize = prevsize + moveby; - h->min -= moveby; - - h->histogram = (int *) ReallocOrDie(h->histogram, sizeof(int) * newsize); - memmove(h->histogram+moveby, h->histogram, sizeof(int) * prevsize); - for (i = 0; i < moveby; i++) - h->histogram[i] = 0; - } - else if (score > h->max) - { - prevsize = h->max - h->min + 1; - h->max = h->lumpsize + score; - newsize = h->max - h->min + 1; - - h->histogram = (int *) ReallocOrDie(h->histogram, sizeof(int) * newsize); - for (i = prevsize; i < newsize; i++) - h->histogram[i] = 0; - } - - /* Bump the correct bin. 
- * The bin number is score - h->min - */ - h->histogram[score - h->min]++; - h->total++; - if (score < h->lowscore) h->lowscore = score; - if (score > h->highscore) h->highscore = score; - - SQD_DPRINTF3(("AddToHistogram(): added %.1f; rounded to %d; in bin %d (%d-%d)\n", - sc, score, score-h->min, h->min, h->max)); - return; -} - - - -/* Function: PrintASCIIHistogram() - * - * Purpose: Print a "prettified" histogram to a file pointer. - * Deliberately a look-and-feel clone of Bill Pearson's - * excellent FASTA output. - * - * Args: fp - open file to print to (stdout works) - * h - histogram to print - */ -void -PrintASCIIHistogram(FILE *fp, struct histogram_s *h) -{ - int units; - int maxbar; - int num; - int i, idx; - char buffer[81]; /* output line buffer */ - int pos; /* position in output line buffer */ - int lowbound, lowcount; /* cutoffs on the low side */ - int highbound, highcount; /* cutoffs on the high side */ - int emptybins = 3; - - /* Find out how we'll scale the histogram. - * We have 59 characters to play with on a - * standard 80-column terminal display: - * leading "%5d %6d %6d|" occupies 20 chars. - * Save the peak position, we'll use it later. - */ - maxbar = 0; - for (i = h->lowscore - h->min; i <= h->highscore - h->min; i++) - if (h->histogram[i] > maxbar) - { - maxbar = h->histogram[i]; /* max height */ - lowbound = i + h->min; /* peak position */ - } - - /* Truncate histogram display on both sides, ad hoc fashion. - * Start from the peak; then move out until we see empty bins, - * and stop. 
- */ - highbound = lowbound; /* start at peak position */ - for (num = 0; lowbound > h->lowscore; lowbound--) - { - i = lowbound - h->min; - if (h->histogram[i] > 0) { num = 0; continue; } /* reset */ - if (++num == emptybins) { lowbound += emptybins; break; } /* stop */ - } - for (num = 0; highbound < h->highscore; highbound++) - { - i = highbound - h->min; - if (h->histogram[i] > 0) { num = 0; continue; } /* reset */ - if (++num == emptybins) { highbound -= emptybins; break; } /* stop */ - } - /* collect counts outside of bounds */ - for (lowcount = 0, i = h->lowscore - h->min; i <= lowbound - h->min; i++) - lowcount += h->histogram[i]; - for (highcount = 0, i = h->highscore - h->min; i >= highbound - h->min; i--) - highcount += h->histogram[i]; - - /* maxbar might need raised now; then set our units */ - if (lowcount > maxbar) maxbar = lowcount; - if (highcount > maxbar) maxbar = highcount; - units = ((maxbar-1)/ 59) + 1; - - - /* Print the histogram - */ - fprintf(fp, "%5s %6s %6s (one = represents %d sequences)\n", - "score", "obs", "exp", units); - fprintf(fp, "%5s %6s %6s\n", "-----", "---", "---"); - buffer[80] = '\0'; - buffer[79] = '\n'; - for (i = h->lowscore; i <= h->highscore; i++) - { - memset(buffer, ' ', 79 * sizeof(char)); - idx = i - h->min; - - /* Deal with special cases at edges - */ - if (i < lowbound) continue; - else if (i > highbound) continue; - else if (i == lowbound && i != h->lowscore) - { - sprintf(buffer, "<%4d %6d %6s|", i+1, lowcount, "-"); - if (lowcount > 0) { - num = 1+(lowcount-1) / units; - if (num > 60) Die("oops"); - for (pos = 20; num > 0; num--) buffer[pos++] = '='; - } - fputs(buffer, fp); - continue; - } - else if (i == highbound && i != h->highscore) - { - sprintf(buffer, ">%4d %6d %6s|", i, highcount, "-"); - if (highcount > 0) { - num = 1+(highcount-1) / units; - for (pos = 20; num > 0; num--) buffer[pos++] = '='; - } - fputs(buffer, fp); - continue; - } - - /* Deal with most cases - */ - if (h->fit_type != 
HISTFIT_NONE) - sprintf(buffer, "%5d %6d %6d|", - i, h->histogram[idx], (int) h->expect[idx]); - else - sprintf(buffer, "%5d %6d %6s|", i, h->histogram[idx], "-"); - buffer[20] = ' '; /* sprintf writes a null char */ - - /* Mark the histogram bar for observed hits - */ - if (h->histogram[idx] > 0) { - num = 1 + (h->histogram[idx]-1) / units; - for (pos = 20; num > 0; num--) buffer[pos++] = '='; - } - - /* Mark the theoretically expected value - */ - if (h->fit_type != HISTFIT_NONE && (int) h->expect[idx] > 0) - { - pos = 20 + (int)(h->expect[idx]-1) / units; - if (pos >= 78) pos = 78; /* be careful of buffer bounds */ - buffer[pos] = '*'; - } - - /* Print the line - */ - fputs(buffer, fp); - } - - /* Print details about the statistics - */ - switch (h->fit_type) { - case HISTFIT_NONE: - fprintf(fp, "\n\n%% No statistical fit available\n"); - break; - - case HISTFIT_EVD: - fprintf(fp, "\n\n%% Statistical details of theoretical EVD fit:\n"); - fprintf(fp, " mu = %10.4f\n", h->param[EVD_MU]); - fprintf(fp, " lambda = %10.4f\n", h->param[EVD_LAMBDA]); - fprintf(fp, "chi-sq statistic = %10.4f\n", h->chisq); - fprintf(fp, " P(chi-square) = %10.4g\n", h->chip); - break; - - case HISTFIT_GAUSSIAN: - fprintf(fp, "\n\n%% Statistical details of theoretical Gaussian fit:\n"); - fprintf(fp, " mean = %10.4f\n", h->param[GAUSS_MEAN]); - fprintf(fp, " sd = %10.4f\n", h->param[GAUSS_SD]); - fprintf(fp, "chi-sq statistic = %10.4f\n", h->chisq); - fprintf(fp, " P(chi-square) = %10.4g\n", h->chip); - break; - } - return; -} - - - -/* Function: PrintXMGRHistogram() - * Date: SRE, Wed Nov 12 11:02:00 1997 [St. 
Louis] - * - * Purpose: Print an XMGR data file that contains two data sets: - * - xy data for the observed histogram - * - xy data for the theoretical histogram - */ -void -PrintXMGRHistogram(FILE *fp, struct histogram_s *h) -{ - int sc; /* integer score in histogram structure */ - double val; - - /* First data set is the observed histogram - */ - for (sc = h->lowscore; sc <= h->highscore; sc++) - if (h->histogram[sc - h->min] > 0) - fprintf(fp, "%-6d %f\n", sc, - (float) h->histogram[sc - h->min]/ (float) h->total); - fprintf(fp, "&\n"); - - /* Second data set is the theoretical histogram - */ - if (h->fit_type != HISTFIT_NONE) - { - for (sc = h->lowscore; sc <= h->highscore; sc++) - { - val = - (1. - ExtremeValueP((float)sc+1, h->param[EVD_MU], h->param[EVD_LAMBDA]))- - (1. - ExtremeValueP((float)sc, h->param[EVD_MU], h->param[EVD_LAMBDA])); - fprintf(fp, "%-6d %f\n", sc, val); - } - fprintf(fp, "&\n"); - } -} - -/* Function: PrintXMGRDistribution() - * Date: SRE, Wed Nov 12 11:02:09 1997 [St. Louis] - * - * Purpose: Print an XMGR data file that contains two data sets: - * - xy data for the observed distribution P(Slowscore; sc <= h->highscore; sc++) - { - cum += h->histogram[sc - h->min]; - fprintf(fp, "%-6d %f\n", sc + 1, (float) cum / (float) h->total); - } - fprintf(fp, "&\n"); - - /* Second data set is the theoretical histogram - */ - if (h->fit_type != HISTFIT_NONE) - { - for (sc = h->lowscore; sc <= h->highscore; sc++) - { - val = (1. - ExtremeValueP((float) sc, h->param[EVD_MU], - h->param[EVD_LAMBDA])); - fprintf(fp, "%-6d %f\n", sc, val); - } - fprintf(fp, "&\n"); - } -} - -/* Function: PrintXMGRRegressionLine() - * Date: SRE, Wed Nov 12 11:02:19 1997 [St. Louis] - * - * Purpose: Print an XMGR data file that contains two data sets: - * - xy data for log log transform of observed distribution P(Slowscore; sc <= h->highscore; sc++) - { - cum += h->histogram[sc - h->min]; - val = log (-1. 
* log((double) cum / (double) h->total)); - if (cum < h->total) - fprintf(fp, "%-6d %f\n", sc + 1, val); - } - fprintf(fp, "&\n"); - - /* Second data set is the theoretical histogram - */ - if (h->fit_type != HISTFIT_NONE) - { - for (sc = h->lowscore; sc <= h->highscore; sc++) - { - val = log(-1. * log(1. - ExtremeValueP((float) sc, h->param[EVD_MU], - h->param[EVD_LAMBDA]))); - fprintf(fp, "%-6d %f\n", sc, val); - } - fprintf(fp, "&\n"); - } -} - -/* Function: EVDBasicFit() - * Date: SRE, Wed Nov 12 11:02:27 1997 [St. Louis] - * - * Purpose: Fit a score histogram to the extreme value - * distribution. Set the parameters lambda - * and mu in the histogram structure. Fill in the - * expected values in the histogram. Calculate - * a chi-square test as a measure of goodness of fit. - * - * This is the basic version of ExtremeValueFitHistogram(), - * in a nonrobust form: simple linear regression with no - * outlier pruning. - * - * Methods: Uses a linear regression fitting method [Collins88,Lawless82] - * - * Args: h - histogram to fit - * - * Return: (void) - */ -void -EVDBasicFit(struct histogram_s *h) -{ - float *d; /* distribution P(S < x) */ - float *x; /* x-axis of P(Smin */ - float slope, intercept; /* m,b fit from Linefit() */ - float corr; /* correlation coeff of line fit, not used */ - float lambda, mu; /* slope, intercept converted to EVD params */ - - /* Allocations for x, y axes - * distribution d runs from min..max with indices 0..max-min - * i.e. score - min = index into d, x, histogram, and expect - */ - hsize = h->highscore - h->lowscore + 1; - d = (float *) MallocOrDie(sizeof(float) * hsize); - x = (float *) MallocOrDie(sizeof(float) * hsize); - for (idx = 0; idx < hsize; idx++) - d[idx] = x[idx] = 0.; - - /* Calculate P(S < x) distribution from histogram. - * note off-by-one of sc, because histogram bin contains scores between - * x and x+1. 
- */ - sum = 0; - for (sc = h->lowscore; sc <= h->highscore; sc++) - { - sum += h->histogram[sc - h->min]; - d[sc - h->lowscore] = (float) sum / (float) h->total; - x[sc - h->lowscore] = (float) (sc + 1); - } - - /* Do a linear regression fit to the log[-log(P(Sx))] = -lambda * x + lambda * mu - * so lambda = -m and mu = b/lambda - */ - /* convert y axis to log[-log(P(Slowscore; sc < h->highscore; sc++) - d[sc - h->lowscore] = log(-1. * log(d[sc - h->lowscore])); - - /* do the linear regression */ - Linefit(x, d, hsize-1, &intercept, &slope, &corr); - /* calc mu, lambda */ - lambda = -1. * slope; - mu = intercept / lambda; - - /* Set the EVD parameters in the histogram; - * pass 2 for additional lost degrees of freedom because we fit mu, lambda. - */ - ExtremeValueSetHistogram(h, mu, lambda, h->lowscore, h->highscore, 2); - - free(x); - free(d); - return; -} - - -/* Function: ExtremeValueFitHistogram() - * Date: SRE, Sat Nov 15 17:16:15 1997 [St. Louis] - * - * Purpose: Fit a score histogram to the extreme value - * distribution. Set the parameters lambda - * and mu in the histogram structure. Calculate - * a chi-square test as a measure of goodness of fit. - * - * Methods: Uses a maximum likelihood method [Lawless82]. - * Lower outliers are removed by censoring the data below the peak. - * Upper outliers are removed iteratively using method - * described by [Mott92]. - * - * Args: h - histogram to fit - * censor - TRUE to censor data left of the peak - * high_hint - score cutoff; above this are `real' hits that aren't fit - * - * Return: 1 if fit is judged to be valid. - * else 0 if fit is invalid (too few seqs.) 
- */ -int -ExtremeValueFitHistogram(struct histogram_s *h, int censor, float high_hint) -{ - float *x; /* array of EVD samples to fit */ - int *y; /* histogram counts */ - int n; /* number of observed samples */ - int z; /* number of censored samples */ - int hsize; /* size of histogram */ - float lambda, mu; /* new estimates of lambda, mu */ - int sc; /* loop index for score */ - int lowbound; /* lower bound of fitted region*/ - int highbound; /* upper bound of fitted region*/ - int new_highbound; - int iteration; - - /* Determine lower bound on fitted region; - * if we're censoring the data, choose the peak of the histogram. - * if we're not, then we take the whole histogram. - */ - lowbound = h->lowscore; - if (censor) - { - int max = -1; - for (sc = h->lowscore; sc <= h->highscore; sc++) - if (h->histogram[sc - h->min] > max) - { - max = h->histogram[sc - h->min]; - lowbound = sc; - } - } - - /* Determine initial upper bound on fitted region. - */ - highbound = MIN(high_hint, h->highscore); - - /* Now, iteratively converge on our lambda, mu: - */ - for (iteration = 0; iteration < 100; iteration++) - { - /* Construct x, y vectors. - */ - x = NULL; - y = NULL; - hsize = highbound - lowbound + 1; - if (hsize < 5) goto FITFAILED; /* require at least 5 bins or we don't fit */ - - x = MallocOrDie(sizeof(float) * hsize); - y = MallocOrDie(sizeof(int) * hsize); - n = 0; - for (sc = lowbound; sc <= highbound; sc++) - { - x[sc-lowbound] = (float) sc + 0.5; /* crude, but tests OK */ - y[sc-lowbound] = h->histogram[sc - h->min]; - n += h->histogram[sc - h->min]; - } - - if (n < 100) goto FITFAILED; /* require fitting to at least 100 points */ - - /* If we're censoring, estimate z, the number of censored guys - * left of the bound. Our initial estimate is crudely that we're - * missing e^-1 of the total distribution (which would be exact - * if we censored exactly at mu; but we censored at the observed peak). 
- * Subsequent estimates are more exact based on our current estimate of mu. - */ - if (censor) - { - if (iteration == 0) - z = MIN(h->total-n, (int) (0.58198 * (float) n)); - else - { - double psx; - psx = EVDDistribution((float) lowbound, mu, lambda); - z = MIN(h->total-n, (int) ((double) n * psx / (1. - psx))); - } - } - - /* Do an ML fit - */ - if (censor) { - if (! EVDCensoredFit(x, y, hsize, z, (float) lowbound, &mu, &lambda)) - goto FITFAILED; - } else - if (! EVDMaxLikelyFit(x, y, hsize, &mu, &lambda)) - goto FITFAILED; - - /* Find the Eval = 1 point as a new highbound; - * the total number of samples estimated to "belong" to the EVD is n+z - */ - new_highbound = (int) - (mu - (log (-1. * log((double) (n+z-1) / (double)(n+z))) / lambda)); - - free(x); - free(y); - if (new_highbound >= highbound) break; - highbound = new_highbound; - } - - /* Set the histogram parameters; - * - we fit from lowbound to highbound; thus we lose 2 degrees of freedom - * for fitting mu, lambda, but we get 1 back because we're unnormalized - * in this interval, hence we pass 2-1 = 1 as ndegrees. - */ - ExtremeValueSetHistogram(h, mu, lambda, lowbound, highbound, 1); - return 1; - -FITFAILED: - UnfitHistogram(h); - if (x != NULL) free(x); - if (y != NULL) free(y); - return 0; -} - - -/* Function: ExtremeValueSetHistogram() - * - * Purpose: Instead of fitting the histogram to an EVD, - * simply set the EVD parameters from an external source. 
- * - * Args: h - the histogram to set - * mu - mu location parameter - * lambda - lambda scale parameter - * lowbound - low bound of the histogram that was fit - * highbound- high bound of histogram that was fit - * ndegrees - extra degrees of freedom to subtract in X^2 test: - * typically 0 if mu, lambda are parametric, - * else 2 if mu, lambda are estimated from data - */ -void -ExtremeValueSetHistogram(struct histogram_s *h, float mu, float lambda, - float lowbound, float highbound, int ndegrees) -{ - int sc; - int hsize, idx; - int nbins; - float delta; - - UnfitHistogram(h); - h->fit_type = HISTFIT_EVD; - h->param[EVD_LAMBDA] = lambda; - h->param[EVD_MU] = mu; - - hsize = h->max - h->min + 1; - h->expect = (float *) MallocOrDie(sizeof(float) * hsize); - for (idx = 0; idx < hsize; idx++) - h->expect[idx] = 0.; - - /* Calculate the expected values for the histogram. - */ - for (sc = h->min; sc <= h->max; sc++) - h->expect[sc - h->min] = - ExtremeValueE((float)(sc), h->param[EVD_MU], h->param[EVD_LAMBDA], - h->total) - - ExtremeValueE((float)(sc+1), h->param[EVD_MU], h->param[EVD_LAMBDA], - h->total); - - /* Calculate the goodness-of-fit (within whole region) - */ - h->chisq = 0.; - nbins = 0; - for (sc = lowbound; sc <= highbound; sc++) - if (h->expect[sc-h->min] >= 5. && h->histogram[sc-h->min] >= 5) - { - delta = (float) h->histogram[sc-h->min] - h->expect[sc-h->min]; - h->chisq += delta * delta / h->expect[sc-h->min]; - nbins++; - } - - /* Since we fit the whole histogram, there is at least - * one constraint on chi-square: the normalization to h->total. - */ - if (nbins > 1 + ndegrees) - h->chip = (float) IncompleteGamma((double)(nbins-1-ndegrees)/2., - (double) h->chisq/2.); - else - h->chip = 0.; -} - - - -/* Function: GaussianFitHistogram() - * - * Purpose: Fit a score histogram to a Gaussian distribution. - * Set the parameters mean and sd in the histogram - * structure, as well as a chi-squared test for - * goodness of fit. 
- * - * Args: h - histogram to fit - * high_hint - score cutoff; above this are `real' hits that aren't fit - * - * Return: 1 if fit is judged to be valid. - * else 0 if fit is invalid (too few seqs.) - */ -int -GaussianFitHistogram(struct histogram_s *h, float high_hint) -{ - float sum; - float sqsum; - float delta; - int sc; - int nbins; - int hsize, idx; - - /* Clear any previous fitting from the histogram. - */ - UnfitHistogram(h); - - /* Determine if we have enough hits to fit the histogram; - * arbitrarily require 1000. - */ - if (h->total < 1000) { h->fit_type = HISTFIT_NONE; return 0; } - - /* Simplest algorithm for mean and sd; - * no outlier detection yet (not even using high_hint) - * - * Magic 0.5 correction is because our histogram is for - * scores between x and x+1; we estimate the expectation - * (roughly) as x + 0.5. - */ - sum = sqsum = 0.; - for (sc = h->lowscore; sc <= h->highscore; sc++) - { - delta = (float) sc + 0.5; - sum += (float) h->histogram[sc-h->min] * delta; - sqsum += (float) h->histogram[sc-h->min] * delta * delta; - } - h->fit_type = HISTFIT_GAUSSIAN; - h->param[GAUSS_MEAN] = sum / (float) h->total; - h->param[GAUSS_SD] = sqrt((sqsum - (sum*sum/(float)h->total)) / - (float)(h->total-1)); - - /* Calculate the expected values for the histogram. - * Note that the magic 0.5 correction appears again. - * Calculating difference between distribution functions for Gaussian - * would be correct but hard. - */ - hsize = h->max - h->min + 1; - h->expect = (float *) MallocOrDie(sizeof(float) * hsize); - for (idx = 0; idx < hsize; idx++) - h->expect[idx] = 0.; - - for (sc = h->min; sc <= h->max; sc++) - { - delta = (float) sc + 0.5 - h->param[GAUSS_MEAN]; - h->expect[sc - h->min] = - (float) h->total * ((1. / (h->param[GAUSS_SD] * sqrt(2.*3.14159))) * - (exp(-1.* delta*delta / (2. 
* h->param[GAUSS_SD] * h->param[GAUSS_SD])))); - } - - /* Calculate the goodness-of-fit (within region that was fitted) - */ - h->chisq = 0.; - nbins = 0; - for (sc = h->lowscore; sc <= h->highscore; sc++) - if (h->expect[sc-h->min] >= 5. && h->histogram[sc-h->min] >= 5) - { - delta = (float) h->histogram[sc-h->min] - h->expect[sc-h->min]; - h->chisq += delta * delta / h->expect[sc-h->min]; - nbins++; - } - /* -1 d.f. for normalization; -2 d.f. for two free parameters */ - if (nbins > 3) - h->chip = (float) IncompleteGamma((double)(nbins-3)/2., - (double) h->chisq/2.); - else - h->chip = 0.; - - return 1; -} - - -/* Function: GaussianSetHistogram() - * - * Purpose: Instead of fitting the histogram to a Gaussian, - * simply set the Gaussian parameters from an external source. - */ -void -GaussianSetHistogram(struct histogram_s *h, float mean, float sd) -{ - int sc; - int hsize, idx; - int nbins; - float delta; - - UnfitHistogram(h); - h->fit_type = HISTFIT_GAUSSIAN; - h->param[GAUSS_MEAN] = mean; - h->param[GAUSS_SD] = sd; - - /* Calculate the expected values for the histogram. - */ - hsize = h->max - h->min + 1; - h->expect = (float *) MallocOrDie(sizeof(float) * hsize); - for (idx = 0; idx < hsize; idx++) - h->expect[idx] = 0.; - - /* Note: ideally we'd use the Gaussian distribution function - * to find the histogram occupancy in the window sc..sc+1. - * However, the distribution function is hard to calculate. - * Instead, estimate the histogram by taking the density at sc+0.5. - */ - for (sc = h->min; sc <= h->max; sc++) - { - delta = ((float)sc + 0.5) - h->param[GAUSS_MEAN]; - h->expect[sc - h->min] = - (float) h->total * ((1. / (h->param[GAUSS_SD] * sqrt(2.*3.14159))) * - (exp(-1.*delta*delta / (2. * h->param[GAUSS_SD] * h->param[GAUSS_SD])))); - } - - /* Calculate the goodness-of-fit (within whole region) - */ - h->chisq = 0.; - nbins = 0; - for (sc = h->lowscore; sc <= h->highscore; sc++) - if (h->expect[sc-h->min] >= 5. 
&& h->histogram[sc-h->min] >= 5) - { - delta = (float) h->histogram[sc-h->min] - h->expect[sc-h->min]; - h->chisq += delta * delta / h->expect[sc-h->min]; - nbins++; - } - /* -1 d.f. for normalization */ - if (nbins > 1) - h->chip = (float) IncompleteGamma((double)(nbins-1)/2., - (double) h->chisq/2.); - else - h->chip = 0.; -} - - - -/* Function: EVDDensity() - * Date: SRE, Sat Nov 15 19:37:52 1997 [St. Louis] - * - * Purpose: Return the extreme value density P(S=x) at - * a given point x, for an EVD controlled by - * parameters mu and lambda. - */ -double -EVDDensity(float x, float mu, float lambda) -{ - return (lambda * exp(-1. * lambda * (x - mu) - - exp(-1. * lambda * (x - mu)))); -} - -/* Function: EVDDistribution() - * Date: SRE, Tue Nov 18 08:02:22 1997 [St. Louis] - * - * Purpose: Returns the extreme value distribution P(S < x) - * evaluated at x, for an EVD controlled by parameters - * mu and lambda. - */ -double -EVDDistribution(float x, float mu, float lambda) -{ - return (exp(-1. * exp(-1. * lambda * (x - mu)))); -} - -/* Function: ExtremeValueP() - * - * Purpose: Calculate P(S>x) according to an extreme - * value distribution, given x and the parameters - * of the distribution (characteristic - * value mu, decay constant lambda). - * - * This function is exquisitely prone to - * floating point exceptions if it isn't coded - * carefully. - * - * Args: x = score - * mu = characteristic value of extreme value distribution - * lambda = decay constant of extreme value distribution - * - * Return: P(S>x) - */ -double -ExtremeValueP(float x, float mu, float lambda) -{ - double y; - /* avoid exceptions near P=1.0 */ - /* typical 32-bit sys: if () < -3.6, return 1.0 */ - if ((lambda * (x - mu)) <= -1. * log(-1. * log(DBL_EPSILON))) return 1.0; - /* avoid underflow fp exceptions near P=0.0*/ - if ((lambda * (x - mu)) >= 2.3 * (double) DBL_MAX_10_EXP) return 0.0; - /* a roundoff issue arises; use 1 - e^-x --> x for small x */ - y = exp(-1. 
* lambda * (x - mu)); - if (y < 1e-7) return y; - else return (1.0 - exp(-1. * y)); -} - - -/* Function: ExtremeValueP2() - * - * Purpose: Calculate P(S>x) in a database of size N, - * using P(S>x) for a single sequence, according - * to a Poisson distribution. - * - * Args: x = score - * mu = characteristic value of extreme value distribution - * lambda = decay constant of extreme value distribution - * N = number of trials (number of sequences) - * - * Return: P(S>x) for database of size N - */ -double -ExtremeValueP2(float x, float mu, float lambda, int N) -{ - double y; - y = N * ExtremeValueP(x,mu,lambda); - if (y < 1e-7) return y; - else return (1.0 - exp(-1. * y)); -} - -/* Function: ExtremeValueE() - * - * Purpose: Calculate E(S>x) in a database of size N, - * using P(S>x) for a single sequence: simply np. - * - * Args: x = score - * mu = characteristic value of extreme value distribution - * lambda = decay constant of extreme value distribution - * N = number of trials (number of sequences) - * - * Return: E(S>x) for database of size N - */ -double -ExtremeValueE(float x, float mu, float lambda, int N) -{ - return (double)N * ExtremeValueP(x,mu,lambda); -} - - -/* Function: EVDrandom() - * - * Purpose: Randomly sample an x from an EVD. 
- * Trivially done by the transformation method, since - * the distribution is analytical: - * x = \mu - \frac{\log \left[ -\log P(S= c - * lambda - a lambda to test - * ret_f - RETURN: 4.2.2 evaluated at lambda - * ret_df - RETURN: first derivative of 4.2.2 evaluated at lambda - * - * Return: (void) - */ -void -Lawless422(float *x, int *y, int n, int z, float c, - float lambda, float *ret_f, float *ret_df) -{ - double esum; /* \sum e^(-lambda xi) + z term */ - double xesum; /* \sum xi e^(-lambda xi) + z term */ - double xxesum; /* \sum xi^2 e^(-lambda xi) + z term */ - double xsum; /* \sum xi (no z term) */ - double mult; /* histogram count multiplier */ - double total; /* total samples */ - int i; - - esum = xesum = xsum = xxesum = total = 0.; - for (i = 0; i < n; i++) - { - mult = (y == NULL) ? 1. : (double) y[i]; - xsum += mult * x[i]; - esum += mult * exp(-1. * lambda * x[i]); - xesum += mult * x[i] * exp(-1. * lambda * x[i]); - xxesum += mult * x[i] * x[i] * exp(-1. * lambda * x[i]); - total += mult; - } - - /* Add z terms for censored data - */ - esum += (double) z * exp(-1. * lambda * c); - xesum += (double) z * c * exp(-1. * lambda * c); - xxesum += (double) z * c * c * exp(-1. * lambda * c); - - *ret_f = 1./lambda - xsum / total + xesum / esum; - *ret_df = ((xesum / esum) * (xesum / esum)) - - (xxesum / esum) - - (1. / (lambda * lambda)); - - return; -} - - - -/* Function: EVDMaxLikelyFit() - * Date: SRE, Fri Nov 14 07:56:29 1997 [St. Louis] - * - * Purpose: Given a list or a histogram of EVD-distributed samples, - * find maximum likelihood parameters lambda and - * mu. - * - * Algorithm: Uses approach described in [Lawless82]. Solves - * for lambda using Newton/Raphson iterations; - * then substitutes lambda into Lawless' equation 4.1.5 - * to get mu. - * - * Newton/Raphson algorithm developed from description in - * Numerical Recipes in C [Press88]. 
- * - * Args: x - list of EVD distributed samples or x-axis of histogram - * c - NULL, or y-axis of histogram - * n - number of samples, or number of histogram bins - * ret_mu : RETURN: ML estimate of mu - * ret_lambda : RETURN: ML estimate of lambda - * - * Return: 1 on success; 0 on any failure - */ -int -EVDMaxLikelyFit(float *x, int *c, int n, float *ret_mu, float *ret_lambda) -{ - float lambda, mu; - float fx; /* f(x) */ - float dfx; /* f'(x) */ - double esum; /* \sum e^(-lambda xi) */ - double mult; - double total; - float tol = 1e-5; - int i; - - /* 1. Find an initial guess at lambda: linear regression here? - */ - lambda = 0.2; - - /* 2. Use Newton/Raphson to solve Lawless 4.1.6 and find ML lambda - */ - for (i = 0; i < 100; i++) - { - Lawless416(x, c, n, lambda, &fx, &dfx); - if (fabs(fx) < tol) break; /* success */ - lambda = lambda - fx / dfx; /* Newton/Raphson is simple */ - if (lambda <= 0.) lambda = 0.001; /* but be a little careful */ - } - - /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed. - * Resort to a bisection search. Worse convergence speed - * but guaranteed to converge (unlike Newton/Raphson). - * We assume (!?) that fx is a monotonically decreasing function of x; - * i.e. fx > 0 if we are left of the root, fx < 0 if we - * are right of the root. - */ - if (i == 100) - { - float left, right, mid; - SQD_DPRINTF2(("EVDMaxLikelyFit(): Newton/Raphson failed; switchover to bisection")); - - /* First we need to bracket the root */ - lambda = right = left = 0.2; - Lawless416(x, c, n, lambda, &fx, &dfx); - if (fx < 0.) - { /* fix right; search left. */ - do { - left -= 0.1; - if (left < 0.) { - SQD_DPRINTF2(("EVDMaxLikelyFit(): failed to bracket root")); - return 0; - } - Lawless416(x, c, n, left, &fx, &dfx); - } while (fx < 0.); - } - else - { /* fix left; search right. */ - do { - right += 0.1; - Lawless416(x, c, n, right, &fx, &dfx); - if (right > 100.) 
{ - SQD_DPRINTF2(("EVDMaxLikelyFit(): failed to bracket root")); - return 0; - } - } while (fx > 0.); - } - /* now we bisection search in left/right interval */ - for (i = 0; i < 100; i++) - { - mid = (left + right) / 2.; - Lawless416(x, c, n, mid, &fx, &dfx); - if (fabs(fx) < tol) break; /* success */ - if (fx > 0.) left = mid; - else right = mid; - } - if (i == 100) { - SQD_DPRINTF2(("EVDMaxLikelyFit(): even the bisection search failed")); - return 0; - } - lambda = mid; - } - - /* 3. Substitute into Lawless 4.1.5 to find mu - */ - esum = 0.; - total = 0.; - for (i = 0; i < n; i++) - { - mult = (c == NULL) ? 1. : (double) c[i]; - esum += mult * exp(-1 * lambda * x[i]); - total += mult; - } - mu = -1. * log(esum / total) / lambda; - - *ret_lambda = lambda; - *ret_mu = mu; - return 1; -} - - -/* Function: EVDCensoredFit() - * Date: SRE, Mon Nov 17 10:01:05 1997 [St. Louis] - * - * Purpose: Given a /left-censored/ list or histogram of EVD-distributed - * samples, as well as the number of censored samples z and the - * censoring value c, - * find maximum likelihood parameters lambda and - * mu. - * - * Algorithm: Uses approach described in [Lawless82]. Solves - * for lambda using Newton/Raphson iterations; - * then substitutes lambda into Lawless' equation 4.2.3 - * to get mu. - * - * Newton/Raphson algorithm developed from description in - * Numerical Recipes in C [Press88]. 
- * - * Args: x - list of EVD distributed samples or x-axis of histogram - * y - NULL, or y-axis of histogram - * n - number of observed samples,or number of histogram bins - * z - number of censored samples - * c - censoring value (all x_i >= c) - * ret_mu : RETURN: ML estimate of mu - * ret_lambda : RETURN: ML estimate of lambda - * - * Return: (void) - */ -int -EVDCensoredFit(float *x, int *y, int n, int z, float c, - float *ret_mu, float *ret_lambda) -{ - float lambda, mu; - float fx; /* f(x) */ - float dfx; /* f'(x) */ - double esum; /* \sum e^(-lambda xi) */ - double mult; - double total; - float tol = 1e-5; - int i; - - /* 1. Find an initial guess at lambda: linear regression here? - */ - lambda = 0.2; - - /* 2. Use Newton/Raphson to solve Lawless 4.2.2 and find ML lambda - */ - for (i = 0; i < 100; i++) - { - Lawless422(x, y, n, z, c, lambda, &fx, &dfx); - if (fabs(fx) < tol) break; /* success */ - lambda = lambda - fx / dfx; /* Newton/Raphson is simple */ - if (lambda <= 0.) lambda = 0.001; /* but be a little careful */ - } - - /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed. - * Resort to a bisection search. Worse convergence speed - * but guaranteed to converge (unlike Newton/Raphson). - * We assume (!?) that fx is a monotonically decreasing function of x; - * i.e. fx > 0 if we are left of the root, fx < 0 if we - * are right of the root. - */ - if (i == 100) - { - float left, right, mid; - /* First we need to bracket the root */ - SQD_DPRINTF2(("EVDCensoredFit(): Newton/Raphson failed; switched to bisection")); - lambda = right = left = 0.2; - Lawless422(x, y, n, z, c, lambda, &fx, &dfx); - if (fx < 0.) - { /* fix right; search left. */ - do { - left -= 0.03; - if (left < 0.) { - SQD_DPRINTF2(("EVDCensoredFit(): failed to bracket root")); - return 0; - } - Lawless422(x, y, n, z, c, left, &fx, &dfx); - } while (fx < 0.); - } - else - { /* fix left; search right. 
*/ - do { - right += 0.1; - Lawless422(x, y, n, z, c, left, &fx, &dfx); - if (right > 100.) { - SQD_DPRINTF2(("EVDCensoredFit(): failed to bracket root")); - return 0; - } - } while (fx > 0.); - } - /* now we bisection search in left/right interval */ - for (i = 0; i < 100; i++) - { - mid = (left + right) / 2.; - Lawless422(x, y, n, z, c, left, &fx, &dfx); - if (fabs(fx) < tol) break; /* success */ - if (fx > 0.) left = mid; - else right = mid; - } - if (i == 100) { - SQD_DPRINTF2(("EVDCensoredFit(): even the bisection search failed")); - return 0; - } - lambda = mid; - } - - /* 3. Substitute into Lawless 4.2.3 to find mu - */ - esum = total = 0.; - for (i = 0; i < n; i++) - { - mult = (y == NULL) ? 1. : (double) y[i]; - esum += mult * exp(-1. * lambda * x[i]); - total += mult; - } - esum += (double) z * exp(-1. * lambda * c); /* term from censored data */ - mu = -1. * log(esum / total) / lambda; - - *ret_lambda = lambda; - *ret_mu = mu; - return 1; -} - - - diff --git a/forester/archive/RIO/others/hmmer/src/hmmalign.c b/forester/archive/RIO/others/hmmer/src/hmmalign.c deleted file mode 100644 index 27d0802..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmalign.c +++ /dev/null @@ -1,325 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* hmmalign.c - * SRE, Thu Dec 18 16:05:29 1997 [St. Louis] - * - * main() for aligning a set of sequences to an HMM. 
- * RCS $Id: hmmalign.c,v 1.1.1.1 2005/03/22 08:34:00 cmzmasek Exp $ - */ - -#include -#include - -#include "structs.h" /* data structures, macros, #define's */ -#include "config.h" /* compile-time configuration constants */ -#include "funcs.h" /* function declarations */ -#include "globals.h" /* alphabet global variables */ -#include "squid.h" /* general sequence analysis library */ -#include "msa.h" /* squid's multiple alignment i/o */ - -static char banner[] = "hmmalign - align sequences to an HMM profile"; - -static char usage[] = "\ -Usage: hmmalign [-options] \n\ -Available options are:\n\ - -h : help; print brief help on version and usage\n\ - -m : only print symbols aligned to match states\n\ - -o : save alignment in file in SELEX format\n\ - -q : quiet - suppress verbose banner\n\ -"; - -static char experts[] = "\ - --informat : sequence file is in format , not FASTA\n\ - --mapali : include alignment in file using map in HMM\n\ - --withali : include alignment to (fixed) alignment in file \n\ -\n"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-m", TRUE, sqdARG_NONE } , - { "-o", TRUE, sqdARG_STRING }, - { "-q", TRUE, sqdARG_NONE }, - { "--informat",FALSE, sqdARG_STRING }, - { "--mapali", FALSE, sqdARG_STRING }, - { "--withali", FALSE, sqdARG_STRING }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -static void include_alignment(char *seqfile, struct plan7_s *hmm, int do_mapped, - char ***rseq, char ***dsq, SQINFO **sqinfo, - struct p7trace_s ***tr, int *nseq); - -int -main(int argc, char **argv) -{ - char *hmmfile; /* file to read HMMs from */ - HMMFILE *hmmfp; /* opened hmmfile for reading */ - struct plan7_s *hmm; /* HMM to align to */ - char *seqfile; /* file to read target sequence from */ - int format; /* format of seqfile */ - char **rseq; /* raw, unaligned sequences */ - SQINFO *sqinfo; /* info associated with sequences */ - char **dsq; /* digitized raw sequences */ - int nseq; /* number of sequences */ - 
float *wgt; /* weights to assign to alignment */ - MSA *msa; /* alignment that's created */ - int i; - struct p7trace_s **tr; /* traces for aligned sequences */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - int be_quiet; /* TRUE to suppress verbose banner */ - int matchonly; /* TRUE to show only match state syms */ - char *outfile; /* optional alignment output file */ - FILE *ofp; /* handle on alignment output file */ - char *withali; /* name of additional alignment file to align */ - char *mapali; /* name of additional alignment file to map */ - - /*********************************************** - * Parse command line - ***********************************************/ - - format = SQFILE_UNKNOWN; /* default: autodetect format */ - matchonly = FALSE; - outfile = NULL; - be_quiet = FALSE; - withali = NULL; - mapali = NULL; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-m") == 0) matchonly= TRUE; - else if (strcmp(optname, "-o") == 0) outfile = optarg; - else if (strcmp(optname, "-q") == 0) be_quiet = TRUE; - else if (strcmp(optname, "--mapali") == 0) mapali = optarg; - else if (strcmp(optname, "--withali") == 0) withali = optarg; - else if (strcmp(optname, "--informat") == 0) { - format = String2SeqfileFormat(optarg); - if (format == SQFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - } - else if (strcmp(optname, "-h") == 0) - { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - if (argc - optind != 2) - Die("Incorrect number of arguments.\n%s\n", usage); - - hmmfile = argv[optind++]; - seqfile = argv[optind++]; - - /*********************************************** - * Open HMM file (might be in HMMERDB or current directory). - * Read a single HMM from it. 
- * - * Currently hmmalign disallows the J state and - * only allows one domain per sequence. To preserve - * the S/W entry information, the J state is explicitly - * disallowed, rather than calling a Plan7*Config() function. - * this is a workaround in 2.1 for the 2.0.x "yo!" bug. - ***********************************************/ - - if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) - Die("Failed to open HMM file %s\n%s", hmmfile, usage); - if (!HMMFileRead(hmmfp, &hmm)) - Die("Failed to read any HMMs from %s\n", hmmfile); - HMMFileClose(hmmfp); - if (hmm == NULL) - Die("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile); - hmm->xt[XTE][MOVE] = 1.; /* only 1 domain/sequence ("global" alignment) */ - hmm->xt[XTE][LOOP] = 0.; - P7Logoddsify(hmm, TRUE); - /* do we have the map we might need? */ - if (mapali != NULL && ! (hmm->flags & PLAN7_MAP)) - Die("HMMER: HMM file %s has no map; you can't use --mapali.", hmmfile); - - /*********************************************** - * Open sequence file in current directory. - * Read all seqs from it. - ***********************************************/ - - if (! ReadMultipleRseqs(seqfile, format, &rseq, &sqinfo, &nseq)) - Die("Failed to read any sequences from file %s", seqfile); - - /*********************************************** - * Show the banner - ***********************************************/ - - if (! be_quiet) - { - Banner(stdout, banner); - printf( "HMM file: %s\n", hmmfile); - printf( "Sequence file: %s\n", seqfile); - printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n"); - } - - /*********************************************** - * Do the work - ***********************************************/ - - /* Allocations and initializations. 
- */ - dsq = MallocOrDie(sizeof(char *) * nseq); - tr = MallocOrDie(sizeof(struct p7trace_s *) * nseq); - - /* Align each sequence to the model, collect traces - */ - for (i = 0; i < nseq; i++) - { - dsq[i] = DigitizeSequence(rseq[i], sqinfo[i].len); - - if (P7ViterbiSize(sqinfo[i].len, hmm->M) <= RAMLIMIT) - (void) P7Viterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i])); - else - (void) P7SmallViterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i])); - } - - /* Include an aligned alignment, if desired. - */ - if (mapali != NULL) - include_alignment(mapali, hmm, TRUE, &rseq, &dsq, &sqinfo, &tr, &nseq); - if (withali != NULL) - include_alignment(withali, hmm, FALSE, &rseq, &dsq, &sqinfo, &tr, &nseq); - - /* Turn traces into a multiple alignment - */ - wgt = MallocOrDie(sizeof(float) * nseq); - FSet(wgt, nseq, 1.0); - msa = P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, matchonly); - - /*********************************************** - * Output the alignment - ***********************************************/ - - if (outfile != NULL && (ofp = fopen(outfile, "w")) != NULL) - { - WriteStockholm(ofp, msa); - printf("Alignment saved in file %s\n", outfile); - fclose(ofp); - } - else - WriteStockholm(stdout, msa); - - /*********************************************** - * Cleanup and exit - ***********************************************/ - - for (i = 0; i < nseq; i++) - { - P7FreeTrace(tr[i]); - FreeSequence(rseq[i], &(sqinfo[i])); - free(dsq[i]); - } - MSAFree(msa); - FreePlan7(hmm); - free(sqinfo); - free(rseq); - free(dsq); - free(wgt); - free(tr); - - SqdClean(); - return 0; -} - - -/* Function: include_alignment() - * Date: SRE, Sun Jul 5 15:25:13 1998 [St. Louis] - * - * Purpose: Given the name of a multiple alignment file, - * align that alignment to the HMM, and add traces - * to an existing array of traces. If do_mapped - * is TRUE, we use the HMM's map file. If not, - * we use P7ViterbiAlignAlignment(). 
- * - * Args: seqfile - name of alignment file - * hmm - model to align to - * do_mapped- TRUE if we're to use the HMM's alignment map - * rsq - RETURN: array of rseqs to add to - * dsq - RETURN: array of dsq to add to - * sqinfo - RETURN: array of SQINFO to add to - * tr - RETURN: array of traces to add to - * nseq - RETURN: number of seqs - * - * Returns: new, realloc'ed arrays for rsq, dsq, sqinfo, tr; nseq is - * increased to nseq+ainfo.nseq. - */ -static void -include_alignment(char *seqfile, struct plan7_s *hmm, int do_mapped, - char ***rsq, char ***dsq, SQINFO **sqinfo, - struct p7trace_s ***tr, int *nseq) -{ - int format; /* format of alignment file */ - MSA *msa; /* alignment to align to */ - MSAFILE *afp; - SQINFO *newinfo; /* sqinfo array from msa */ - char **newdsq; - char **newrseq; - int idx; /* counter over aseqs */ - struct p7trace_s *master; /* master trace */ - struct p7trace_s **addtr; /* individual traces for aseq */ - - format = MSAFILE_UNKNOWN; /* invoke Babelfish */ - if ((afp = MSAFileOpen(seqfile, format, NULL)) == NULL) - Die("Alignment file %s could not be opened for reading", seqfile); - if ((msa = MSAFileRead(afp)) == NULL) - Die("Failed to read an alignment from %s\n", seqfile); - MSAFileClose(afp); - for (idx = 0; idx < msa->nseq; idx++) - s2upper(msa->aseq[idx]); - newinfo = MSAToSqinfo(msa); - - /* Verify checksums before mapping */ - if (do_mapped && GCGMultchecksum(msa->aseq, msa->nseq) != hmm->checksum) - Die("The checksums for alignment file %s and the HMM alignment map don't match.", - seqfile); - /* Get a master trace */ - if (do_mapped) master = MasterTraceFromMap(hmm->map, hmm->M, msa->alen); - else master = P7ViterbiAlignAlignment(msa, hmm); - - /* convert to individual traces */ - ImposeMasterTrace(msa->aseq, msa->nseq, master, &addtr); - /* add those traces to existing ones */ - *tr = MergeTraceArrays(*tr, *nseq, addtr, msa->nseq); - - /* additional bookkeeping: add to dsq, sqinfo */ - *rsq = ReallocOrDie((*rsq), 
sizeof(char *) * (*nseq + msa->nseq)); - DealignAseqs(msa->aseq, msa->nseq, &newrseq); - for (idx = *nseq; idx < *nseq + msa->nseq; idx++) - (*rsq)[idx] = newrseq[idx - (*nseq)]; - free(newrseq); - - *dsq = ReallocOrDie((*dsq), sizeof(char *) * (*nseq + msa->nseq)); - DigitizeAlignment(msa, &newdsq); - for (idx = *nseq; idx < *nseq + msa->nseq; idx++) - (*dsq)[idx] = newdsq[idx - (*nseq)]; - free(newdsq); - /* unnecessarily complex, but I can't be bothered... */ - *sqinfo = ReallocOrDie((*sqinfo), sizeof(SQINFO) * (*nseq + msa->nseq)); - for (idx = *nseq; idx < *nseq + msa->nseq; idx++) - SeqinfoCopy(&((*sqinfo)[idx]), &(newinfo[idx - (*nseq)])); - - *nseq = *nseq + msa->nseq; - - /* Cleanup */ - P7FreeTrace(master); - MSAFree(msa); - /* Return */ - return; -} - - - diff --git a/forester/archive/RIO/others/hmmer/src/hmmbuild.c b/forester/archive/RIO/others/hmmer/src/hmmbuild.c deleted file mode 100644 index b9308c7..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmbuild.c +++ /dev/null @@ -1,1129 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* hmmbuild.c - * SRE, Mon Nov 18 12:41:29 1996 - * - * main() for HMM construction from an alignment. 
- * CVS $Id: hmmbuild.c,v 1.1.1.1 2005/03/22 08:34:06 cmzmasek Exp $ - */ - -#include -#include -#include - -#include "structs.h" /* data structures, macros, #define's */ -#include "config.h" /* compile-time configuration constants */ -#include "funcs.h" /* function declarations */ -#include "globals.h" /* alphabet global variables */ -#include "squid.h" /* general sequence analysis library */ -#include "msa.h" /* squid's multiple alignment i/o */ - -static char banner[] = "hmmbuild - build a hidden Markov model from an alignment"; - -static char usage[] = "\ -Usage: hmmbuild [-options] \n\ - Available options are:\n\ - -h : help; print brief help on version and usage\n\ - -n : name; name this (first) HMM \n\ - -o : re-save annotated alignment to \n\ - -A : append; append this HMM to \n\ - -F : force; allow overwriting of \n\ -\n\ - Alternative search algorithm styles: (default: hmmls domain alignment)\n\ - -f : multi-hit local (hmmfs style)\n\ - -g : global alignment (hmms style, Needleman/Wunsch)\n\ - -s : local alignment (hmmsw style, Smith/Waterman)\n\ -"; - -static char experts[] = "\ - Alternative model construction strategies: (default: MAP)\n\ - --fast : Krogh/Haussler fast heuristic construction (see --gapmax)\n\ - --hand : manual construction (requires annotated alignment)\n\ -\n\ - Expert customization of parameters and priors:\n\ - --null : read null (random sequence) model from \n\ - --pam : heuristic PAM-based prior, using BLAST PAM matrix in \n\ - --prior : read Dirichlet prior parameters from \n\ -\n\ - Alternative sequence weighting strategies: (default: GSC weights)\n\ - --wblosum : Henikoff simple filter weights (see --idlevel)\n\ - --wgsc : Gerstein/Sonnhammer/Chothia tree weights (default)\n\ - --wme : maximum entropy (ME)\n\ - --wpb : Henikoff position-based weights\n\ - --wvoronoi : Sibbald/Argos Voronoi weights\n\ - --wnone : don't do any weighting\n\ - --noeff : don't use effective sequence number; just use nseq\n\ - --pbswitch : set switch 
from GSC to position-based wgts at > n seqs\n\ -\n\ - Forcing an alphabet: (normally autodetected)\n\ - --amino : override autodetection, assert that seqs are protein\n\ - --nucleic : override autodetection, assert that seqs are DNA/RNA\n\ -\n\ - Other expert options:\n\ - --archpri : set architecture size prior to {0.85} [0..1]\n\ - --binary : save the model in binary format, not ASCII text\n\ - --cfile : save count vectors to \n\ - --gapmax : max fraction of gaps in mat column {0.50} [0..1]\n\ - --idlevel : set frac. id level used by eff. nseq and --wblosum {0.62}\n\ - --informat : input alignment is in format , not Stockholm\n\ - --pamwgt : set weight on PAM-based prior to {20.}[>=0]\n\ - --swentry : set S/W aggregate entry prob. to {0.5}\n\ - --swexit : set S/W aggregate exit prob. to {0.5}\n\ - --verbose : print boring information\n\ -\n"; - -static struct opt_s OPTIONS[] = { - { "-f", TRUE, sqdARG_NONE }, - { "-g", TRUE, sqdARG_NONE }, - { "-h", TRUE, sqdARG_NONE }, - { "-n", TRUE, sqdARG_STRING}, - { "-o", TRUE, sqdARG_STRING}, - { "-s", TRUE, sqdARG_NONE }, - { "-A", TRUE, sqdARG_NONE }, - { "-F", TRUE, sqdARG_NONE }, - { "--amino", FALSE, sqdARG_NONE }, - { "--archpri", FALSE, sqdARG_FLOAT }, - { "--binary", FALSE, sqdARG_NONE }, - { "--cfile", FALSE, sqdARG_STRING}, - { "--fast", FALSE, sqdARG_NONE}, - { "--gapmax", FALSE, sqdARG_FLOAT }, - { "--hand", FALSE, sqdARG_NONE}, - { "--idlevel", FALSE, sqdARG_FLOAT }, - { "--informat",FALSE, sqdARG_STRING }, - { "--noeff", FALSE, sqdARG_NONE }, - { "--nucleic", FALSE, sqdARG_NONE }, - { "--null", FALSE, sqdARG_STRING }, - { "--pam", FALSE, sqdARG_STRING }, - { "--pamwgt", FALSE, sqdARG_FLOAT }, - { "--pbswitch",FALSE, sqdARG_INT }, - { "--prior", FALSE, sqdARG_STRING }, - { "--swentry", FALSE, sqdARG_FLOAT }, - { "--swexit", FALSE, sqdARG_FLOAT }, - { "--verbose", FALSE, sqdARG_NONE }, - { "--wgsc", FALSE, sqdARG_NONE }, - { "--wblosum", FALSE, sqdARG_NONE }, - { "--wme", FALSE, sqdARG_NONE }, - { "--wnone", 
FALSE, sqdARG_NONE }, - { "--wpb", FALSE, sqdARG_NONE }, - { "--wvoronoi",FALSE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -static void print_all_scores(FILE *fp, struct plan7_s *hmm, - char **dsq, MSA *msa, struct p7trace_s **tr); -static void save_countvectors(FILE *cfp, char *name, struct plan7_s *hmm); -static void position_average_score(struct plan7_s *hmm, char **seq, float *wgt, - int nseq, struct p7trace_s **tr, float *pernode, - float *ret_avg); -static float frag_trace_score(struct plan7_s *hmm, char *dsq, struct p7trace_s *tr, - float *pernode, float expected); -static void maximum_entropy(struct plan7_s *hmm, char **dsq, MSA *msa, - float eff_nseq, - struct p7prior_s *prior, struct p7trace_s **tr); - - -int -main(int argc, char **argv) -{ - char *seqfile; /* seqfile to read alignment from */ - int format; /* format of seqfile */ - MSAFILE *afp; /* open alignment file */ - MSA *msa; /* a multiple sequence alignment */ - char **dsq; /* digitized unaligned aseq's */ - struct plan7_s *hmm; /* constructed HMM; written to hmmfile */ - struct p7prior_s *pri; /* Dirichlet priors to use */ - struct p7trace_s **tr; /* fake tracebacks for aseq's */ - char *hmmfile; /* file to write HMM to */ - FILE *hmmfp; /* HMM output file handle */ - char *name; /* name of the HMM */ - int idx; /* counter for sequences */ - float randomseq[MAXABET]; /* null sequence model */ - float p1; /* null sequence model p1 transition */ - int nali; /* count number of alignments/HMMs */ - char fpopts[3]; /* options to open a file with, e.g. 
"ab" */ - int checksum; /* checksum of the alignment */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - enum p7_construction c_strategy; /* construction strategy choice */ - enum p7_weight { /* weighting strategy */ - WGT_NONE, WGT_GSC, WGT_BLOSUM, WGT_PB, WGT_VORONOI, WGT_ME} w_strategy; - enum p7_config { /* algorithm configuration strategy */ - P7_BASE_CONFIG, P7_LS_CONFIG, P7_FS_CONFIG, P7_SW_CONFIG } cfg_strategy; - float gapmax; /* max frac gaps in mat col for -k */ - int overwrite_protect; /* TRUE to prevent overwriting HMM file */ - int verbose; /* TRUE to show a lot of output */ - char *rndfile; /* random sequence model file to read */ - char *prifile; /* Dirichlet prior file to read */ - char *pamfile; /* PAM matrix file for heuristic prior */ - char *align_ofile; /* name of output alignment file */ - char *cfile; /* output file for count vectors */ - FILE *alignfp; /* open filehandle for alignment resaves */ - FILE *cfp; /* open filehandle for count vector saves*/ - float archpri; /* "architecture" prior on model size */ - float pamwgt; /* weight on PAM for heuristic prior */ - int do_append; /* TRUE to append to hmmfile */ - int do_binary; /* TRUE to write in binary format */ - float blosumlevel; /* BLOSUM frac id filtering level [0.62] */ - float swentry; /* S/W aggregate entry probability */ - float swexit; /* S/W aggregate exit probability */ - int do_eff; /* TRUE to set an effective seq number */ - float eff_nseq; /* effective sequence number */ - int pbswitch; /* nseq >= this, switchover to PB weights*/ - char *setname; /* NULL, or ptr to HMM name to set */ - int gapmax_set; /* TRUE if gapmax was set on commandline */ - - /*********************************************** - * Parse command line - ***********************************************/ - - format = MSAFILE_UNKNOWN; /* autodetect format by default. 
*/ - c_strategy = P7_MAP_CONSTRUCTION; - w_strategy = WGT_GSC; - blosumlevel = 0.62; - cfg_strategy = P7_LS_CONFIG; - gapmax = 0.5; - overwrite_protect = TRUE; - verbose = FALSE; - rndfile = NULL; - prifile = NULL; - pamfile = NULL; - align_ofile = NULL; - alignfp = NULL; - cfile = NULL; - cfp = NULL; - archpri = 0.85; - pamwgt = 20.; - Alphabet_type = hmmNOTSETYET; /* initially unknown */ - name = NULL; - do_append = FALSE; - swentry = 0.5; - swexit = 0.5; - do_eff = TRUE; - do_binary = FALSE; - pbswitch = 1000; - setname = NULL; - gapmax_set = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-f") == 0) cfg_strategy = P7_FS_CONFIG; - else if (strcmp(optname, "-g") == 0) cfg_strategy = P7_BASE_CONFIG; - else if (strcmp(optname, "-n") == 0) setname = optarg; - else if (strcmp(optname, "-o") == 0) align_ofile = optarg; - else if (strcmp(optname, "-s") == 0) cfg_strategy = P7_SW_CONFIG; - else if (strcmp(optname, "-A") == 0) do_append = TRUE; - else if (strcmp(optname, "-F") == 0) overwrite_protect = FALSE; - else if (strcmp(optname, "--amino") == 0) SetAlphabet(hmmAMINO); - else if (strcmp(optname, "--archpri") == 0) archpri = atof(optarg); - else if (strcmp(optname, "--binary") == 0) do_binary = TRUE; - else if (strcmp(optname, "--cfile") == 0) cfile = optarg; - else if (strcmp(optname, "--fast") == 0) c_strategy = P7_FAST_CONSTRUCTION; - else if (strcmp(optname, "--gapmax") == 0) { gapmax = atof(optarg); gapmax_set = TRUE; } - else if (strcmp(optname, "--hand") == 0) c_strategy = P7_HAND_CONSTRUCTION; - else if (strcmp(optname, "--idlevel") == 0) blosumlevel = atof(optarg); - else if (strcmp(optname, "--noeff") == 0) do_eff = FALSE; - else if (strcmp(optname, "--nucleic") == 0) SetAlphabet(hmmNUCLEIC); - else if (strcmp(optname, "--null") == 0) rndfile = optarg; - else if (strcmp(optname, "--pam") == 0) pamfile = optarg; - else if (strcmp(optname, "--pamwgt") == 0) pamwgt = atof(optarg); - else 
if (strcmp(optname, "--pbswitch")== 0) pbswitch = atoi(optarg); - else if (strcmp(optname, "--prior") == 0) prifile = optarg; - else if (strcmp(optname, "--swentry") == 0) swentry = atof(optarg); - else if (strcmp(optname, "--swexit") == 0) swexit = atof(optarg); - else if (strcmp(optname, "--verbose") == 0) verbose = TRUE; - else if (strcmp(optname, "--wgsc") == 0) w_strategy = WGT_GSC; - else if (strcmp(optname, "--wblosum") == 0) w_strategy = WGT_BLOSUM; - else if (strcmp(optname, "--wme") == 0) w_strategy = WGT_ME; - else if (strcmp(optname, "--wpb") == 0) w_strategy = WGT_PB; - else if (strcmp(optname, "--wnone") == 0) w_strategy = WGT_NONE; - else if (strcmp(optname, "--wvoronoi")== 0) w_strategy = WGT_VORONOI; - else if (strcmp(optname, "--informat") == 0) { - format = String2SeqfileFormat(optarg); - if (format == MSAFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - if (! IsAlignmentFormat(format)) - Die("%s is an unaligned format, can't read as an alignment", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(EXIT_SUCCESS); - } - } - if (argc - optind != 2) - Die("Incorrect number of arguments.\n%s\n", usage); - - hmmfile = argv[optind++]; - seqfile = argv[optind++]; - - if (gapmax < 0. || gapmax > 1.) - Die("--gapmax must be a value from 0 to 1\n%s\n", usage); - if (archpri < 0. || archpri > 1.) - Die("--archpri must be a value from 0 to 1\n%s\n", usage); - if (overwrite_protect && !do_append && FileExists(hmmfile)) - Die("HMM file %s already exists. Rename or delete it.", hmmfile); - if (overwrite_protect && align_ofile != NULL && FileExists(align_ofile)) - Die("Alignment resave file %s exists. 
Rename or delete it.", align_ofile); - if (gapmax_set && c_strategy != P7_FAST_CONSTRUCTION) - Die("using --gapmax only makes sense if you use --fast"); - - /*********************************************** - * Preliminaries: open our files for i/o - ***********************************************/ - - /* Open the alignment */ - if ((afp = MSAFileOpen(seqfile, format, NULL)) == NULL) - Die("Alignment file %s could not be opened for reading", seqfile); - - /* Open the HMM output file */ - if (do_append) strcpy(fpopts, "a"); - else strcpy(fpopts, "w"); - if (do_binary) strcat(fpopts, "b"); - if ((hmmfp = fopen(hmmfile, fpopts)) == NULL) - Die("Failed to open HMM file %s for %s\n", hmmfile, - do_append ? "appending" : "writing"); - - /* Open the count vector save file */ - cfp = NULL; - if (cfile != NULL) - if ((cfp = fopen(cfile, "w")) == NULL) - Die("Failed to open count vector file %s for writing\n", cfile); - - /* Open the alignment resave file */ - alignfp = NULL; - if (align_ofile != NULL) - if ((alignfp = fopen(align_ofile, "w")) == NULL) - Die("Failed to open alignment resave file %s for writing\n", align_ofile); - - /*********************************************** - * Show the banner - ***********************************************/ - - Banner(stdout, banner); - printf("Alignment file: %s\n", - seqfile); - printf("File format: %s\n", - SeqfileFormat2String(afp->format)); - - printf("Search algorithm configuration: "); - if (cfg_strategy == P7_BASE_CONFIG) puts("Global alignment (hmms)"); - else if (cfg_strategy == P7_SW_CONFIG) { - puts("Local (hmmsw)"); - printf("S/W aggregate entry probability: %.2f\n", swentry); - printf("S/W aggregate exit probability: %.2f\n", swexit); - } - else if (cfg_strategy == P7_LS_CONFIG) puts("Multiple domain (hmmls)"); - else if (cfg_strategy == P7_FS_CONFIG) { - puts("Multiple local (hmmfs)"); - printf("S/W aggregate entry probability: %.2f\n", swentry); - printf("S/W aggregate exit probability: %.2f\n", swexit); - } - - 
printf("Model construction strategy: "); - if (c_strategy == P7_HAND_CONSTRUCTION) puts("Manual, from #=RF annotation"); - else if (c_strategy==P7_FAST_CONSTRUCTION) printf("Fast/ad hoc (gapmax %.2f)\n", gapmax); - else printf("MAP (gapmax hint: %.2f)\n", gapmax); - - printf("Null model used: %s\n", - (rndfile == NULL) ? "(default)" : rndfile); - - printf("Prior used: %s\n", - (prifile == NULL) ? "(default)" : prifile); - - printf("Sequence weighting method: "); - if (w_strategy == WGT_NONE) puts("none"); - else if (w_strategy == WGT_GSC) puts("G/S/C tree weights"); - else if (w_strategy == WGT_BLOSUM) printf("BLOSUM filter at %.2f id\n", blosumlevel); - else if (w_strategy == WGT_PB) puts("Henikoff position-based"); - else if (w_strategy == WGT_VORONOI)puts("Sibbald/Argos Voronoi"); - else if (w_strategy == WGT_ME) puts("Maximum entropy"); - - printf("New HMM file: %s %s\n", - hmmfile, do_append? "[appending]" : ""); - if (cfile != NULL) - printf("Count vectors saved to: %s\n", cfile); - if (align_ofile != NULL) - printf("Annotated alignment(s) resaved to: %s\n", align_ofile); - printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n"); - - - /*********************************************** - * Get alignment(s), build HMMs one at a time - ***********************************************/ - - nali = 0; - while ((msa = MSAFileRead(afp)) != NULL) - { - /* Print some stuff about what we're about to do. - */ - if (msa->name != NULL) printf("Alignment: %s\n", msa->name); - else printf("Alignment: #%d\n", nali+1); - printf ("Number of sequences: %d\n", msa->nseq); - printf ("Number of columns: %d\n", msa->alen); - puts(""); - fflush(stdout); - - /* Make alignment upper case, because some symbol counting - * things are case-sensitive. 
- */ - for (idx = 0; idx < msa->nseq; idx++) - s2upper(msa->aseq[idx]); - - /* Set up the alphabet globals: - * either already set by --amino or --nucleic, or - * we guess based on the first alignment we see - */ - if (Alphabet_type == hmmNOTSETYET) - DetermineAlphabet(msa->aseq, msa->nseq); - - /* Do some initialization the first time through. - * This code must be delayed until after we've seen the - * first alignment, because we have to see the alphabet type first - */ - if (nali == 0) - { - /* Set up Dirichlet priors */ - if (prifile == NULL) pri = P7DefaultPrior(); - else pri = P7ReadPrior(prifile); - - if (pamfile != NULL) PAMPrior(pamfile, pri, pamwgt); - - /* Set up the null/random seq model */ - if (rndfile == NULL) P7DefaultNullModel(randomseq, &p1); - else P7ReadNullModel(rndfile, randomseq, &p1); - } - - /* Prepare unaligned digitized sequences for internal use - */ - DigitizeAlignment(msa, &dsq); - - /* In some respects we treat DNA more crudely for now; - * for example, we can't do eff seq #, because it's - * calibrated for protein. - */ - if (Alphabet_type == hmmNUCLEIC) - do_eff = FALSE; - - /* Determine "effective sequence number". - * The BlosumWeights() routine is now an efficient O(N) - * memory clustering algorithm that doesn't blow up on, - * say, Pfam's GP120 alignment (13000+ sequences) - */ - eff_nseq = (float) msa->nseq; - if (do_eff) - { - float *wgt; - printf("%-40s ... ", "Determining effective sequence number"); - fflush(stdout); - /* dummy weights array to feed BlosumWeights*/ - wgt = MallocOrDie(sizeof(float) * msa->nseq); - BlosumWeights(msa->aseq, msa->nseq, msa->alen, blosumlevel, wgt); - eff_nseq = FSum(wgt, msa->nseq); - - free(wgt); - printf("done. [%.0f]\n", eff_nseq); - } - - - /* Weight the sequences (optional), - */ - if (w_strategy == WGT_GSC || - w_strategy == WGT_BLOSUM || - w_strategy == WGT_VORONOI || - w_strategy == WGT_PB) - { - printf("%-40s ... 
", "Weighting sequences heuristically"); - fflush(stdout); - - if (w_strategy != WGT_PB && msa->nseq >= pbswitch) - { - printf("[big alignment! doing PB]... "); - PositionBasedWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt); - } - else if (w_strategy == WGT_GSC) - GSCWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt); - else if (w_strategy == WGT_BLOSUM) - BlosumWeights(msa->aseq, msa->nseq, msa->alen, blosumlevel, msa->wgt); - else if (w_strategy == WGT_PB) - PositionBasedWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt); - else if (w_strategy == WGT_VORONOI) - VoronoiWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt); - - printf("done.\n"); - } - - /* Set the effective sequence number (if do_eff is FALSE, eff_nseq - * was set to nseq). - */ - FNorm(msa->wgt, msa->nseq); - FScale(msa->wgt, msa->nseq, eff_nseq); - - /* Build a model architecture. - * If we're not doing MD or ME, that's all we need to do. - * We get an allocated, counts-based HMM back. - * - * Because the architecture algorithms are allowed to change - * gap characters in the alignment, we have to calculate the - * alignment checksum before we enter the algorithms. - */ - printf("%-40s ... ", "Constructing model architecture"); - fflush(stdout); - checksum = GCGMultchecksum(msa->aseq, msa->nseq); - if (c_strategy == P7_FAST_CONSTRUCTION) - P7Fastmodelmaker(msa, dsq, gapmax, &hmm, &tr); - else if (c_strategy == P7_HAND_CONSTRUCTION) - P7Handmodelmaker(msa, dsq, &hmm, &tr); - else - P7Maxmodelmaker(msa, dsq, gapmax, - pri, randomseq, p1, archpri, &hmm, &tr); - hmm->checksum = checksum; - printf("done.\n"); - - /* Save the count vectors if asked. Used primarily for - * making the data files for training priors. - */ - if (cfile != NULL) - { - printf("%-40s ... ", "Saving count vector file"); - fflush(stdout); - save_countvectors(cfp, - (msa->name != NULL ? msa->name : "-"), - hmm); - printf("done. 
[%s]\n", cfile); - } - - /* Record the null model in the HMM; - * add prior contributions in pseudocounts and renormalize. - */ - printf("%-40s ... ", "Converting counts to probabilities"); - fflush(stdout); - Plan7SetNullModel(hmm, randomseq, p1); - P7PriorifyHMM(hmm, pri); - printf("done.\n"); - - /* Model configuration, temporary. - * hmmbuild assumes that it's given an alignment of single domains, - * and the alignment may contain fragments. So, for the purpose of - * scoring the sequences (or, optionally, MD/ME weighting), - * configure the model into hmmsw mode. Later we'll - * configure the model according to how the user wants to - * use it. - */ - Plan7SWConfig(hmm, 0.5, 0.5); - - /* Do model-dependent "weighting" strategies. - */ - if (w_strategy == WGT_ME) - { - printf("\n%-40s ...\n", "Maximum entropy weighting, iterative"); - maximum_entropy(hmm, dsq, msa, eff_nseq, pri, tr); - printf("----------------------------------------------\n\n"); - } - - /* Give the model a name. - * We deal with this differently depending on whether - * we're in an alignment database or a single alignment. - * - * If a single alignment, priority is: - * 1. Use -n if set. - * 2. Use msa->name (avail in Stockholm or SELEX formats only) - * 3. If all else fails, use alignment file name without - * filename extension (e.g. "globins.slx" gets named "globins" - * - * If a multiple MSA database (e.g. Stockholm/Pfam), - * only msa->name is applied. -n is not allowed. - * if msa->name is unavailable, or -n was used, - * a fatal error is thrown. - * - * Because we can't tell whether we've got more than one - * alignment 'til we're on the second one, these fatal errors - * only happen after the first HMM has already been built. - * Oh well. - */ - printf("%-40s ... ", "Setting model name, etc."); - fflush(stdout); - if (nali == 0) /* first (only?) 
HMM in file: */ - { - if (setname != NULL) name = Strdup(setname); - else if (msa->name != NULL) name = Strdup(msa->name); - else name = FileTail(seqfile, TRUE); - } - else - { - if (setname != NULL) - Die("Oops. Wait. You can't use -n with an alignment database."); - else if (msa->name != NULL) name = Strdup(msa->name); - else - Die("Oops. Wait. I need name annotation on each alignment.\n"); - } - Plan7SetName(hmm, name); - free(name); - - /* Transfer other information from the alignment to - * the HMM. This typically only works for SELEX format - * alignments, so these things are conditional/optional. - */ - if (msa->acc != NULL) Plan7SetAccession(hmm, msa->acc); - if (msa->desc != NULL) Plan7SetDescription(hmm, msa->desc); - - if (msa->flags & MSA_SET_GA) - { hmm->flags |= PLAN7_GA; hmm->ga1 = msa->ga1; hmm->ga2 = msa->ga2; } - if (msa->flags & MSA_SET_TC) - { hmm->flags |= PLAN7_TC; hmm->tc1 = msa->tc1; hmm->tc2 = msa->tc2; } - if (msa->flags & MSA_SET_NC) - { hmm->flags |= PLAN7_NC; hmm->nc1 = msa->nc1; hmm->nc2 = msa->nc2; } - - /* Record some other miscellaneous information in the HMM, - * like how/when we built it. - */ - Plan7ComlogAppend(hmm, argc, argv); - Plan7SetCtime(hmm); - hmm->nseq = msa->nseq; - printf("done. [%s]\n", hmm->name); - - /* Print information for the user - */ - printf("\nConstructed a profile HMM (length %d)\n", hmm->M); - PrintPlan7Stats(stdout, hmm, dsq, msa->nseq, tr); - printf("\n"); - - /* Configure the model for chosen algorithm - */ - printf("%-40s ... ", "Finalizing model configuration"); - fflush(stdout); - switch (cfg_strategy) { - case P7_BASE_CONFIG: Plan7GlobalConfig(hmm); break; - case P7_SW_CONFIG: Plan7SWConfig(hmm, swentry, swexit); break; - case P7_LS_CONFIG: Plan7LSConfig(hmm); break; - case P7_FS_CONFIG: Plan7FSConfig(hmm, swentry, swexit); break; - default: Die("bogus configuration choice"); - } - printf("done.\n"); - - /* Save new HMM to disk: open a file for appending or writing. - */ - printf("%-40s ... 
", "Saving model to file"); - fflush(stdout); - if (do_binary) WriteBinHMM(hmmfp, hmm); - else WriteAscHMM(hmmfp, hmm); - printf("done.\n"); - - /* the annotated alignment may be resaved */ - if (alignfp != NULL) - { - MSA *new_msa; - SQINFO *sqinfo; - - printf("%-40s ... ", "Saving annotated alignment"); - fflush(stdout); - sqinfo = MSAToSqinfo(msa); - new_msa = P7Traces2Alignment(dsq, sqinfo, msa->wgt, msa->nseq, - hmm->M, tr, FALSE); - - WriteStockholm(alignfp, new_msa); - MSAFree(new_msa); - for (idx = 0; idx < msa->nseq; idx++) - FreeSequence(NULL, &(sqinfo[idx])); - free(sqinfo); - printf("done.\n"); - } - - /* Verbose output; show scores for each sequence - */ - if (verbose) - print_all_scores(stdout, hmm, dsq, msa, tr); - - /* Clean up before moving on to next alignment - */ - for (idx = 0; idx < msa->nseq; idx++) P7FreeTrace(tr[idx]); - free(tr); - FreePlan7(hmm); - MSAFree(msa); - Free2DArray((void **) dsq, msa->nseq); - fflush(hmmfp); - if (cfp != NULL) fflush(cfp); - if (alignfp != NULL) fflush(alignfp); - - puts("//\n"); - nali++; - } - - - - /* Clean up and exit - */ - MSAFileClose(afp); - fclose(hmmfp); - if (cfp != NULL) fclose(cfp); - if (alignfp != NULL) fclose(alignfp); - P7FreePrior(pri); - SqdClean(); - return 0; -} - - -/* Function: print_all_scores() - * - * Purpose: For each training sequence, print its score under - * the final model. - * - * Args: fp - where to print the output (usu. stdout) - * hmm - newly constructed HMM, with prob's. - * dsq - digitized unaligned training sequences. 
- * msa - alignment and associated info - * tr - array of tracebacks - * - * Return: (void) - */ -static void -print_all_scores(FILE *fp, struct plan7_s *hmm, - char **dsq, MSA *msa, struct p7trace_s **tr) -{ - int idx; /* counter for sequences */ - - /* make sure model scores are ready */ - P7Logoddsify(hmm, TRUE); - /* header */ - fputs("**\n", fp); - fputs("Individual training sequence scores:\n", fp); - /* score for each sequence */ - for (idx = 0; idx < msa->nseq; idx++) - { - fprintf(fp, "%7.2f %-12s %s\n", - P7TraceScore(hmm, dsq[idx], tr[idx]), - msa->sqname[idx], - (MSAGetSeqDescription(msa,idx) != NULL) ? - MSAGetSeqDescription(msa,idx) : ""); - P7PrintTrace(fp, tr[idx], hmm, dsq[idx]); - } - fputs("\n", fp); -} - - - -/* Function: save_countvectors() - * - * Purpose: Save emission/transition count vectors to a file. - * Used for gathering the data on which to train a - * prior (e.g. mixture Dirichlet, etc.) - * - * The format of the file is one vector per line: - * M ...: 20 match emission counts in order AC..WY. - * followed by two chars of CS, CA annotation. - * I ...: 20 insert emission counts in order AC..WY. - * followed by two chars of CS, CA annotation. - * T ...: 7 transition counts in order TMM, TMI, TMD, - * TIM, TII, TDM, TDD. (see structs.h) - * followed by four chars of structure - * annotation: CS, CS of M+1; CA, CA of M+1. 
- * - * Args: cfp - open counts file - * name - name of alignment or HMM to associate with these vectors - * hmm - counts-based HMM - */ -static void -save_countvectors(FILE *cfp, char *name, struct plan7_s *hmm) -{ - int k, x; - /* match emission vectors */ - for (k = 1; k <= hmm->M; k++) - { - fputs("M ", cfp); - for (x = 0; x < Alphabet_size; x++) - fprintf(cfp, "%8.2f ", hmm->mat[k][x]); - - fprintf(cfp, "%15s %6d %6d ", name, hmm->map[k], k); - if ((hmm->flags & PLAN7_CS) && hmm->flags & PLAN7_CA) - fprintf(cfp, "%c %c", hmm->cs[k], hmm->ca[k]); - else - fputs("- -", cfp); - fputs("\n", cfp); - } - /* insert emission vectors */ - for (k = 1; k < hmm->M; k++) - { - fputs("I ", cfp); - for (x = 0; x < Alphabet_size; x++) - fprintf(cfp, "%8.2f ", hmm->ins[k][x]); - - fprintf(cfp, "%15s %6d %6d ", name, hmm->map[k], k); - if ((hmm->flags & PLAN7_CS) && hmm->flags & PLAN7_CA) - fprintf(cfp, "%c %c", hmm->cs[k], hmm->ca[k]); - else - fputs("- -", cfp); - - fputs("\n", cfp); - } - /* transition vectors */ - for (k = 1; k < hmm->M; k++) - { - fputs("T ", cfp); - - for (x = 0; x < 7; x++) - fprintf(cfp, "%8.2f ", hmm->t[k][x]); - - fprintf(cfp, "%15s %6d %6d ", name, hmm->map[k], k); - if ((hmm->flags & PLAN7_CS) && hmm->flags & PLAN7_CA) - fprintf(cfp, "%c %c %c %c", - hmm->cs[k], hmm->cs[k+1], - hmm->ca[k], hmm->ca[k+1]); - else - fputs("- -", cfp); - fputs("\n", cfp); - } -} - - -/* Function: position_average_score() - * Date: Wed Dec 31 09:36:35 1997 [StL] - * - * Purpose: Calculate scores from tracebacks, keeping them - * in a position specific array. The final array - * is normalized position-specifically too, according - * to how many sequences contributed data to this - * position. Used for compensating for sequence - * fragments in ME and MD score optimization. - * Very much ad hoc. - * - * Code related to (derived from) TraceScore(). 
- * - * Args: hmm - HMM structure, scores valid - * dsq - digitized unaligned sequences - * wgt - weights on the sequences - * nseq - number of sequences - * tr - array of nseq tracebacks that aligns each dsq to hmm - * pernode - RETURN: [0]1..M array of position-specific avg scores - * ret_avg - RETURN: overall average full-length, one-domain score - * - * Return: 1 on success, 0 on failure. - * pernode is malloc'ed [0]1..M by CALLER and filled here. - */ -static void -position_average_score(struct plan7_s *hmm, - char **dsq, - float *wgt, - int nseq, - struct p7trace_s **tr, - float *pernode, - float *ret_avg) -{ - int pos; /* position in seq */ - int sym; - int tpos; /* position in trace/state sequence */ - float *counts; /* counts at each position */ - float avg; /* RETURN: average overall */ - int k; /* counter for model position */ - int idx; /* counter for sequence number */ - - /* Allocations - */ - counts = MallocOrDie ((hmm->M+1) * sizeof(float)); - FSet(pernode, hmm->M+1, 0.); - FSet(counts, hmm->M+1, 0.); - - /* Loop over traces, accumulate weighted scores per position - */ - for (idx = 0; idx < nseq; idx++) - for (tpos = 0; tpos < tr[idx]->tlen; tpos++) - { - pos = tr[idx]->pos[tpos]; - sym = (int) dsq[idx][tr[idx]->pos[tpos]]; - k = tr[idx]->nodeidx[tpos]; - - /* Counts: how many times did we use this model position 1..M? - * (weighted) - */ - if (tr[idx]->statetype[tpos] == STM || tr[idx]->statetype[tpos] == STD) - counts[k] += wgt[idx]; - - /* Emission scores. - */ - if (tr[idx]->statetype[tpos] == STM) - pernode[k] += wgt[idx] * Scorify(hmm->msc[sym][k]); - else if (tr[idx]->statetype[tpos] == STI) - pernode[k] += wgt[idx] * Scorify(hmm->isc[sym][k]); - - /* Transition scores. 
- */ - if (tr[idx]->statetype[tpos] == STM || - tr[idx]->statetype[tpos] == STD || - tr[idx]->statetype[tpos] == STI) - pernode[k] += wgt[idx] * - Scorify(TransitionScoreLookup(hmm, tr[idx]->statetype[tpos], tr[idx]->nodeidx[tpos], - tr[idx]->statetype[tpos+1],tr[idx]->nodeidx[tpos+1])); - } - - /* Divide accumulated scores by accumulated weighted counts - */ - avg = 0.; - for (k = 1; k <= hmm->M; k++) - { - pernode[k] /= counts[k]; - avg += pernode[k]; - } - - free(counts); - *ret_avg = avg; - return; -} - - -/* Function: frag_trace_score() - * Date: SRE, Wed Dec 31 10:03:47 1997 [StL] - * - * Purpose: Allow MD/ME optimization to be used for alignments - * that include fragments and multihits -- estimate a full-length - * per-domain score. - * - * - * - * Return: "corrected" score. - */ -static float -frag_trace_score(struct plan7_s *hmm, char *dsq, struct p7trace_s *tr, - float *pernode, float expected) -{ - float sc; /* corrected score */ - float fragexp; /* expected score for a trace like this */ - int tpos; /* position in trace */ - - /* get uncorrected score */ - sc = P7TraceScore(hmm, dsq, tr); - - /* calc expected score for trace like this */ - fragexp = 0.; - for (tpos = 0; tpos < tr->tlen; tpos++) - if (tr->statetype[tpos] == STM || tr->statetype[tpos] == STD) - fragexp += pernode[tr->nodeidx[tpos]]; - - /* correct for multihits */ - fragexp /= (float) TraceDomainNumber(tr); - - /* extrapolate to full-length, one-hit score */ - sc = sc * expected / fragexp; - return sc; -} - - -/* Function: maximum_entropy() - * Date: SRE, Fri Jan 2 10:56:00 1998 [StL] - * - * Purpose: Optimizes a model according to maximum entropy weighting. - * See Krogh and Mitchison (1995). - * - * [Actually, we do minimum relative entropy, rather than - * maximum entropy. Same thing, though we refer to "ME" - * weights and models. The optimization is a steepest - * descents minimization of the relative entropy.] 
- * - * Expects to be called shortly after a Maxmodelmaker() - * or Handmodelmaker(), so that both a new model architecture - * (with MAP parameters) and fake tracebacks are available. - * - * Prints a summary of optimization progress to stdout. - * - * Args: hmm - model. allocated, set with initial MAP parameters. - * dsq - dealigned digitized seqs the model is based on - * ainfo - extra info for aseqs - * nseq - number of aseqs - * eff_nseq- effective sequence number; weights normalize up to this. - * prior - prior distributions for parameterizing model - * tr - array of fake traces for each sequence - * - * Return: (void) - * hmm changed to an ME HMM - * ainfo changed, contains ME weights - */ -static void -maximum_entropy(struct plan7_s *hmm, char **dsq, MSA *msa, - float eff_nseq, struct p7prior_s *prior, struct p7trace_s **tr) -{ - float *wgt; /* current best set of ME weights */ - float *new_wgt; /* new set of ME weights to try */ - float *sc; /* log-odds score of each sequence */ - float *grad; /* gradient */ - float epsilon; /* steepness of descent */ - float relative_entropy; /* current best relative entropy */ - float new_entropy; /* relative entropy at new weights */ - float last_new_entropy; /* last new_entropy we calc'ed */ - float use_epsilon; /* current epsilon value in use */ - int idx; /* counter over sequences */ - int i1, i2; /* counters for iterations */ - - float converge_criterion; - float minw, maxw; /* min, max weight */ - int posw, highw; /* number of positive weights */ - float mins, maxs, avgs; /* min, max, avg score */ - float *pernode; /* expected score per node of HMM */ - float expscore; /* expected score of complete HMM */ - int max_iter; /* bulletproof against infinite loop bugs */ - - epsilon = 0.2; /* works fine */ - max_iter = 666; - - /* Allocations - */ - sc = MallocOrDie (sizeof(float) * msa->nseq); - wgt = MallocOrDie (sizeof(float) * msa->nseq); - new_wgt = MallocOrDie (sizeof(float) * msa->nseq); - grad = MallocOrDie 
(sizeof(float) * msa->nseq); - pernode = MallocOrDie (sizeof(float) * (hmm->M+1)); - - /* Initialization. Start with all weights == 1.0. - * Find relative entropy and gradient. - */ - Plan7SWConfig(hmm, 0.5, 0.5); - P7Logoddsify(hmm, TRUE); - - FSet(wgt, msa->nseq, 1.0); - position_average_score(hmm, dsq, wgt, msa->nseq, tr, pernode,&expscore); - for (idx = 0; idx < msa->nseq; idx++) - sc[idx] = frag_trace_score(hmm, dsq[idx], tr[idx], pernode, expscore); - relative_entropy = FSum(sc, msa->nseq) / (float) msa->nseq; - for (idx = 0; idx < msa->nseq; idx++) - grad[idx] = relative_entropy - sc[idx]; - - - printf("iter avg-sc min-sc max-sc min-wgt max-wgt +wgt ++wgt rel.ent convergence\n"); - printf("---- ------ ------ ------ ------- ------- ---- ----- ------- -----------\n"); - mins = maxs = avgs = sc[0]; - for (idx = 1; idx < msa->nseq; idx++) - { - if (sc[idx] < mins) mins = sc[idx]; - if (sc[idx] > maxs) maxs = sc[idx]; - avgs += sc[idx]; - } - avgs /= (float) msa->nseq; - printf("%4d %6.1f %6.1f %6.1f %7.2f %7.2f %4d %5d %7.2f %8s\n", - 0, avgs, mins, maxs, 1.0, 1.0, msa->nseq, 0, relative_entropy, "-"); - - - /* Steepest descents optimization; - * iterate until relative entropy converges. - */ - i1 = 0; - while (++i1 < max_iter) - { - /* Gradient gives us a line of steepest descents. - * (Roughly speaking, anyway. We actually have a constraint - * that weights are nonnegative and normalized, and the - * gradient doesn't take these into account.) - * Look along this line, a distance of epsilon * gradient: - * if new point is better, accept; if new point is worse, - * move back along the line by half the distance and re-evaluate. 
- */ - use_epsilon = epsilon; - new_entropy = relative_entropy + 1.0; /* just ensure new > old */ - - i2 = 0; - while (new_entropy > relative_entropy && ++i2 < max_iter) - { - last_new_entropy = new_entropy; - - /* find a new point in weight space */ - for (idx = 0; idx < msa->nseq; idx++) - { - new_wgt[idx] = wgt[idx] + use_epsilon * grad[idx]; - if (new_wgt[idx] < 0.) new_wgt[idx] = 0.0; - } - FNorm(new_wgt, msa->nseq); - FScale(new_wgt, msa->nseq, (float) msa->nseq); - - /* Make new HMM using these weights */ - ZeroPlan7(hmm); - for (idx = 0; idx < msa->nseq; idx++) - P7TraceCount(hmm, dsq[idx], new_wgt[idx], tr[idx]); - P7PriorifyHMM(hmm, prior); - - - /* Evaluate new point */ - Plan7SWConfig(hmm, 0.5, 0.5); - P7Logoddsify(hmm, TRUE); - position_average_score(hmm, dsq, new_wgt, msa->nseq, tr, pernode, &expscore); - for (idx = 0; idx < msa->nseq; idx++) - sc[idx] = frag_trace_score(hmm, dsq[idx], tr[idx], pernode, expscore); - new_entropy = FDot(sc, new_wgt, msa->nseq) / (float) msa->nseq; - - use_epsilon /= 2.0; - /* Failsafe: we're not converging. Set epsilon to zero, - * do one more round. - */ - if (use_epsilon < 1e-6) use_epsilon = 0.0; - if (use_epsilon == 0.0) break; - - /* Failsafe: avoid infinite loops. Sometimes the - new entropy converges without ever being better - than the previous point, probably as a result - of minor roundoff error. */ - if (last_new_entropy == new_entropy) break; - } - if (i2 == max_iter) printf(" -- exceeded maximum iterations; giving up --\n"); - - /* Evaluate convergence before accepting the new weights; - * then, accept the new point and evaluate the gradient there. 
- */ - converge_criterion = fabs((relative_entropy-new_entropy)/relative_entropy); - relative_entropy = new_entropy; - FCopy(wgt, new_wgt, msa->nseq); - for (idx = 0; idx < msa->nseq; idx++) - grad[idx] = relative_entropy - sc[idx]; - - /* Print some statistics about this iteration - */ - mins = maxs = avgs = sc[0]; - minw = maxw = wgt[0]; - posw = (wgt[0] > 0.0) ? 1 : 0; - highw = (wgt[0] > 1.0) ? 1 : 0; - for (idx = 1; idx < msa->nseq; idx++) - { - if (sc[idx] < mins) mins = sc[idx]; - if (sc[idx] > maxs) maxs = sc[idx]; - if (wgt[idx] < minw) minw = wgt[idx]; - if (wgt[idx] > maxw) maxw = wgt[idx]; - if (wgt[idx] > 0.0) posw++; - if (wgt[idx] > 1.0) highw++; - avgs += sc[idx]; - } - avgs /= (float) msa->nseq; - printf("%4d %6.1f %6.1f %6.1f %7.2f %7.2f %4d %5d %7.2f %8.5f\n", - i1, - avgs, mins, maxs, - minw, maxw, posw, highw, - relative_entropy, converge_criterion); - - if (converge_criterion < 1e-5) break; - } - if (i1 == max_iter) printf(" -- exceeded maximum iterations; giving up --\n"); - - /* Renormalize weights to sum to eff_nseq, and save. 
- */ - FNorm(wgt, msa->nseq); - FScale(wgt, msa->nseq, (float) eff_nseq); - FCopy(msa->wgt, wgt, msa->nseq); - /* Make final HMM using these adjusted weights */ - ZeroPlan7(hmm); - for (idx = 0; idx < msa->nseq; idx++) - P7TraceCount(hmm, dsq[idx], wgt[idx], tr[idx]); - P7PriorifyHMM(hmm, prior); - - /* Cleanup and return - */ - free(pernode); - free(new_wgt); - free(grad); - free(wgt); - free(sc); - return; -} diff --git a/forester/archive/RIO/others/hmmer/src/hmmcalibrate-pvm.c b/forester/archive/RIO/others/hmmer/src/hmmcalibrate-pvm.c deleted file mode 100644 index 52824c6..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmcalibrate-pvm.c +++ /dev/null @@ -1,209 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -#ifdef HMMER_PVM - -/* hmmcalibrate-pvm.c - * SRE, Tue Aug 18 15:19:28 1998 - * Redesigned for better parallelization: SRE, Wed Dec 1 09:48:58 1999 - * - * Design: - * Initialization: - * receive parameters of random sequence synthesis, and an HMM. - * send an OK signal to the master. - * - * Main loop: - * receive work packet: # of seqs to make - * Synthesize and score # seqs - * send results: # raw scores. - * - * Termination: - * master sends a shutdown signal instead of a work packet. - * - * PVM slave for hmmcalibrate. 
- * RCS $Id: hmmcalibrate-pvm.c,v 1.1.1.1 2005/03/22 08:34:12 cmzmasek Exp $ - */ - -#include -#include -#include -#include - -#include "version.h" -#include "structs.h" /* data structures, macros, #define's */ -#include "config.h" /* compile-time configuration constants */ -#include "funcs.h" /* function declarations */ -#include "globals.h" /* alphabet global variables */ -#include "squid.h" /* general sequence analysis library */ -#include "stopwatch.h" /* CPU timing routines */ - -static void leave_pvm(void); - -int -main(void) -{ - int master_tid; /* PVM TID of our master */ - int slaveidx; /* my slave index (0..nslaves-1) */ - struct plan7_s *hmm; /* HMM to calibrate, sent from master */ - char *seq; /* synthetic random sequence */ - char *dsq; /* digitized seq */ - int len; /* length of seq */ - float *sc; /* scores of seqs */ - int seed; /* random number seed */ - int nsample; /* number of seqs to sample */ - int fixedlen; /* if nonzero, fixed length of seq */ - float lenmean; /* Gaussian mean length of seq */ - float lensd; /* Gaussian length std. dev. for seq */ - float randomseq[MAXABET]; /* iid frequencies of residues */ - float p1; - int alphatype; /* alphabet type, hmmAMINO or hmmNUCLEIC */ - int idx; - int code; - Stopwatch_t stopwatch; /* CPU timings */ - - /* Register leave_pvm() cleanup function so any exit() call - * first calls pvm_exit(). - */ - if (atexit(leave_pvm) != 0) { - pvm_exit(); Die("slave couldn't register leave_pvm()"); - } - - /***************************************************************** - * initialization. - * Master broadcasts the problem to us: - * an HMM; - * parameters of the HMM calibration. - * We send back: - * an OK flag, and our RELEASE, for some sanity checking. - ******************************************************************/ - - StopwatchStart(&stopwatch); - - master_tid = pvm_parent(); /* who's our master? 
*/ - - pvm_recv(master_tid, HMMPVM_INIT); - pvm_upkfloat(&lenmean, 1, 1); /* mean length of random seqs */ - pvm_upkfloat(&lensd, 1, 1); /* std. dev. of random seq len */ - pvm_upkint(&fixedlen, 1, 1); /* if non-zero, override lenmean */ - pvm_upkint(&alphatype, 1, 1); /* alphabet type, hmmAMINO or hmmNUCLEIC */ - pvm_upkint(&seed, 1, 1); /* random number seed */ - SetAlphabet(alphatype); /* must set alphabet before reading HMM! */ - hmm = PVMUnpackHMM(); - if (hmm == NULL) Die("oh no, the HMM never arrived"); - - P7DefaultNullModel(randomseq, &p1); - P7Logoddsify(hmm, TRUE); - - /* tell the master we're OK and ready to go (or not) - */ - code = HMMPVM_OK; - pvm_initsend(PvmDataDefault); - pvm_pkint(&code, 1, 1); - PVMPackString(RELEASE); - pvm_send(master_tid, HMMPVM_RESULTS); - - /***************************************************************** - * Main loop. - * Receive: a number of sequences we're supposed to do. - * If we receive a 0, we have no work, so wait for shutdown; - * if we receive a -1, shut down. 
- *****************************************************************/ - slaveidx = -1; - for (;;) - { - pvm_recv(master_tid, HMMPVM_WORK); - pvm_upkint(&nsample, 1, 1); - pvm_upkint(&idx, 1, 1); - - if (nsample == 0) continue; /* go into stasis */ - if (nsample == -1) break; /* shut down */ - - if (slaveidx == -1) { /* first time: set id, seed sre_random */ - slaveidx = idx; - sre_srandom(seed+idx); /* unique seed in current PVM */ - } - - sc = MallocOrDie(sizeof(float) * nsample); - for (idx = 0; idx < nsample; idx++) - { - /* choose length of random sequence */ - if (fixedlen) len = fixedlen; - else do len = (int) Gaussrandom(lenmean, lensd); while (len < 1); - /* generate it */ - seq = RandomSequence(Alphabet, randomseq, Alphabet_size, len); - dsq = DigitizeSequence(seq, len); - SQD_DPRINTF2(("slave %d seq: %d : %20.20s...\n", slaveidx, len, seq)); - - if (P7ViterbiSize(len, hmm->M) <= RAMLIMIT) - sc[idx] = P7Viterbi(dsq, len, hmm, NULL); - else - sc[idx] = P7SmallViterbi(dsq, len, hmm, NULL); - - free(seq); - free(dsq); - } - - /* Return output to master, some of which is sanity checking. - * 1. our slave index. - * 2. how many seqs we simulated. - * 3. the array of scores we got, so the master can stuff - * them into a histogram. - */ - pvm_initsend(PvmDataDefault); - pvm_pkint(&slaveidx, 1, 1); - pvm_pkint(&nsample, 1, 1); - pvm_pkfloat(sc, nsample,1); - pvm_send(master_tid, HMMPVM_RESULTS); - - /* cleanup - */ - free(sc); - } - - /*********************************************** - * Cleanup, return. - ***********************************************/ - - FreePlan7(hmm); - StopwatchStop(&stopwatch); - - /* tell the master we heard his shutdown signal, and - * give him our CPU times; then exit. - */ - pvm_initsend(PvmDataDefault); - pvm_pkint(&slaveidx, 1, 1); - StopwatchPVMPack(&stopwatch); - pvm_send(master_tid, HMMPVM_RESULTS); - - return 0; /* pvm_exit() is called by atexit() registration. 
*/ -} - -/* Function: leave_pvm() - * - * Purpose: Cleanup function, to deal with crashes. We register - * this function using atexit() so it gets called before - * the slave dies. - */ -void leave_pvm(void) -{ - SQD_DPRINTF1(("slave leaving PVM.\n")); - pvm_exit(); -} - -#else /* if HMMER_PVM not defined: include a dummy */ - -#include -int main(void) -{ - printf("hmmcalibrate-pvm disabled. PVM support was not compiled into HMMER.\n"); - exit(0); -} - -#endif diff --git a/forester/archive/RIO/others/hmmer/src/hmmcalibrate.c b/forester/archive/RIO/others/hmmer/src/hmmcalibrate.c deleted file mode 100644 index b003f8b..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmcalibrate.c +++ /dev/null @@ -1,957 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* hmmcalibrate.c - * SRE, Fri Oct 31 09:25:21 1997 [St. Louis] - * - * Score an HMM against random sequence data sets; - * set histogram fitting parameters. 
- * - * CVS $Id: hmmcalibrate.c,v 1.1.1.1 2005/03/22 08:34:03 cmzmasek Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef HMMER_THREADS -#include -#endif -#ifdef HMMER_PVM -#include -#endif - -#include "squid.h" /* general sequence analysis library */ -#include "config.h" /* compile-time configuration constants */ -#include "structs.h" /* data structures, macros, #define's */ -#include "funcs.h" /* function declarations */ -#include "globals.h" /* alphabet global variables */ -#include "version.h" /* release version info */ -#include "stopwatch.h" /* process timings */ - -static char banner[] = "hmmcalibrate -- calibrate HMM search statistics"; - -static char usage[] = "\ -Usage: hmmcalibrate [-options] \n\ -Available options are:\n\ - -h : print short usage and version info, then exit\n\ -"; - -static char experts[] = "\ - --cpu : run threads in parallel (if threaded)\n\ - --fixed : fix random sequence length at \n\ - --histfile : save histogram(s) to file \n\ - --mean : set random seq length mean at [350]\n\ - --num : set number of sampled seqs to [5000]\n\ - --pvm : run on a Parallel Virtual Machine (PVM)\n\ - --sd : set random seq length std. dev to [350]\n\ - --seed : set random seed to [time()]\n\ -"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "--cpu", FALSE, sqdARG_INT }, - { "--fixed", FALSE, sqdARG_INT }, - { "--histfile", FALSE, sqdARG_STRING }, - { "--mean", FALSE, sqdARG_FLOAT }, - { "--num", FALSE, sqdARG_INT }, - { "--pvm", FALSE, sqdARG_NONE }, - { "--sd", FALSE, sqdARG_FLOAT }, - { "--seed", FALSE, sqdARG_INT}, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - - -static void main_loop_serial(struct plan7_s *hmm, int seed, int nsample, - float lenmean, float lensd, int fixedlen, - struct histogram_s **ret_hist, float *ret_max); - -#ifdef HMMER_THREADS -/* A structure of this type is shared by worker threads in the POSIX - * threads parallel version. 
- */ -struct workpool_s { - /* Static configuration: - */ - struct plan7_s *hmm; /* ptr to single HMM to search with */ - int fixedlen; /* if >0, fix random seq len to this */ - float lenmean; /* mean of Gaussian for random seq len */ - float lensd; /* s.d. of Gaussian for random seq len */ - float *randomseq; /* 0..Alphabet_size-1 i.i.d. probs */ - int nsample; /* number of random seqs to do */ - - /* Shared (mutex-protected) input: - */ - int nseq; /* current number of seqs searched */ - - /* Shared (mutex-protected) output: - */ - struct histogram_s *hist; /* histogram */ - float max_score; /* maximum score seen */ - Stopwatch_t watch; /* Timings accumulated for threads */ - - /* Thread pool information: - */ - pthread_t *thread; /* our pool of threads */ - int num_threads; /* number of threads */ - pthread_mutex_t input_lock; /* a mutex protecting input fields */ - pthread_mutex_t output_lock; /* a mutex protecting output fields */ -}; -static void main_loop_threaded(struct plan7_s *hmm, int seed, int nsample, - float lenmean, float lensd, int fixedlen, - int nthreads, - struct histogram_s **ret_hist, float *ret_max, - Stopwatch_t *twatch); -static struct workpool_s *workpool_start(struct plan7_s *hmm, - float lenmean, float lensd, int fixedlen, - float *randomseq, int nsample, - struct histogram_s *hist, - int num_threads); -static void workpool_stop(struct workpool_s *wpool); -static void workpool_free(struct workpool_s *wpool); -static void *worker_thread(void *ptr); -#endif /* HMMER_THREADS */ - -#ifdef HMMER_PVM -static void main_loop_pvm(struct plan7_s *hmm, int seed, int nsample, - int lumpsize, - float lenmean, float lensd, int fixedlen, - struct histogram_s **ret_hist, float *ret_max, - Stopwatch_t *extrawatch, int *ret_nslaves); -#endif /* HMMER_PVM */ - - -int -main(int argc, char **argv) -{ - char *hmmfile; /* HMM file to open */ - char *tmpfile; /* temporary calibrated HMM file */ - HMMFILE *hmmfp; /* opened hmm file pointer */ - FILE *outfp; /* 
for writing HMM(s) into tmpfile */ - char *mode; /* write mode, "w" or "wb" */ - struct plan7_s *hmm; /* the hidden Markov model */ - int idx; /* counter over sequences */ - sigset_t blocksigs; /* list of signals to protect from */ - int nhmm; /* number of HMMs calibrated */ - - struct histogram_s *hist; /* a resulting histogram */ - float max; /* maximum score from an HMM */ - char *histfile; /* histogram save file */ - FILE *hfp; /* open file pointer for histfile */ - - Stopwatch_t stopwatch; /* main stopwatch for process */ - Stopwatch_t extrawatch; /* stopwatch for threads/PVM slaves */ - - float *mu; /* array of EVD mu's for HMMs */ - float *lambda; /* array of EVD lambda's for HMMs */ - int mu_lumpsize; /* allocation lumpsize for mu, lambda */ - - int nsample; /* number of random seqs to sample */ - int seed; /* random number seed */ - int fixedlen; /* fixed length, or 0 if unused */ - float lenmean; /* mean of length distribution */ - float lensd; /* std dev of length distribution */ - int do_pvm; /* TRUE to use PVM */ - int pvm_lumpsize; /* # of seqs to do per PVM slave exchange */ - int pvm_nslaves; /* number of slaves used in the PVM */ - - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - int num_threads; /* number of worker threads */ - - - /*********************************************** - * Parse the command line - ***********************************************/ - StopwatchStart(&stopwatch); - StopwatchZero(&extrawatch); - - nsample = 5000; - fixedlen = 0; - lenmean = 325.; - lensd = 200.; - seed = (int) time ((time_t *) NULL); - histfile = NULL; - do_pvm = FALSE; - pvm_lumpsize = 20; /* 20 seqs/PVM exchange: sets granularity */ - mu_lumpsize = 100; -#ifdef HMMER_THREADS - num_threads = ThreadNumber(); /* only matters if we're threaded */ -#else - num_threads = 0; -#endif - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) 
- { - if (strcmp(optname, "--cpu") == 0) num_threads = atoi(optarg); - else if (strcmp(optname, "--fixed") == 0) fixedlen = atoi(optarg); - else if (strcmp(optname, "--histfile") == 0) histfile = optarg; - else if (strcmp(optname, "--mean") == 0) lenmean = atof(optarg); - else if (strcmp(optname, "--num") == 0) nsample = atoi(optarg); - else if (strcmp(optname, "--pvm") == 0) do_pvm = TRUE; - else if (strcmp(optname, "--sd") == 0) lensd = atof(optarg); - else if (strcmp(optname, "--seed") == 0) seed = atoi(optarg); - else if (strcmp(optname, "-h") == 0) - { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - - if (argc - optind != 1) Die("Incorrect number of arguments.\n%s\n", usage); - hmmfile = argv[optind++]; - -#ifndef HMMER_PVM - if (do_pvm) Die("PVM support is not compiled into HMMER; --pvm doesn't work."); -#endif -#ifndef HMMER_THREADS - if (num_threads) Die("Posix threads support is not compiled into HMMER; --cpu doesn't have any effect"); -#endif - - /*********************************************** - * Open our i/o file pointers, make sure all is well - ***********************************************/ - - /* HMM file */ - if ((hmmfp = HMMFileOpen(hmmfile, NULL)) == NULL) - Die("failed to open HMM file %s for reading.", hmmfile); - - /* histogram file */ - hfp = NULL; - if (histfile != NULL) { - if ((hfp = fopen(histfile, "w")) == NULL) - Die("Failed to open histogram save file %s for writing\n", histfile); - } - - /* Generate calibrated HMM(s) in a tmp file in the current - * directory. When we're finished, we delete the original - * HMM file and rename() this one. That way, the worst - * effect of a catastrophic failure should be that we - * leave a tmp file lying around, but the original HMM - * file remains uncorrupted. tmpnam() doesn't work portably here, - * because it'll put the file in /tmp and we won't - * necessarily be able to rename() it from there. 
- */ - tmpfile = MallocOrDie(strlen(hmmfile) + 5); - strcpy(tmpfile, hmmfile); - strcat(tmpfile, ".xxx"); /* could be more inventive here... */ - if (FileExists(tmpfile)) - Die("temporary file %s already exists; please delete it first", tmpfile); - if (hmmfp->is_binary) mode = "wb"; - else mode = "w"; - - /*********************************************** - * Show the banner - ***********************************************/ - - Banner(stdout, banner); - printf("HMM file: %s\n", hmmfile); - if (fixedlen) - printf("Length fixed to: %d\n", fixedlen); - else { - printf("Length distribution mean: %.0f\n", lenmean); - printf("Length distribution s.d.: %.0f\n", lensd); - } - printf("Number of samples: %d\n", nsample); - printf("random seed: %d\n", seed); - printf("histogram(s) saved to: %s\n", - histfile != NULL ? histfile : "[not saved]"); - if (do_pvm) - printf("PVM: ACTIVE\n"); - else if (num_threads > 0) - printf("POSIX threads: %d\n", num_threads); - printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n"); - - /*********************************************** - * Read the HMMs one at a time, and send them off - * in probability form to one of the main loops. - * The main loop functions are responsible for - * synthesizing random sequences and returning - * a score histogram for each HMM. - ***********************************************/ - - nhmm = 0; - mu = MallocOrDie(sizeof(float) * mu_lumpsize); - lambda = MallocOrDie(sizeof(float) * mu_lumpsize); - - while (HMMFileRead(hmmfp, &hmm)) - { - if (hmm == NULL) - Die("HMM file may be corrupt or in incorrect format; parse failed"); - - if (! 
do_pvm && num_threads == 0) - main_loop_serial(hmm, seed, nsample, lenmean, lensd, fixedlen, - &hist, &max); -#ifdef HMMER_PVM - else if (do_pvm) { - pvm_nslaves = 0; /* solely to silence compiler warnings */ - main_loop_pvm(hmm, seed, nsample, pvm_lumpsize, - lenmean, lensd, fixedlen, - &hist, &max, &extrawatch, &pvm_nslaves); - } -#endif -#ifdef HMMER_THREADS - else if (num_threads > 0) - main_loop_threaded(hmm, seed, nsample, lenmean, lensd, fixedlen, - num_threads, &hist, &max, &extrawatch); -#endif - else - Die("wait. that can't happen. I didn't do anything."); - - - /* Fit an EVD to the observed histogram. - * The TRUE left-censors and fits only the right slope of the histogram. - * The 9999. is an arbitrary high number that means we won't trim - * outliers on the right. - */ - if (! ExtremeValueFitHistogram(hist, TRUE, 9999.)) - Die("fit failed; -n may be set too small?\n"); - - mu[nhmm] = hist->param[EVD_MU]; - lambda[nhmm] = hist->param[EVD_LAMBDA]; - nhmm++; - if (nhmm % 100 == 0) { - mu = ReallocOrDie(mu, sizeof(float) * (nhmm+mu_lumpsize)); - lambda = ReallocOrDie(lambda, sizeof(float) * (nhmm+mu_lumpsize)); - } - - /* Output - */ - printf("HMM : %s\n", hmm->name); - printf("mu : %12f\n", hist->param[EVD_MU]); - printf("lambda : %12f\n", hist->param[EVD_LAMBDA]); - printf("max : %12f\n", max); - printf("//\n"); - - if (hfp != NULL) - { - fprintf(hfp, "HMM: %s\n", hmm->name); - PrintASCIIHistogram(hfp, hist); - fprintf(hfp, "//\n"); - } - - FreeHistogram(hist); - } - SQD_DPRINTF1(("Main body believes it has calibrations for %d HMMs\n", nhmm)); - - /***************************************************************** - * Rewind the HMM file for a second pass. - * Write a temporary HMM file with new mu, lambda values in it - *****************************************************************/ - - HMMFileRewind(hmmfp); - if (FileExists(tmpfile)) - Die("Ouch. 
Temporary file %s appeared during the run.", tmpfile); - if ((outfp = fopen(tmpfile, mode)) == NULL) - Die("Ouch. Temporary file %s couldn't be opened for writing.", tmpfile); - - for (idx = 0; idx < nhmm; idx++) - { - /* Sanity checks - */ - if (!HMMFileRead(hmmfp, &hmm)) - Die("Ran out of HMMs too early in pass 2"); - if (hmm == NULL) - Die("HMM file %s was corrupted? Parse failed in pass 2", hmmfile); - - /* Put results in HMM - */ - hmm->mu = mu[idx]; - hmm->lambda = lambda[idx]; - hmm->flags |= PLAN7_STATS; - Plan7ComlogAppend(hmm, argc, argv); - - /* Save HMM to tmpfile - */ - if (hmmfp->is_binary) WriteBinHMM(outfp, hmm); - else WriteAscHMM(outfp, hmm); - - FreePlan7(hmm); - } - - /***************************************************************** - * Now, carefully remove original file and replace it - * with the tmpfile. Note the protection from signals; - * we wouldn't want a user to ctrl-C just as we've deleted - * their HMM file but before the new one is moved. - *****************************************************************/ - - HMMFileClose(hmmfp); - if (fclose(outfp) != 0) PANIC; - - if (sigemptyset(&blocksigs) != 0) PANIC; - if (sigaddset(&blocksigs, SIGINT) != 0) PANIC; - if (sigprocmask(SIG_BLOCK, &blocksigs, NULL) != 0) PANIC; - if (remove(hmmfile) != 0) PANIC; - if (rename(tmpfile, hmmfile) != 0) PANIC; - if (sigprocmask(SIG_UNBLOCK, &blocksigs, NULL) != 0) PANIC; - - /*********************************************** - * Exit - ***********************************************/ - - StopwatchStop(&stopwatch); - if (do_pvm > 0) { - printf("PVM processors used: %d\n", pvm_nslaves); - StopwatchInclude(&stopwatch, &extrawatch); - } -#ifdef PTHREAD_TIMES_HACK - else if (num_threads > 0) StopwatchInclude(&stopwatch, &extrawatch); -#endif - - /* StopwatchDisplay(stdout, "CPU Time: ", &stopwatch); */ - - free(mu); - free(lambda); - free(tmpfile); - if (hfp != NULL) fclose(hfp); - SqdClean(); - return 0; -} - -/* Function: main_loop_serial() - * Date: SRE, 
Tue Aug 18 16:18:28 1998 [St. Louis] - * - * Purpose: Given an HMM and parameters for synthesizing random - * sequences; return a histogram of scores. - * (Serial version) - * - * Args: hmm - an HMM to calibrate. - * seed - random number seed - * nsample - number of seqs to synthesize - * lenmean - mean length of random sequence - * lensd - std dev of random seq length - * fixedlen - if nonzero, override lenmean, always this len - * ret_hist - RETURN: the score histogram - * ret_max - RETURN: highest score seen in simulation - * - * Returns: (void) - * hist is alloc'ed here, and must be free'd by caller. - */ -static void -main_loop_serial(struct plan7_s *hmm, int seed, int nsample, - float lenmean, float lensd, int fixedlen, - struct histogram_s **ret_hist, float *ret_max) -{ - struct histogram_s *hist; - float randomseq[MAXABET]; - float p1; - float max; - char *seq; - char *dsq; - float score; - int sqlen; - int idx; - - /* Initialize. - * We assume we've already set the alphabet (safe, because - * HMM input sets the alphabet). - */ - sre_srandom(seed); - P7Logoddsify(hmm, TRUE); - P7DefaultNullModel(randomseq, &p1); - hist = AllocHistogram(-200, 200, 100); - max = -FLT_MAX; - - for (idx = 0; idx < nsample; idx++) - { - /* choose length of random sequence */ - if (fixedlen) sqlen = fixedlen; - else do sqlen = (int) Gaussrandom(lenmean, lensd); while (sqlen < 1); - /* generate it */ - seq = RandomSequence(Alphabet, randomseq, Alphabet_size, sqlen); - dsq = DigitizeSequence(seq, sqlen); - - if (P7ViterbiSize(sqlen, hmm->M) <= RAMLIMIT) - score = P7Viterbi(dsq, sqlen, hmm, NULL); - else - score = P7SmallViterbi(dsq, sqlen, hmm, NULL); - - AddToHistogram(hist, score); - if (score > max) max = score; - - free(dsq); - free(seq); - } - - *ret_hist = hist; - *ret_max = max; - return; -} - - -#ifdef HMMER_THREADS -/* Function: main_loop_threaded() - * Date: SRE, Wed Dec 1 12:43:09 1999 [St. 
Louis] - * - * Purpose: Given an HMM and parameters for synthesizing random - * sequences; return a histogram of scores. - * (Threaded version.) - * - * Args: hmm - an HMM to calibrate. - * seed - random number seed - * nsample - number of seqs to synthesize - * lenmean - mean length of random sequence - * lensd - std dev of random seq length - * fixedlen - if nonzero, override lenmean, always this len - * nthreads - number of threads to start - * ret_hist - RETURN: the score histogram - * ret_max - RETURN: highest score seen in simulation - * twatch - RETURN: accumulation of thread times - * - * Returns: (void) - * hist is alloc'ed here, and must be free'd by caller. - */ -static void -main_loop_threaded(struct plan7_s *hmm, int seed, int nsample, - float lenmean, float lensd, int fixedlen, - int nthreads, - struct histogram_s **ret_hist, float *ret_max, - Stopwatch_t *twatch) -{ - struct histogram_s *hist; - float randomseq[MAXABET]; - float p1; - struct workpool_s *wpool; /* pool of worker threads */ - - /* Initialize. - * We assume we've already set the alphabet (safe, because - * HMM input sets the alphabet). - */ - sre_srandom(seed); - P7Logoddsify(hmm, TRUE); - P7DefaultNullModel(randomseq, &p1); - hist = AllocHistogram(-200, 200, 100); - - wpool = workpool_start(hmm, lenmean, lensd, fixedlen, randomseq, nsample, - hist, nthreads); - workpool_stop(wpool); - - *ret_hist = hist; - *ret_max = wpool->max_score; - StopwatchInclude(twatch, &(wpool->watch)); - - workpool_free(wpool); - return; -} - -/***************************************************************** - * POSIX threads implementation. - * API: - * workpool_start() (makes a workpool_s structure. Starts calculations.) - * workpool_stop() (waits for threads to finish.) - * [process histogram] - * workpool_free() (destroys the structure) - * - * Threads: - * worker_thread() (the actual parallelized worker thread). 
- *****************************************************************/ - -/* Function: workpool_start() - * Date: SRE, Thu Jul 16 11:09:05 1998 [St. Louis] - * - * Purpose: Initialize a workpool_s structure, and return it. - * - * Args: hmm - the HMM to calibrate - * fixedlen - 0, or a fixed length for seqs (bypass of Gaussian) - * lenmean - mean sequence length - * lensd - std. dev. for sequence length - * randomseq- i.i.d. frequencies for residues, 0..Alphabet_size-1 - * nsample - how many seqs to calibrate on - * hist - histogram structure for storing results - * num_threads - how many processors to run on - * - * Returns: ptr to struct workpool_s. - * Caller must wait for threads to finish with workpool_stop(), - * then free the structure with workpool_free(). - */ -static struct workpool_s * -workpool_start(struct plan7_s *hmm, float lenmean, float lensd, int fixedlen, - float *randomseq, int nsample, struct histogram_s *hist, - int num_threads) -{ - struct workpool_s *wpool; - pthread_attr_t attr; - int i; - int rtn; - - wpool = MallocOrDie(sizeof(struct workpool_s)); - wpool->thread = MallocOrDie(num_threads * sizeof(pthread_t)); - wpool->hmm = hmm; - wpool->fixedlen = fixedlen; - wpool->lenmean = lenmean; - wpool->lensd = lensd; - wpool->randomseq = randomseq; - wpool->nsample = nsample; - - wpool->nseq = 0; - wpool->hist = hist; - wpool->max_score = -FLT_MAX; - wpool->num_threads= num_threads; - - StopwatchZero(&(wpool->watch)); - - if ((rtn = pthread_mutex_init(&(wpool->input_lock), NULL)) != 0) - Die("pthread_mutex_init FAILED; %s\n", strerror(rtn)); - if ((rtn = pthread_mutex_init(&(wpool->output_lock), NULL)) != 0) - Die("pthread_mutex_init FAILED; %s\n", strerror(rtn)); - - /* Create slave threads. - * Note the crazy machinations we have to go through to achieve concurrency. - * You'd think that POSIX threads were portable... ha. 
- * On IRIX 6.5, system scope threads are only available to root, or if - * /etc/capability has been configured specially, so to avoid strange - * permissions errors we can't set PTHREAD_SCOPE_SYSTEM for IRIX. - * On IRIX pre-6.5, we can't get good concurrency, period. As of 6.5, - * SGI provides the nonportable pthread_setconcurrency() call. - * On FreeBSD (3.0 snapshots), the pthread_attr_setscope() call isn't - * even provided, apparently on grounds of "if it doesn't do anything, - * why provide it?" Hello? POSIX compliance, perhaps? - * On Sun Solaris, we need to set system scope to achieve concurrency. - * Linux and DEC Digital UNIX seem to work fine in either process scope - * or system scope, without a pthread_setconcurrency call. - */ - pthread_attr_init(&attr); -#ifndef __sgi -#ifdef HAVE_PTHREAD_ATTR_SETSCOPE - pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); -#endif -#endif -#ifdef HAVE_PTHREAD_SETCONCURRENCY - pthread_setconcurrency(num_threads+1); -#endif - for (i = 0; i < num_threads; i++) - if ((rtn = pthread_create(&(wpool->thread[i]), &attr, - worker_thread , (void *) wpool)) != 0) - Die("Failed to create thread %d; return code %d\n", i, rtn); - - pthread_attr_destroy(&attr); - - return wpool; -} - -/* Function: workpool_stop() - * Date: SRE, Thu Jul 16 11:20:16 1998 [St. Louis] - * - * Purpose: Waits for threads in a workpool to finish. - * - * Args: wpool -- ptr to the workpool structure - * - * Returns: (void) - */ -static void -workpool_stop(struct workpool_s *wpool) -{ - int i; - /* wait for threads to stop */ - for (i = 0; i < wpool->num_threads; i++) - if (pthread_join(wpool->thread[i],NULL) != 0) - Die("pthread_join failed"); - return; -} - -/* Function: workpool_free() - * Date: SRE, Thu Jul 16 11:26:27 1998 [St. Louis] - * - * Purpose: Free a workpool_s structure, after the threads - * have finished. - * - * Args: wpool -- ptr to the workpool. 
- * - * Returns: (void) - */ -static void -workpool_free(struct workpool_s *wpool) -{ - free(wpool->thread); - free(wpool); - return; -} - -/* Function: worker_thread() - * Date: SRE, Thu Jul 16 10:41:02 1998 [St. Louis] - * - * Purpose: The procedure executed by the worker threads. - * - * Args: ptr - (void *) that is recast to a pointer to - * the workpool. - * - * Returns: (void *) - */ -void * -worker_thread(void *ptr) -{ - struct plan7_s *hmm; - struct workpool_s *wpool; - char *seq; - char *dsq; - int len; - float sc; - int rtn; - Stopwatch_t thread_watch; - - StopwatchStart(&thread_watch); - wpool = (struct workpool_s *) ptr; - hmm = wpool->hmm; - for (;;) - { - /* 1. Synthesize a random sequence. - * The input sequence number is a shared resource, - * and sre_random() isn't thread-safe, so protect - * the whole section with mutex. - */ - /* acquire a lock */ - if ((rtn = pthread_mutex_lock(&(wpool->input_lock))) != 0) - Die("pthread_mutex_lock failure: %s\n", strerror(rtn)); - /* generate a sequence */ - wpool->nseq++; - if (wpool->nseq > wpool->nsample) - { /* we're done; release input lock, break loop */ - if ((rtn = pthread_mutex_unlock(&(wpool->input_lock))) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); - break; - } - if (wpool->fixedlen) len = wpool->fixedlen; - else do len = (int) Gaussrandom(wpool->lenmean, wpool->lensd); while (len < 1); - seq = RandomSequence(Alphabet, wpool->randomseq, Alphabet_size, len); - - /* release the lock */ - if ((rtn = pthread_mutex_unlock(&(wpool->input_lock))) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); - - /* 2. Score the sequence against the model. - */ - dsq = DigitizeSequence(seq, len); - - if (P7ViterbiSize(len, hmm->M) <= RAMLIMIT) - sc = P7Viterbi(dsq, len, hmm, NULL); - else - sc = P7SmallViterbi(dsq, len, hmm, NULL); - free(dsq); - free(seq); - - /* 3. Save the output; hist and max_score are shared, - * so protect this section with the output mutex. 
- */ - /* acquire lock on the output queue */ - if ((rtn = pthread_mutex_lock(&(wpool->output_lock))) != 0) - Die("pthread_mutex_lock failure: %s\n", strerror(rtn)); - /* save output */ - AddToHistogram(wpool->hist, sc); - if (sc > wpool->max_score) wpool->max_score = sc; - /* release our lock */ - if ((rtn = pthread_mutex_unlock(&(wpool->output_lock))) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); - } - - StopwatchStop(&thread_watch); - /* acquire lock on the output queue */ - if ((rtn = pthread_mutex_lock(&(wpool->output_lock))) != 0) - Die("pthread_mutex_lock failure: %s\n", strerror(rtn)); - /* accumulate cpu time into main stopwatch */ - StopwatchInclude(&(wpool->watch), &thread_watch); - /* release our lock */ - if ((rtn = pthread_mutex_unlock(&(wpool->output_lock))) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); - - pthread_exit(NULL); - return NULL; /* solely to silence compiler warnings */ -} -#endif /* HMMER_THREADS */ - - - -#ifdef HMMER_PVM -/* Function: main_loop_pvm() - * Date: SRE, Wed Aug 19 13:59:54 1998 [St. Louis] - * - * Purpose: Given an HMM and parameters for synthesizing random - * sequences; return a histogram of scores. - * (PVM version) - * - * Args: hmm - an HMM to calibrate. - * seed - random number seed - * nsample - number of seqs to synthesize - * lumpsize- # of seqs per slave exchange; controls granularity - * lenmean - mean length of random sequence - * lensd - std dev of random seq length - * fixedlen- if nonzero, override lenmean, always this len - * hist - RETURN: the score histogram - * ret_max - RETURN: highest score seen in simulation - * extrawatch - RETURN: total CPU time spend in slaves. - * ret_nslaves- RETURN: number of PVM slaves run. - * - * Returns: (void) - * hist is alloc'ed here, and must be free'd by caller. 
- */ -static void -main_loop_pvm(struct plan7_s *hmm, int seed, int nsample, int lumpsize, - float lenmean, float lensd, int fixedlen, - struct histogram_s **ret_hist, float *ret_max, - Stopwatch_t *extrawatch, int *ret_nslaves) -{ - struct histogram_s *hist; - int master_tid; - int *slave_tid; - int nslaves; - int nsent; /* # of seqs we've asked for so far */ - int ndone; /* # of seqs we've got results for so far */ - int packet; /* # of seqs to have a slave do */ - float max; - int slaveidx; /* id of a slave */ - float *sc; /* scores returned by a slave */ - Stopwatch_t slavewatch; - int i; - - StopwatchZero(extrawatch); - hist = AllocHistogram(-200, 200, 100); - max = -FLT_MAX; - - /* Initialize PVM - */ - if ((master_tid = pvm_mytid()) < 0) - Die("pvmd not responding -- do you have PVM running?"); -#if DEBUGLEVEL >= 1 - pvm_catchout(stderr); /* catch output for debugging */ -#endif - PVMSpawnSlaves("hmmcalibrate-pvm", &slave_tid, &nslaves); - - /* Initialize the slaves - */ - pvm_initsend(PvmDataDefault); - pvm_pkfloat(&lenmean, 1, 1); - pvm_pkfloat(&lensd, 1, 1); - pvm_pkint( &fixedlen, 1, 1); - pvm_pkint( &Alphabet_type, 1, 1); - pvm_pkint( &seed, 1, 1); - if (! PVMPackHMM(hmm)) Die("Failed to pack the HMM"); - pvm_mcast(slave_tid, nslaves, HMMPVM_INIT); - SQD_DPRINTF1(("Initialized %d slaves\n", nslaves)); - - /* Confirm slaves' OK status. - */ - PVMConfirmSlaves(slave_tid, nslaves); - SQD_DPRINTF1(("Slaves confirm that they're ok...\n")); - - /* Load the slaves - */ - nsent = ndone = 0; - for (slaveidx = 0; slaveidx < nslaves; slaveidx++) - { - packet = (nsample - nsent > lumpsize ? 
lumpsize : nsample - nsent); - - pvm_initsend(PvmDataDefault); - pvm_pkint(&packet, 1, 1); - pvm_pkint(&slaveidx, 1, 1); - pvm_send(slave_tid[slaveidx], HMMPVM_WORK); - nsent += packet; - } - SQD_DPRINTF1(("Loaded %d slaves\n", nslaves)); - - /* Receive/send loop - */ - sc = MallocOrDie(sizeof(float) * lumpsize); - while (nsent < nsample) - { - /* integrity check of slaves */ - PVMCheckSlaves(slave_tid, nslaves); - - /* receive results */ - SQD_DPRINTF2(("Waiting for results...\n")); - pvm_recv(-1, HMMPVM_RESULTS); - pvm_upkint(&slaveidx, 1, 1); - pvm_upkint(&packet, 1, 1); - pvm_upkfloat(sc, packet, 1); - SQD_DPRINTF2(("Got results.\n")); - ndone += packet; - - /* store results */ - for (i = 0; i < packet; i++) { - AddToHistogram(hist, sc[i]); - if (sc[i] > max) max = sc[i]; - } - /* send new work */ - packet = (nsample - nsent > lumpsize ? lumpsize : nsample - nsent); - - pvm_initsend(PvmDataDefault); - pvm_pkint(&packet, 1, 1); - pvm_pkint(&slaveidx, 1, 1); - pvm_send(slave_tid[slaveidx], HMMPVM_WORK); - SQD_DPRINTF2(("Told slave %d to do %d more seqs.\n", slaveidx, packet)); - nsent += packet; - } - - /* Wait for the last output to come in. - */ - while (ndone < nsample) - { - /* integrity check of slaves */ - PVMCheckSlaves(slave_tid, nslaves); - - /* receive results */ - SQD_DPRINTF1(("Waiting for final results...\n")); - pvm_recv(-1, HMMPVM_RESULTS); - pvm_upkint(&slaveidx, 1, 1); - pvm_upkint(&packet, 1, 1); - pvm_upkfloat(sc, packet, 1); - SQD_DPRINTF2(("Got some final results.\n")); - ndone += packet; - /* store results */ - for (i = 0; i < packet; i++) { - AddToHistogram(hist, sc[i]); - if (sc[i] > max) max = sc[i]; - } - } - - /* Shut down the slaves: send -1,-1,-1. - */ - pvm_initsend(PvmDataDefault); - packet = -1; - pvm_pkint(&packet, 1, 1); - pvm_pkint(&packet, 1, 1); - pvm_pkint(&packet, 1, 1); - pvm_mcast(slave_tid, nslaves, HMMPVM_WORK); - - /* Collect stopwatch results; quit the VM; return. 
- */ - for (i = 0; i < nslaves; i++) - { - pvm_recv(-1, HMMPVM_RESULTS); - pvm_upkint(&slaveidx, 1, 1); - StopwatchPVMUnpack(&slavewatch); - - SQD_DPRINTF1(("Slave %d finished; says it used %.2f cpu, %.2f sys\n", - slaveidx, slavewatch.user, slavewatch.sys)); - - StopwatchInclude(extrawatch, &slavewatch); - } - - free(slave_tid); - free(sc); - pvm_exit(); - *ret_hist = hist; - *ret_max = max; - *ret_nslaves = nslaves; - return; -} -#endif /* HMMER_PVM */ - - - diff --git a/forester/archive/RIO/others/hmmer/src/hmmconvert.c b/forester/archive/RIO/others/hmmer/src/hmmconvert.c deleted file mode 100644 index 6a9ea06..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmconvert.c +++ /dev/null @@ -1,209 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* hmmconvert.c - * SRE, Thu Oct 30 08:56:22 1997; St. Louis - * - * main() for converting between HMM file formats, and - * for converting HMMs to other software formats like GCG profiles. 
- * - * CVS $Id: hmmconvert.c,v 1.1.1.1 2005/03/22 08:33:58 cmzmasek Exp $ - */ - -#include -#include - -#include "structs.h" /* data structures, macros, #define's */ -#include "config.h" /* compile-time configuration constants */ -#include "funcs.h" /* function declarations */ -#include "globals.h" /* alphabet global variables */ -#include "squid.h" /* general sequence analysis library */ - -static char banner[] = "hmmconvert - convert between profile HMM file formats"; - -static char usage[] = "\ -Usage: hmmconvert [-options] \n\ - Available options are:\n\ - -h : help; print brief help on version and usage\n\ -\n\ - -a : convert to HMMER ASCII file (the default)\n\ - -b : convert to HMMER binary file\n\ - -p : convert to GCG Profile .prf format\n\ - -P : convert to Compugen extended .eprf profile format\n\ -\n\ - -A : append mode; append to \n\ - -F : force mode; allow overwriting of existing files\n\ -"; - -static char experts[] = "\ -\n"; - - -static struct opt_s OPTIONS[] = { - { "-a", TRUE, sqdARG_NONE }, - { "-b", TRUE, sqdARG_NONE }, - { "-h", TRUE, sqdARG_NONE }, - { "-p", TRUE, sqdARG_NONE }, - { "-A", TRUE, sqdARG_NONE }, - { "-F", TRUE, sqdARG_NONE }, - { "-P", TRUE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *infile; /* name of input HMM file */ - char *outfile; /* name of output HMM file */ - HMMFILE *infp; /* input HMM file ptr */ - FILE *outfp; /* output HMM file ptr */ - char *mode; /* mode to open file in */ - struct plan7_s *hmm; /* a profile HMM structure */ - int nhmm; /* number of HMMs converted */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - int do_append; /* TRUE to append to existing outfile */ - int do_force; /* TRUE to allow overwriting */ - enum hmmfmt_e { P7ASCII, P7BINARY, GCGPROFILE, BICPROFILE } - outfmt; /* output format */ - - 
/*********************************************** - * Parse command line - ***********************************************/ - - outfmt = P7ASCII; - do_append = FALSE; - do_force = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-a") == 0) { outfmt = P7ASCII; } - else if (strcmp(optname, "-b") == 0) { outfmt = P7BINARY; } - else if (strcmp(optname, "-p") == 0) { outfmt = GCGPROFILE; } - else if (strcmp(optname, "-A") == 0) { do_append = TRUE; } - else if (strcmp(optname, "-F") == 0) { do_force = TRUE; } - else if (strcmp(optname, "-P") == 0) { outfmt = BICPROFILE; } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 2) - Die("Incorrect number of arguments.\n%s\n", usage); - - infile = argv[optind++]; - outfile = argv[optind++]; - - /*********************************************** - * Open input HMM database (might be in HMMERDB or current directory) - ***********************************************/ - - if ((infp = HMMFileOpen(infile, "HMMERDB")) == NULL) - Die("Failed to open HMM database %s\n%s", infile, usage); - - /*********************************************** - * Open output HMM file - ***********************************************/ - - if (do_append) - { /* If we're appending to a file, it needs to be Plan7 format */ - HMMFILE *test; - - if (FileExists(outfile)) { - test = HMMFileOpen(outfile, NULL); - if (test == NULL) - Die("%s not an HMM file; I refuse to append to it; using stdout instead", - outfile); - - /* bug #14 fix. 12/24/00, xref STL3 p.133. */ - if (test->is_binary && outfmt != P7BINARY) - Die("File %s is in Plan 7 binary format; must append the same fmt.", outfile); - else if (! 
test->is_binary && outfmt != P7ASCII) - Die("File %s is in Plan 7 ASCII format; must append the same fmt.", outfile); - - HMMFileClose(test); - } - switch (outfmt) { - case P7ASCII: mode = "a"; break; - case P7BINARY: mode = "ab"; break; - case GCGPROFILE: Die("You cannot append GCG profiles"); - case BICPROFILE: Die("You cannot append Compugen extended profiles"); - default: Die("unexpected format"); - } - } - else - { /* else, we're writing a new file */ - if (! do_force && FileExists(outfile)) - Die("Output HMM file %s already exists. Please rename or delete it.", outfile); - switch (outfmt) { - case P7ASCII: mode = "w"; break; - case P7BINARY: mode = "wb"; break; - case GCGPROFILE: mode = "w"; break; - case BICPROFILE: mode = "w"; break; - default: Die("unexpected format"); - } - } - if ((outfp = fopen(outfile, mode)) == NULL) - Die("Failed to open output file %s for writing", outfile); - - /*********************************************** - * Show the banner - ***********************************************/ - - Banner(stdout, banner); - printf( "Input HMM file: %s\n", infile); - printf( "Output HMM file: %s\n", outfile); - printf( "Converting to: "); - switch (outfmt) { - case P7ASCII: puts("HMMER Plan7 ASCII"); break; - case P7BINARY: puts("HMMER Plan7 binary"); break; - case GCGPROFILE: puts("GCG Profile .prf"); break; - case BICPROFILE: puts("Compugen .eprf profile"); break; - default: Die("unexpected fault"); - } - printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n"); - - /*********************************************** - * Do the conversion - ***********************************************/ - - nhmm = 0; - while (HMMFileRead(infp, &hmm)) { - if (hmm == NULL) - Die("HMM file %s may be corrupt or in incorrect format; parse failed", infile); - - switch(outfmt) { - case P7ASCII: WriteAscHMM(outfp, hmm); break; - case P7BINARY: WriteBinHMM(outfp, hmm); break; - case GCGPROFILE: WriteProfile(outfp, hmm, FALSE); break; - case BICPROFILE: 
WriteProfile(outfp, hmm, TRUE); break; - default: Die("unexpected format"); - } - - printf(" - converted %s\n", hmm->name); - FreePlan7(hmm); - nhmm++; - } - printf("\n%d HMM(s) converted and written to %s\n", nhmm, outfile); - - /*********************************************** - * Clean-up and exit. - ***********************************************/ - - HMMFileClose(infp); - fclose(outfp); - SqdClean(); - return EXIT_SUCCESS; -} diff --git a/forester/archive/RIO/others/hmmer/src/hmmemit.c b/forester/archive/RIO/others/hmmer/src/hmmemit.c deleted file mode 100644 index 857b61c..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmemit.c +++ /dev/null @@ -1,267 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* hmmemit.c - * SRE, Sun Mar 8 14:11:24 1998 [St. 
Louis] - * - * main() for generating sequences from an HMM - * CVS $Id: hmmemit.c,v 1.1.1.1 2005/03/22 08:34:09 cmzmasek Exp $ - */ - -#include -#include -#include - -#include "structs.h" /* data structures, macros, #define's */ -#include "config.h" /* compile-time configuration constants */ -#include "funcs.h" /* function declarations */ -#include "globals.h" /* alphabet global variables */ -#include "squid.h" /* general sequence analysis library */ -#include "msa.h" /* squid's multiple sequence i/o */ - -static char banner[] = "hmmemit - generate sequences from a profile HMM"; - -static char usage[] = "\ -Usage: hmmemit [-options] \n\ -Available options are:\n\ - -a : write generated sequences as an alignment, not FASTA\n\ - -c : generate a single \"consensus\" sequence\n\ - -h : help; print brief help on version and usage\n\ - -n : emit sequences (default 10)\n\ - -o : save sequences in file \n\ - -q : quiet - suppress verbose banner\n\ -"; - -static char experts[] = "\ - --seed : set random number seed to \n\ -"; - -static struct opt_s OPTIONS[] = { - { "-a", TRUE, sqdARG_NONE }, - { "-c", TRUE, sqdARG_NONE }, - { "-h", TRUE, sqdARG_NONE }, - { "-n", TRUE, sqdARG_INT}, - { "-o", TRUE, sqdARG_STRING}, - { "-q", TRUE, sqdARG_NONE}, - { "--seed", FALSE, sqdARG_INT}, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *hmmfile; /* file to read HMMs from */ - HMMFILE *hmmfp; /* opened hmmfile for reading */ - struct plan7_s *hmm; /* HMM to generate from */ - FILE *fp; /* output file handle */ - int L; /* length of a sequence */ - int i; /* counter over sequences */ - int nhmm; /* counter over HMMs */ - - char *ofile; /* output sequence file */ - int nseq; /* number of seqs to sample */ - int seed; /* random number generator seed */ - int be_quiet; /* TRUE to silence header/footer */ - int do_alignment;/* TRUE to output in aligned format */ - int do_consensus;/* TRUE to do a single consensus seq */ - - char 
*optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - /*********************************************** - * Parse command line - ***********************************************/ - - nseq = 10; - seed = time ((time_t *) NULL); - be_quiet = FALSE; - do_alignment = FALSE; - do_consensus = FALSE; - ofile = NULL; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-a") == 0) do_alignment = TRUE; - else if (strcmp(optname, "-c") == 0) do_consensus = TRUE; - else if (strcmp(optname, "-n") == 0) nseq = atoi(optarg); - else if (strcmp(optname, "-o") == 0) ofile = optarg; - else if (strcmp(optname, "-q") == 0) be_quiet = TRUE; - else if (strcmp(optname, "--seed") == 0) seed = atoi(optarg); - else if (strcmp(optname, "-h") == 0) - { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 1) - Die("Incorrect number of arguments.\n%s\n", usage); - - hmmfile = argv[optind++]; - - sre_srandom(seed); - - if (do_alignment && do_consensus) - Die("Sorry, -a and -c are incompatible.\nUsage:\n%s", usage); - if (nseq != 10 && do_consensus) - Warn("-c (consensus) overrides -n (# of sampled seqs)"); - - /*********************************************** - * Open HMM file (might be in HMMERDB or current directory). - * Open output file, if needed. - ***********************************************/ - - if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) - Die("Failed to open HMM file %s\n%s", hmmfile, usage); - - if (ofile == NULL) fp = stdout; - else { - if ((fp = fopen(ofile, "w")) == NULL) - Die("Failed to open output file %s for writing", ofile); - } - - /*********************************************** - * Show the options banner - ***********************************************/ - - if (! be_quiet) - { - Banner(stdout, banner); - printf("HMM file: %s\n", hmmfile); - if (! 
do_consensus) { - printf("Number of seqs: %d\n", nseq); - printf("Random seed: %d\n", seed); - } else { - printf("Generating consensus sequence.\n"); - } - printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n"); - } - - /*********************************************** - * For every HMM in the file, do some emission. - ***********************************************/ - - nhmm = 0; - while (HMMFileRead(hmmfp, &hmm)) { - if (hmm == NULL) - Die("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile); - - /* Configure the HMM to shut off N,J,C emission: so we - * do a simple single pass through the model. - */ - Plan7NakedConfig(hmm); - Plan7Renormalize(hmm); - - /*********************************************** - * Do the work. - * If we're generating an alignment, we have to collect - * all our traces, then output. If we're generating unaligned - * sequences, we can emit one at a time. - ***********************************************/ - - if (do_consensus) - { - char *seq; - SQINFO sqinfo; /* info about sequence (name/desc) */ - - EmitConsensusSequence(hmm, &seq, NULL, &L, NULL); - strcpy(sqinfo.name, hmm->name); - strcpy(sqinfo.desc, "profile HMM generated consensus sequence [hmmemit]"); - - sqinfo.len = L; - sqinfo.flags = SQINFO_NAME | SQINFO_DESC | SQINFO_LEN; - - WriteSeq(fp, SQFILE_FASTA, seq, &sqinfo); - free(seq); - } - else if (do_alignment) - { - struct p7trace_s **tr; /* traces for aligned sequences */ - char **dsq; /* digitized sequences */ - SQINFO *sqinfo; /* info about sequences (name/desc) */ - MSA *msa; /* alignment */ - float *wgt; - - dsq = MallocOrDie(sizeof(char *) * nseq); - tr = MallocOrDie(sizeof(struct p7trace_s *) * nseq); - sqinfo = MallocOrDie(sizeof(SQINFO) * nseq); - wgt = MallocOrDie(sizeof(float) * nseq); - FSet(wgt, nseq, 1.0); - - for (i = 0; i < nseq; i++) - { - EmitSequence(hmm, &(dsq[i]), &L, &(tr[i])); - sprintf(sqinfo[i].name, "seq%d", i+1); - sqinfo[i].len = L; - sqinfo[i].flags = SQINFO_NAME 
| SQINFO_LEN; - } - - msa = P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, FALSE); - msa->name = sre_strdup(hmm->name, -1); - msa->desc = sre_strdup("Synthetic sequence alignment generated by hmmemit", -1); - - /* Output the alignment */ - WriteStockholm(fp, msa); - - /* Free memory - */ - for (i = 0; i < nseq; i++) - { - P7FreeTrace(tr[i]); - free(dsq[i]); - } - MSAFree(msa); - free(sqinfo); - free(dsq); - free(wgt); - free(tr); - } - else /* unaligned sequence output */ - { - struct p7trace_s *tr; /* generated trace */ - char *dsq; /* digitized sequence */ - char *seq; /* alphabetic sequence */ - SQINFO sqinfo; /* info about sequence (name/len) */ - - for (i = 0; i < nseq; i++) - { - EmitSequence(hmm, &dsq, &L, &tr); - sprintf(sqinfo.name, "%s-%d", hmm->name, i+1); - sqinfo.len = L; - sqinfo.flags = SQINFO_NAME | SQINFO_LEN; - - seq = DedigitizeSequence(dsq, L); - - WriteSeq(fp, SQFILE_FASTA, seq, &sqinfo); - - P7FreeTrace(tr); - free(dsq); - free(seq); - } - } - nhmm++; - FreePlan7(hmm); - } - - /* We're done; clean up and exit. - */ - if (nhmm == 0) - Die("Failed to read any HMMs from %s\n", hmmfile); - if (ofile != NULL) { - fclose(fp); - if (!be_quiet) printf("Output saved in file %s\n", ofile); - } - HMMFileClose(hmmfp); - SqdClean(); - return 0; -} - diff --git a/forester/archive/RIO/others/hmmer/src/hmmfetch.c b/forester/archive/RIO/others/hmmer/src/hmmfetch.c deleted file mode 100644 index 3f06f7c..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmfetch.c +++ /dev/null @@ -1,130 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- ************************************************************/ - -/* hmmfetch.c - * SRE, Wed Aug 5 14:26:51 1998 [St. Louis] - * - * Recover a specific HMM file from an HMM database, using - * an SSI index (created with hmmindex). - * - * CVS $Id: hmmfetch.c,v 1.1.1.1 2005/03/22 08:34:14 cmzmasek Exp $ - */ - -#include -#include -#include - -#include "squid.h" -#include "config.h" -#include "structs.h" -#include "funcs.h" -#include "version.h" - -#include "globals.h" - -static char banner[] = "hmmfetch -- retrieve specific HMM from an HMM database"; - -static char usage[] = "\ -Usage: hmmfetch [-options] \n\ -Available options are:\n\ - -h : print short usage and version info, then exit\n\ - -n : interpret instead as an HMM number\n\ -"; - -static char experts[] = "\ -"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-n", TRUE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - - -int -main(int argc, char **argv) -{ - char *hmmfile; /* HMM file to open */ - char *key; /* HMM name to retrieve */ - HMMFILE *hmmfp; /* opened hmm file pointer */ - struct plan7_s *hmm; /* a hidden Markov model */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - int by_number; /* fetch by number, not name */ - int nhmm; /* hmm number */ - - /*********************************************** - * Parse the command line - ***********************************************/ - - by_number = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "-n") == 0) by_number = TRUE; - else if (strcmp(optname, "-h") == 0) - { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - - if (argc - optind != 2) Die("Incorrect number of arguments.\n%s\n", usage); - hmmfile = argv[optind++]; - key = argv[optind++]; - - /*********************************************** - * Open 
HMM file, make sure SSI index exists - ***********************************************/ - - if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) - Die("failed to open HMM file %s for reading.", hmmfile); - if (hmmfp->ssi == NULL) - Die("There is no SSI index for %s; you need to use hmmindex on it.", hmmfile); - - /*********************************************** - * find key in hmmfile; get HMM; show as ASCII - ***********************************************/ - - if (by_number) { - if (! IsInt(key)) Die("%s does not appear to be a number.", key); - nhmm = atoi(key); - if (! HMMFilePositionByIndex(hmmfp, nhmm)) - Die("failed to position %s to HMM #%d", hmmfile, nhmm); - } else { - if (! HMMFilePositionByName(hmmfp, key)) - Die("No such hmm %s in HMM file %s\n", key, hmmfile); - } - - if (! HMMFileRead(hmmfp, &hmm)) - Die("Unexpected end of HMM file"); - if (hmm == NULL) - Die("HMM file %s may be corrupt or in incorrect format; parse failed", hmmfile); - - WriteAscHMM(stdout, hmm); - - FreePlan7(hmm); - HMMFileClose(hmmfp); - - /*********************************************** - * Exit - ***********************************************/ - - SqdClean(); - return 0; -} - - diff --git a/forester/archive/RIO/others/hmmer/src/hmmindex.c b/forester/archive/RIO/others/hmmer/src/hmmindex.c deleted file mode 100644 index 8323687..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmindex.c +++ /dev/null @@ -1,166 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* hmmindex.c - * SRE, Wed Aug 5 11:05:03 1998 [St. Louis] - * - * Create an SSI index file for an HMM database. 
- * - * CVS $Id: hmmindex.c,v 1.1.1.1 2005/03/22 08:34:03 cmzmasek Exp $ - */ - -#include -#include -#include - -#include "squid.h" -#include "config.h" -#include "structs.h" -#include "funcs.h" -#include "version.h" -#include "globals.h" -#include "ssi.h" - -static char banner[] = "hmmindex -- create SSI index for an HMM database"; - -static char usage[] = "\ -Usage: hmmindex [-options] \n\ -Available options are:\n\ - -h : print short usage and version info, then exit\n\ -"; - -static char experts[] = "\ -"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *hmmfile; /* HMM file to open */ - SSIINDEX *ssi; /* SSI index in memory */ - char *ssifile; /* name of SSI index on disk */ - HMMFILE *hmmfp; /* opened hmm file pointer */ - struct plan7_s *hmm; /* a hidden Markov model */ - int idx, nhmm; /* counter over HMMs */ - int npri, nsec; /* # of names, accessions */ - int fh; /* file handle */ - int status; /* return status from SSI call */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - /*********************************************** - * Parse the command line - ***********************************************/ - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) - { - if (strcmp(optname, "-h") == 0) - { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - - if (argc - optind != 1) Die("Incorrect number of arguments.\n%s\n", usage); - hmmfile = argv[optind++]; - - /*********************************************** - * Open our input HMM file, make sure all is well with the output SSI filename - ***********************************************/ - - if ((hmmfp = HMMFileOpen(hmmfile, NULL)) == NULL) - Die("failed to open HMM file %s for reading.", hmmfile); - if (hmmfp->ssi != NULL) - 
Die("SSI index already exists for %s.\nPlease delete it first.", hmmfile); - - ssifile = MallocOrDie(strlen(hmmfile) + 5); - sprintf(ssifile, "%s%s", hmmfile, ".ssi"); - if (FileExists(ssifile)) /* shouldn't happen */ - Die("An SSI file %s already exists; please delete it first", ssifile); - - if ((ssi = SSICreateIndex(hmmfp->mode)) == NULL) - Die("Failed to initialize the SSI index structure"); - if (SSIAddFileToIndex(ssi, hmmfile, hmmfp->is_binary, &fh) != 0) - Die("SSIAddFileToIndex() failed"); - - /*********************************************** - * Show the banner - ***********************************************/ - - Banner(stdout, banner); - printf("HMM file: %s\n", hmmfile); - if (hmmfp->mode == SSI_OFFSET_I64) - printf("Index file mode: 64-bit (large HMM file)\n"); - printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n"); - - /*********************************************** - * Get offsets and names for every model; store in keylist - ***********************************************/ - - printf("Determining offsets for %s, please be patient...\n", hmmfile); - - nhmm = npri = nsec = 0; - while (HMMFileRead(hmmfp, &hmm)) - { - if (hmm == NULL) - Die("HMM file %s may be corrupt or in incorrect format; parse failed", hmmfile); - - /* record name of HMM as the primary retrieval key */ - status = SSIAddPrimaryKeyToIndex(ssi, hmm->name, fh, &(hmmfp->offset), NULL, 0); - if (status != 0) Die("SSIAddPrimaryKeyToIndex() failed"); - npri++; - - /* record accession of HMM as a secondary retrieval key */ - if (hmm->flags & PLAN7_ACC) { - status = SSIAddSecondaryKeyToIndex(ssi, hmm->acc, hmm->name); - if (status != 0) Die("SSIAddSecondaryKeyToIndex() failed"); - nsec++; - } - - nhmm++; - FreePlan7(hmm); - } - HMMFileClose(hmmfp); - - /*********************************************** - * Output the SSI file - ***********************************************/ - - status = SSIWriteIndex(ssifile, ssi); - if (status != 0) Die("SSIWriteIndex() failed"); - 
- printf("Complete.\n"); - printf("HMM file: %s\n", hmmfile); - printf("SSI index: %s\n", ssifile); - printf("# of HMMS: %d\n", nhmm); - printf("HMM names: %d\n", npri); - printf("HMM accessions: %d\n", nsec); - - - /*********************************************** - * Exit - ***********************************************/ - - free(ssifile); - SSIFreeIndex(ssi); - SqdClean(); - return 0; -} - - diff --git a/forester/archive/RIO/others/hmmer/src/hmmio.c b/forester/archive/RIO/others/hmmer/src/hmmio.c deleted file mode 100644 index f2d6da5..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmio.c +++ /dev/null @@ -1,1744 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* hmmio.c - * - * Input/output of HMMs. - * - * As of HMMER 2.0, HMMs are saved by default in a tabular ASCII format - * as log-odds or log probabilities scaled to an integer. A binary save - * file format is also available which is faster to access (a - * consideration which might be important for HMM library applications). - * HMMs can be concatenated into HMM libraries. - * - * A comment on loss of accuracy. Storing a number as a scaled log - * probability guarantees us an error of about 0.035% or - * less in the retrieved probability. We are relatively invulnerable - * to the truncation errors which HMMER 1.8 was vulnerable to. - * - * Magic numbers (both for the ASCII and binary save formats) are used - * to label save files with a major version number. This simplifies the task of - * backwards compatibility as new versions of the program are created. - * Reverse but not forward compatibility is guaranteed. I.e. 
HMMER 2.0 - * can read `1.7' save files, but not vice versa. Note that the major - * version number in the save files is NOT the version of the software - * that generated it; rather, the number of the last major version in which - * save format changed. - * - ****************************************************************** - * - * The HMM input API: - * - * HMMFILE *hmmfp; - * char *hmmfile; - * struct plan7_s *hmm; - * char env[] = "HMMERDB"; (a la BLASTDB) - * - * hmmfp = HMMFileOpen(hmmfile, env) NULL on failure - * while (HMMFileRead(hmmfp, &hmm)) 0 if no more HMMs - * if (hmm == NULL) Die(); NULL on file parse failure - * whatever; - * FreeHMM(hmm); - * } - * HMMFileClose(hmmfp); - * - ***************************************************************** - * - * The HMM output API: - * - * FILE *ofp; - * struct plan7_s *hmm; - * - * WriteAscHMM(ofp, hmm); to write/append an HMM to open file - * or WriteBinHMM(ofp, hmm); to write/append binary format HMM to open file - * - ***************************************************************** - * - * V1.0: original implementation - * V1.1: regularizers removed from model structure - * V1.7: ref and cs annotation lines added from alignment, one - * char per match state 1..M - * V1.9: null model and name added to HMM structure. ASCII format changed - * to compact tabular one. - * V2.0: Plan7. Essentially complete rewrite. - */ - -#include -#include -#include -#include -#include -#include /* to get SEEK_CUR definition on silly Suns */ - -#include "squid.h" -#include "config.h" -#include "structs.h" -#include "funcs.h" -#include "version.h" -#include "ssi.h" - -/* Magic numbers identifying binary formats. - * Do not change the old magics! Necessary for backwards compatibility. 
- */ -static unsigned int v10magic = 0xe8ededb1; /* v1.0 binary: "hmm1" + 0x80808080 */ -static unsigned int v10swap = 0xb1edede8; /* byteswapped v1.0 */ -static unsigned int v11magic = 0xe8ededb2; /* v1.1 binary: "hmm2" + 0x80808080 */ -static unsigned int v11swap = 0xb2edede8; /* byteswapped v1.1 */ -static unsigned int v17magic = 0xe8ededb3; /* v1.7 binary: "hmm3" + 0x80808080 */ -static unsigned int v17swap = 0xb3edede8; /* byteswapped v1.7 */ -static unsigned int v19magic = 0xe8ededb4; /* V1.9 binary: "hmm4" + 0x80808080 */ -static unsigned int v19swap = 0xb4edede8; /* V1.9 binary, byteswapped */ -static unsigned int v20magic = 0xe8ededb5; /* V2.0 binary: "hmm5" + 0x80808080 */ -static unsigned int v20swap = 0xb5edede8; /* V2.0 binary, byteswapped */ - -/* Old HMMER 1.x file formats. - */ -#define HMMER1_0B 1 /* binary HMMER 1.0 */ -#define HMMER1_0F 2 /* flat ascii HMMER 1.0 */ -#define HMMER1_1B 3 /* binary HMMER 1.1 */ -#define HMMER1_1F 4 /* flat ascii HMMER 1.1 */ -#define HMMER1_7B 5 /* binary HMMER 1.7 */ -#define HMMER1_7F 6 /* flat ascii HMMER 1.7 */ -#define HMMER1_9B 7 /* HMMER 1.9 binary */ -#define HMMER1_9F 8 /* HMMER 1.9 flat ascii */ - -static int read_asc20hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm); -static int read_bin20hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm); -static int read_asc19hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm); -static int read_bin19hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm); -static int read_asc17hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm); -static int read_bin17hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm); -static int read_asc11hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm); -static int read_bin11hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm); -static int read_asc10hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm); -static int read_bin10hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm); - -static void byteswap(char *swap, int nbytes); -static char *prob2ascii(float p, float null); -static float ascii2prob(char *s, 
float null); -static void write_bin_string(FILE *fp, char *s); -static int read_bin_string(FILE *fp, int doswap, char **ret_s); -static void multiline(FILE *fp, char *pfx, char *s); - -static struct plan9_s *read_plan9_binhmm(FILE *fp, int version, int swapped); -static struct plan9_s *read_plan9_aschmm(FILE *fp, int version); - -/***************************************************************** - * HMM input API functions: - * HMMFileOpen() - * HMMFileRead() - * HMMFileClose() - * HMMFileRewind() - *****************************************************************/ - -/* Function: HMMFileOpen() - * - * Purpose: Open an HMM file for reading. The file may be either - * an index for a library of HMMs, or an HMM. - * - * Args: hmmfile - name of file - * env - NULL, or environment variable for HMM database. - * - * Return: Valid HMMFILE *, or NULL on failure. - */ -HMMFILE * -HMMFileOpen(char *hmmfile, char *env) -{ - HMMFILE *hmmfp; - unsigned int magic; - char buf[512]; - char *ssifile; - char *dir; /* dir name in which HMM file was found */ - int status; - - hmmfp = (HMMFILE *) MallocOrDie (sizeof(HMMFILE)); - hmmfp->f = NULL; - hmmfp->parser = NULL; - hmmfp->is_binary = FALSE; - hmmfp->byteswap = FALSE; - hmmfp->is_seekable= TRUE; /* always; right now, an HMM must always be in a file. */ - - /* Open the file. Look in current directory. - * If that doesn't work, check environment var for - * a second possible directory (usually the location - * of a system-wide HMM library). - * Using dir name if necessary, construct correct SSI file name. 
- */ - hmmfp->f = NULL; - hmmfp->ssi = NULL; - if ((hmmfp->f = fopen(hmmfile, "r")) != NULL) - { - ssifile = MallocOrDie(sizeof(char) * (strlen(hmmfile) + 5)); - sprintf(ssifile, "%s.ssi", hmmfile); - - if ((hmmfp->mode = SSIRecommendMode(hmmfile)) == -1) - Die("SSIRecommendMode() failed"); - } - else if ((hmmfp->f = EnvFileOpen(hmmfile, env, &dir)) != NULL) - { - char *full; - full = FileConcat(dir, hmmfile); - - ssifile = MallocOrDie(sizeof(char) * (strlen(full) + strlen(hmmfile) + 5)); - sprintf(ssifile, "%s.ssi", full); - - if ((hmmfp->mode = SSIRecommendMode(full)) == -1) - Die("SSIRecommendMode() failed"); - - free(full); - free(dir); - } - else return NULL; - - /* Open the SSI index file. If it doesn't exist, or it's corrupt, or - * some error happens, hmmfp->ssi stays NULL. - */ - SQD_DPRINTF1(("Opening ssifile %s...\n", ssifile)); - SSIOpen(ssifile, &(hmmfp->ssi)); - free(ssifile); - - /* Initialize the disk offset stuff. - */ - status = SSIGetFilePosition(hmmfp->f, hmmfp->mode, &(hmmfp->offset)); - if (status != 0) Die("SSIGetFilePosition() failed"); - - /* Check for binary or byteswapped binary format - * by peeking at first 4 bytes. - */ - if (! 
fread((char *) &magic, sizeof(unsigned int), 1, hmmfp->f)) { - HMMFileClose(hmmfp); - return NULL; - } - rewind(hmmfp->f); - - if (magic == v20magic) { - hmmfp->parser = read_bin20hmm; - hmmfp->is_binary = TRUE; - return hmmfp; - } - else if (magic == v20swap) { - SQD_DPRINTF1(("Opened a HMMER 2.0 binary file [byteswapped]\n")); - hmmfp->parser = read_bin20hmm; - hmmfp->is_binary = TRUE; - hmmfp->byteswap = TRUE; - return hmmfp; - } - else if (magic == v19magic) { - hmmfp->parser = read_bin19hmm; - hmmfp->is_binary = TRUE; - return hmmfp; - } - else if (magic == v19swap) { - hmmfp->parser = read_bin19hmm; - hmmfp->is_binary = TRUE; - hmmfp->byteswap = TRUE; - return hmmfp; - } - else if (magic == v17magic) { - hmmfp->parser = read_bin17hmm; - hmmfp->is_binary = TRUE; - return hmmfp; - } - else if (magic == v17swap) { - hmmfp->parser = read_bin17hmm; - hmmfp->is_binary = TRUE; - hmmfp->byteswap = TRUE; - return hmmfp; - } - else if (magic == v11magic) { - hmmfp->parser = read_bin11hmm; - hmmfp->is_binary = TRUE; - return hmmfp; - } - else if (magic == v11swap) { - hmmfp->parser = read_bin11hmm; - hmmfp->is_binary = TRUE; - hmmfp->byteswap = TRUE; - return hmmfp; - } - else if (magic == v10magic) { - hmmfp->parser = read_bin10hmm; - hmmfp->is_binary = TRUE; - return hmmfp; - } - else if (magic == v10swap) { - hmmfp->parser = read_bin10hmm; - hmmfp->is_binary = TRUE; - hmmfp->byteswap = TRUE; - return hmmfp; - } - /* else we fall thru; it may be an ASCII file. */ - - /* If magic looks binary but we don't recognize it, choke and die. - */ - if (magic & 0x80000000) { - Warn("\ -%s appears to be a binary but format is not recognized\n\ -It may be from a HMMER version more recent than yours,\n\ -or may be a different kind of binary altogether.\n", hmmfile); - HMMFileClose(hmmfp); - return NULL; - } - - /* Check for ASCII format by peeking at first word. 
- */ - if (fgets(buf, 512, hmmfp->f) == NULL) { - HMMFileClose(hmmfp); - return NULL; - } - rewind(hmmfp->f); - - if (strncmp("HMMER2.0", buf, 8) == 0) { - hmmfp->parser = read_asc20hmm; - return hmmfp; - } else if (strncmp("HMMER v1.9", buf, 10) == 0) { - hmmfp->parser = read_asc19hmm; - return hmmfp; - } else if (strncmp("# HMM v1.7", buf, 10) == 0) { - hmmfp->parser = read_asc17hmm; - return hmmfp; - } else if (strncmp("# HMM v1.1", buf, 10) == 0) { - hmmfp->parser = read_asc11hmm; - return hmmfp; - } else if (strncmp("# HMM v1.0", buf, 10) == 0) { - hmmfp->parser = read_asc10hmm; - return hmmfp; - } - - /* If we haven't recognized it yet, it's bogus. - */ - HMMFileClose(hmmfp); - return NULL; -} -int -HMMFileRead(HMMFILE *hmmfp, struct plan7_s **ret_hmm) -{ - int status; - /* Set the disk position marker. */ - if (hmmfp->is_seekable) { - status = SSIGetFilePosition(hmmfp->f, hmmfp->mode, &(hmmfp->offset)); - if (status != 0) Die("SSIGetFilePosition() failed"); - } - /* Parse the HMM and return it. */ - return (*hmmfp->parser)(hmmfp, ret_hmm); -} -void -HMMFileClose(HMMFILE *hmmfp) -{ - if (hmmfp->f != NULL) fclose(hmmfp->f); - if (hmmfp->ssi != NULL) SSIClose(hmmfp->ssi); - free(hmmfp); -} -void -HMMFileRewind(HMMFILE *hmmfp) -{ - rewind(hmmfp->f); -} -int -HMMFilePositionByName(HMMFILE *hmmfp, char *name) -{ - SSIOFFSET offset; /* offset in hmmfile, from SSI */ - int fh; /* ignored. 
*/ - - if (hmmfp->ssi == NULL) return 0; - if (SSIGetOffsetByName(hmmfp->ssi, name, &fh, &offset) != 0) return 0; - if (SSISetFilePosition(hmmfp->f, &offset) != 0) return 0; - return 1; -} -int -HMMFilePositionByIndex(HMMFILE *hmmfp, int idx) -{ /* idx runs from 0..nhmm-1 */ - int fh; /* file handle is ignored; only one HMM file */ - SSIOFFSET offset; /* file position of HMM */ - - if (hmmfp->ssi == NULL) return 0; - if (SSIGetOffsetByNumber(hmmfp->ssi, idx, &fh, &offset) != 0) return 0; - if (SSISetFilePosition(hmmfp->f, &offset) != 0) return 0; - return 1; -} - -/***************************************************************** - * HMM output API: - * WriteAscHMM() - * WriteBinHMM() - * - *****************************************************************/ - -/* Function: WriteAscHMM() - * - * Purpose: Save an HMM in flat text ASCII format. - * - * Args: fp - open file for writing - * hmm - HMM to save - */ -void -WriteAscHMM(FILE *fp, struct plan7_s *hmm) -{ - int k; /* counter for nodes */ - int x; /* counter for symbols */ - int ts; /* counter for state transitions */ - - fprintf(fp, "HMMER2.0 [%s]\n", RELEASE); /* magic header */ - - /* write header information - */ - fprintf(fp, "NAME %s\n", hmm->name); - if (hmm->flags & PLAN7_ACC) - fprintf(fp, "ACC %s\n", hmm->acc); - if (hmm->flags & PLAN7_DESC) - fprintf(fp, "DESC %s\n", hmm->desc); - fprintf(fp, "LENG %d\n", hmm->M); - fprintf(fp, "ALPH %s\n", - (Alphabet_type == hmmAMINO) ? "Amino":"Nucleic"); - fprintf(fp, "RF %s\n", (hmm->flags & PLAN7_RF) ? "yes" : "no"); - fprintf(fp, "CS %s\n", (hmm->flags & PLAN7_CS) ? "yes" : "no"); - fprintf(fp, "MAP %s\n", (hmm->flags & PLAN7_MAP) ? 
"yes" : "no"); - multiline(fp, "COM ", hmm->comlog); - fprintf(fp, "NSEQ %d\n", hmm->nseq); - fprintf(fp, "DATE %s\n", hmm->ctime); - fprintf(fp, "CKSUM %d\n", hmm->checksum); - if (hmm->flags & PLAN7_GA) - fprintf(fp, "GA %.1f %.1f\n", hmm->ga1, hmm->ga2); - if (hmm->flags & PLAN7_TC) - fprintf(fp, "TC %.1f %.1f\n", hmm->tc1, hmm->tc2); - if (hmm->flags & PLAN7_NC) - fprintf(fp, "NC %.1f %.1f\n", hmm->nc1, hmm->nc2); - - /* Specials - */ - fputs("XT ", fp); - for (k = 0; k < 4; k++) - for (x = 0; x < 2; x++) - fprintf(fp, "%6s ", prob2ascii(hmm->xt[k][x], 1.0)); - fputs("\n", fp); - - /* Save the null model first, so HMM readers can decode - * log odds scores on the fly. Save as log odds probabilities - * relative to 1/Alphabet_size (flat distribution) - */ - fprintf(fp, "NULT "); - fprintf(fp, "%6s ", prob2ascii(hmm->p1, 1.0)); /* p1 */ - fprintf(fp, "%6s\n", prob2ascii(1.0-hmm->p1, 1.0)); /* p2 */ - fputs("NULE ", fp); - for (x = 0; x < Alphabet_size; x++) - fprintf(fp, "%6s ", prob2ascii(hmm->null[x], 1/(float)(Alphabet_size))); - fputs("\n", fp); - - /* EVD statistics - */ - if (hmm->flags & PLAN7_STATS) - fprintf(fp, "EVD %10f %10f\n", hmm->mu, hmm->lambda); - - /* Print header - */ - fprintf(fp, "HMM "); - for (x = 0; x < Alphabet_size; x++) fprintf(fp, " %c ", Alphabet[x]); - fprintf(fp, "\n"); - fprintf(fp, " %6s %6s %6s %6s %6s %6s %6s %6s %6s\n", - "m->m", "m->i", "m->d", "i->m", "i->i", "d->m", "d->d", "b->m", "m->e"); - - /* Print HMM parameters (main section of the save file) - */ - fprintf(fp, " %6s %6s ", prob2ascii(1-hmm->tbd1, 1.0), "*"); - fprintf(fp, "%6s\n", prob2ascii(hmm->tbd1, 1.0)); - for (k = 1; k <= hmm->M; k++) - { - /* Line 1: k, match emissions, map */ - fprintf(fp, " %5d ", k); - for (x = 0; x < Alphabet_size; x++) - fprintf(fp, "%6s ", prob2ascii(hmm->mat[k][x], hmm->null[x])); - if (hmm->flags & PLAN7_MAP) fprintf(fp, "%5d", hmm->map[k]); - fputs("\n", fp); - /* Line 2: RF and insert emissions */ - fprintf(fp, " %5c ", hmm->flags & 
PLAN7_RF ? hmm->rf[k] : '-'); - for (x = 0; x < Alphabet_size; x++) - fprintf(fp, "%6s ", (k < hmm->M) ? prob2ascii(hmm->ins[k][x], hmm->null[x]) : "*"); - fputs("\n", fp); - /* Line 3: CS and transition probs */ - fprintf(fp, " %5c ", hmm->flags & PLAN7_CS ? hmm->cs[k] : '-'); - for (ts = 0; ts < 7; ts++) - fprintf(fp, "%6s ", (k < hmm->M) ? prob2ascii(hmm->t[k][ts], 1.0) : "*"); - fprintf(fp, "%6s ", prob2ascii(hmm->begin[k], 1.0)); - fprintf(fp, "%6s ", prob2ascii(hmm->end[k], 1.0)); - - fputs("\n", fp); - } - fputs("//\n", fp); -} - -/* Function: WriteBinHMM() - * - * Purpose: Write an HMM in binary format. - */ -void -WriteBinHMM(FILE *fp, struct plan7_s *hmm) -{ - int k; - - /* ye olde magic number */ - fwrite((char *) &(v20magic), sizeof(unsigned int), 1, fp); - - /* header section - */ - fwrite((char *) &(hmm->flags), sizeof(int), 1, fp); - write_bin_string(fp, hmm->name); - if (hmm->flags & PLAN7_ACC) write_bin_string(fp, hmm->acc); - if (hmm->flags & PLAN7_DESC) write_bin_string(fp, hmm->desc); - fwrite((char *) &(hmm->M), sizeof(int), 1, fp); - fwrite((char *) &(Alphabet_type), sizeof(int), 1, fp); - if (hmm->flags & PLAN7_RF) fwrite((char *) hmm->rf, sizeof(char), hmm->M+1, fp); - if (hmm->flags & PLAN7_CS) fwrite((char *) hmm->cs, sizeof(char), hmm->M+1, fp); - if (hmm->flags & PLAN7_MAP) fwrite((char *) hmm->map, sizeof(int), hmm->M+1, fp); - write_bin_string(fp, hmm->comlog); - fwrite((char *) &(hmm->nseq), sizeof(int), 1, fp); - write_bin_string(fp, hmm->ctime); - fwrite((char *) &(hmm->checksum), sizeof(int), 1, fp); - if (hmm->flags & PLAN7_GA) { - fwrite((char *) &(hmm->ga1), sizeof(float), 1, fp); - fwrite((char *) &(hmm->ga2), sizeof(float), 1, fp); - } - if (hmm->flags & PLAN7_TC) { - fwrite((char *) &(hmm->tc1), sizeof(float), 1, fp); - fwrite((char *) &(hmm->tc2), sizeof(float), 1, fp); - } - if (hmm->flags & PLAN7_NC) { - fwrite((char *) &(hmm->nc1), sizeof(float), 1, fp); - fwrite((char *) &(hmm->nc2), sizeof(float), 1, fp); - } - - /* 
Specials */ - for (k = 0; k < 4; k++) - fwrite((char *) hmm->xt[k], sizeof(float), 2, fp); - - /* Null model */ - fwrite((char *)&(hmm->p1), sizeof(float), 1, fp); - fwrite((char *) hmm->null, sizeof(float), Alphabet_size, fp); - - /* EVD stats */ - if (hmm->flags & PLAN7_STATS) { - fwrite((char *) &(hmm->mu), sizeof(float), 1, fp); - fwrite((char *) &(hmm->lambda), sizeof(float), 1, fp); - } - - /* entry/exit probabilities - */ - fwrite((char *)&(hmm->tbd1),sizeof(float), 1, fp); - fwrite((char *) hmm->begin, sizeof(float), hmm->M+1, fp); - fwrite((char *) hmm->end, sizeof(float), hmm->M+1, fp); - - /* main model - */ - for (k = 1; k <= hmm->M; k++) - fwrite((char *) hmm->mat[k], sizeof(float), Alphabet_size, fp); - for (k = 1; k < hmm->M; k++) - fwrite((char *) hmm->ins[k], sizeof(float), Alphabet_size, fp); - for (k = 1; k < hmm->M; k++) - fwrite((char *) hmm->t[k], sizeof(float), 7, fp); -} - - -/***************************************************************** - * - * Internal: HMM file parsers for various releases of HMMER. - * - * read_{asc,bin}xxhmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) - * - * Upon return, *ret_hmm is an allocated Plan7 HMM. - * Return 0 if no more HMMs in the file (normal). - * Return 1 and *ret_hmm = something if we got an HMM (normal) - * Return 1 if an error occurs (meaning "I tried to - * read something...") and *ret_hmm == NULL (meaning - * "...but it wasn't an HMM"). I know, this is a funny - * way to handle errors. 
- * - *****************************************************************/ - -static int -read_asc20hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) -{ - struct plan7_s *hmm; - char buffer[512]; - char *s; - int M; - float p; - int k, x; - int atype; /* alphabet type, hmmAMINO or hmmNUCLEIC */ - - hmm = NULL; - if (feof(hmmfp->f) || fgets(buffer, 512, hmmfp->f) == NULL) return 0; - if (strncmp(buffer, "HMMER2.0", 8) != 0) goto FAILURE; - - /* Get the header information: tag/value pairs in any order, - * ignore unknown tags, stop when "HMM" is reached (signaling - * start of main model) - */ - hmm = AllocPlan7Shell(); - M = -1; - while (fgets(buffer, 512, hmmfp->f) != NULL) { - if (strncmp(buffer, "NAME ", 5) == 0) Plan7SetName(hmm, buffer+6); - else if (strncmp(buffer, "ACC ", 5) == 0) Plan7SetAccession(hmm, buffer+6); - else if (strncmp(buffer, "DESC ", 5) == 0) Plan7SetDescription(hmm, buffer+6); - else if (strncmp(buffer, "LENG ", 5) == 0) M = atoi(buffer+6); - else if (strncmp(buffer, "NSEQ ", 5) == 0) hmm->nseq = atoi(buffer+6); - else if (strncmp(buffer, "ALPH ", 5) == 0) - { /* Alphabet type */ - s2upper(buffer+6); - if (strncmp(buffer+6, "AMINO", 5) == 0) atype = hmmAMINO; - else if (strncmp(buffer+6, "NUCLEIC", 7) == 0) atype = hmmNUCLEIC; - else goto FAILURE; - - if (Alphabet_type == hmmNOTSETYET) SetAlphabet(atype); - else if (atype != Alphabet_type) - Die("Alphabet mismatch error.\nI thought we were working with %s, but tried to read a %s HMM.\n", AlphabetType2String(Alphabet_type), AlphabetType2String(atype)); - } - else if (strncmp(buffer, "RF ", 5) == 0) - { /* Reference annotation present? */ - if (sre_toupper(*(buffer+6)) == 'Y') hmm->flags |= PLAN7_RF; - } - else if (strncmp(buffer, "CS ", 5) == 0) - { /* Consensus annotation present? */ - if (sre_toupper(*(buffer+6)) == 'Y') hmm->flags |= PLAN7_CS; - } - else if (strncmp(buffer, "MAP ", 5) == 0) - { /* Map annotation present? 
*/ - if (sre_toupper(*(buffer+6)) == 'Y') hmm->flags |= PLAN7_MAP; - } - else if (strncmp(buffer, "COM ", 5) == 0) - { /* Command line log */ - StringChop(buffer+6); - if (hmm->comlog == NULL) - hmm->comlog = Strdup(buffer+6); - else - { - hmm->comlog = ReallocOrDie(hmm->comlog, sizeof(char *) * - (strlen(hmm->comlog) + 1 + strlen(buffer+6))); - strcat(hmm->comlog, "\n"); - strcat(hmm->comlog, buffer+6); - } - } - else if (strncmp(buffer, "DATE ", 5) == 0) - { /* Date file created */ - StringChop(buffer+6); - hmm->ctime= Strdup(buffer+6); - } - else if (strncmp(buffer, "GA ", 5) == 0) - { - if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; - hmm->ga1 = atof(s); - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - hmm->ga2 = atof(s); - hmm->flags |= PLAN7_GA; - } - else if (strncmp(buffer, "TC ", 5) == 0) - { - if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; - hmm->tc1 = atof(s); - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - hmm->tc2 = atof(s); - hmm->flags |= PLAN7_TC; - } - else if (strncmp(buffer, "NC ", 5) == 0) - { - if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; - hmm->nc1 = atof(s); - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - hmm->nc2 = atof(s); - hmm->flags |= PLAN7_NC; - } - else if (strncmp(buffer, "XT ", 5) == 0) - { /* Special transition section */ - if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; - for (k = 0; k < 4; k++) - for (x = 0; x < 2; x++) - { - if (s == NULL) goto FAILURE; - hmm->xt[k][x] = ascii2prob(s, 1.0); - s = strtok(NULL, " \t\n"); - } - } - else if (strncmp(buffer, "NULT ", 5) == 0) - { /* Null model transitions */ - if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; - hmm->p1 = ascii2prob(s, 1.); - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - hmm->p1 = hmm->p1 / (hmm->p1 + ascii2prob(s, 1.0)); - } - else if (strncmp(buffer, "NULE ", 5) == 0) - { /* Null model emissions */ - if (Alphabet_type == hmmNOTSETYET) - Die("ALPH must precede NULE in 
HMM save files"); - s = strtok(buffer+6, " \t\n"); - for (x = 0; x < Alphabet_size; x++) { - if (s == NULL) goto FAILURE; - hmm->null[x] = ascii2prob(s, 1./(float)Alphabet_size); - s = strtok(NULL, " \t\n"); - } - } - else if (strncmp(buffer, "EVD ", 5) == 0) - { /* EVD parameters */ - hmm->flags |= PLAN7_STATS; - if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; - hmm->mu = atof(s); - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - hmm->lambda = atof(s); - } - else if (strncmp(buffer, "CKSUM", 5) == 0) hmm->checksum = atoi(buffer+6); - else if (strncmp(buffer, "HMM ", 5) == 0) break; - } - - /* partial check for mandatory fields */ - if (feof(hmmfp->f)) goto FAILURE; - if (M < 1) goto FAILURE; - if (hmm->name == NULL) goto FAILURE; - if (Alphabet_type == hmmNOTSETYET) goto FAILURE; - - /* Main model section. Read as integer log odds, convert - * to probabilities - */ - AllocPlan7Body(hmm, M); - /* skip an annotation line */ - if (fgets(buffer, 512, hmmfp->f) == NULL) goto FAILURE; - /* parse tbd1 line */ - if (fgets(buffer, 512, hmmfp->f) == NULL) goto FAILURE; - if ((s = strtok(buffer, " \t\n")) == NULL) goto FAILURE; - p = ascii2prob(s, 1.0); - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - hmm->tbd1 = ascii2prob(s, 1.0); - hmm->tbd1 = hmm->tbd1 / (p + hmm->tbd1); - - /* main model */ - for (k = 1; k <= hmm->M; k++) { - /* Line 1: k, match emissions, map */ - if (fgets(buffer, 512, hmmfp->f) == NULL) goto FAILURE; - if ((s = strtok(buffer, " \t\n")) == NULL) goto FAILURE; - if (atoi(s) != k) goto FAILURE; - for (x = 0; x < Alphabet_size; x++) { - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - hmm->mat[k][x] = ascii2prob(s, hmm->null[x]); - } - if (hmm->flags & PLAN7_MAP) { - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - hmm->map[k] = atoi(s); - } - /* Line 2: RF and insert emissions */ - if (fgets(buffer, 512, hmmfp->f) == NULL) goto FAILURE; - if ((s = 
strtok(buffer, " \t\n")) == NULL) goto FAILURE; - if (hmm->flags & PLAN7_RF) hmm->rf[k] = *s; - if (k < hmm->M) { - for (x = 0; x < Alphabet_size; x++) { - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - hmm->ins[k][x] = ascii2prob(s, hmm->null[x]); - } - } - /* Line 3: CS and transitions */ - if (fgets(buffer, 512, hmmfp->f) == NULL) goto FAILURE; - if ((s = strtok(buffer, " \t\n")) == NULL) goto FAILURE; - if (hmm->flags & PLAN7_CS) hmm->cs[k] = *s; - for (x = 0; x < 7; x++) { - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - if (k < hmm->M) hmm->t[k][x] = ascii2prob(s, 1.0); - } - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - hmm->begin[k] = ascii2prob(s, 1.0); - if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; - hmm->end[k] = ascii2prob(s, 1.0); - - } /* end loop over main model */ - - /* Advance to record separator - */ - while (fgets(buffer, 512, hmmfp->f) != NULL) - if (strncmp(buffer, "//", 2) == 0) break; - - Plan7Renormalize(hmm); /* Paracel reported bug 6/11/99 */ - - /* Set flags and return - */ - hmm->flags |= PLAN7_HASPROB; /* probabilities are valid */ - hmm->flags &= ~PLAN7_HASBITS; /* scores are not valid */ - - *ret_hmm = hmm; - return 1; - -FAILURE: - if (hmm != NULL) FreePlan7(hmm); - *ret_hmm = NULL; - return 1; -} - - -static int -read_bin20hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) -{ - struct plan7_s *hmm; - int k,x; - int type; - unsigned int magic; - - hmm = NULL; - - /* Header section - */ - if (feof(hmmfp->f)) return 0; - if (! fread((char *) &magic, sizeof(unsigned int), 1, hmmfp->f)) return 0; - - if (hmmfp->byteswap) byteswap((char *)&magic, sizeof(unsigned int)); - if (magic != v20magic) goto FAILURE; - /* allocate HMM shell for header info */ - hmm = AllocPlan7Shell(); - /* flags */ - if (! fread((char *) &(hmm->flags), sizeof(int), 1, hmmfp->f)) goto FAILURE; - if (hmmfp->byteswap) byteswap((char *)&(hmm->flags), sizeof(int)); - /* name */ - if (! 
read_bin_string(hmmfp->f, hmmfp->byteswap, &(hmm->name))) goto FAILURE; - - /* optional accession */ - if ((hmm->flags & PLAN7_ACC) && - ! read_bin_string(hmmfp->f, hmmfp->byteswap, &(hmm->acc))) goto FAILURE; - /* optional description */ - if ((hmm->flags & PLAN7_DESC) && - ! read_bin_string(hmmfp->f, hmmfp->byteswap, &(hmm->desc))) goto FAILURE; - /* length of model */ - if (! fread((char *) &hmm->M, sizeof(int), 1, hmmfp->f)) goto FAILURE; - if (hmmfp->byteswap) byteswap((char *)&(hmm->M), sizeof(int)); - /* alphabet type */ - if (! fread((char *) &type, sizeof(int), 1, hmmfp->f)) goto FAILURE; - if (hmmfp->byteswap) byteswap((char *)&type, sizeof(int)); - if (Alphabet_type == hmmNOTSETYET) SetAlphabet(type); - else if (type != Alphabet_type) - Die("Alphabet mismatch error.\nI thought we were working with %s, but tried to read a %s HMM.\n", AlphabetType2String(Alphabet_type), AlphabetType2String(type)); - - /* now allocate for rest of model */ - AllocPlan7Body(hmm, hmm->M); - - /* optional #=RF alignment annotation */ - if ((hmm->flags & PLAN7_RF) && - !fread((char *) hmm->rf, sizeof(char), hmm->M+1, hmmfp->f)) goto FAILURE; - hmm->rf[hmm->M+1] = '\0'; - /* optional #=CS alignment annotation */ - if ((hmm->flags & PLAN7_CS) && - !fread((char *) hmm->cs, sizeof(char), hmm->M+1, hmmfp->f)) goto FAILURE; - hmm->cs[hmm->M+1] = '\0'; - /* optional alignment map annotation */ - if ((hmm->flags & PLAN7_MAP) && - !fread((char *) hmm->map, sizeof(int), hmm->M+1, hmmfp->f)) goto FAILURE; - if (hmmfp->byteswap) - for (k = 1; k <= hmm->M; k++) - byteswap((char*)&(hmm->map[k]), sizeof(int)); - /* command line log */ - if (!read_bin_string(hmmfp->f, hmmfp->byteswap, &(hmm->comlog))) goto FAILURE; - /* nseq */ - if (!fread((char *) &(hmm->nseq),sizeof(int), 1, hmmfp->f)) goto FAILURE; - if (hmmfp->byteswap) byteswap((char *)&(hmm->nseq), sizeof(int)); - /* creation time */ - if (!read_bin_string(hmmfp->f, hmmfp->byteswap, &(hmm->ctime))) goto FAILURE; - /* checksum */ - if 
(!fread((char *) &(hmm->checksum),sizeof(int), 1, hmmfp->f)) goto FAILURE; - if (hmmfp->byteswap) byteswap((char *)&(hmm->checksum), sizeof(int)); - - /* Pfam gathering thresholds */ - if (hmm->flags & PLAN7_GA) { - if (! fread((char *) &(hmm->ga1), sizeof(float), 1, hmmfp->f)) goto FAILURE; - if (! fread((char *) &(hmm->ga2), sizeof(float), 1, hmmfp->f)) goto FAILURE; - if (hmmfp->byteswap) { - byteswap((char *) &(hmm->ga1), sizeof(float)); - byteswap((char *) &(hmm->ga2), sizeof(float)); - } - } - /* Pfam trusted cutoffs */ - if (hmm->flags & PLAN7_TC) { - if (! fread((char *) &(hmm->tc1), sizeof(float), 1, hmmfp->f)) goto FAILURE; - if (! fread((char *) &(hmm->tc2), sizeof(float), 1, hmmfp->f)) goto FAILURE; - if (hmmfp->byteswap) { - byteswap((char *) &(hmm->tc1), sizeof(float)); - byteswap((char *) &(hmm->tc2), sizeof(float)); - } - } - /* Pfam noise cutoffs */ - if (hmm->flags & PLAN7_NC) { - if (! fread((char *) &(hmm->nc1), sizeof(float), 1, hmmfp->f)) goto FAILURE; - if (! fread((char *) &(hmm->nc2), sizeof(float), 1, hmmfp->f)) goto FAILURE; - if (hmmfp->byteswap) { - byteswap((char *) &(hmm->nc1), sizeof(float)); - byteswap((char *) &(hmm->nc2), sizeof(float)); - } - } - - /* specials */ - for (k = 0; k < 4; k++) - { - if (! fread((char *) hmm->xt[k], sizeof(float), 2, hmmfp->f)) goto FAILURE; - if (hmmfp->byteswap) { - for (x = 0; x < 2; x++) - byteswap((char *)&(hmm->xt[k][x]), sizeof(float)); - } - } - - /* null model */ - if (!fread((char *) &(hmm->p1),sizeof(float), 1, hmmfp->f)) goto FAILURE; - if (!fread((char *)hmm->null,sizeof(float),Alphabet_size,hmmfp->f))goto FAILURE; - - /* EVD stats */ - if (hmm->flags & PLAN7_STATS) { - if (! fread((char *) &(hmm->mu), sizeof(float), 1, hmmfp->f))goto FAILURE; - if (! 
fread((char *) &(hmm->lambda), sizeof(float), 1, hmmfp->f))goto FAILURE; - - if (hmmfp->byteswap) { - byteswap((char *)&(hmm->mu), sizeof(float)); - byteswap((char *)&(hmm->lambda), sizeof(float)); - } - } - - /* entry/exit probabilities - */ - if (! fread((char *)&(hmm->tbd1), sizeof(float), 1, hmmfp->f)) goto FAILURE; - if (! fread((char *) hmm->begin, sizeof(float), hmm->M+1, hmmfp->f)) goto FAILURE; - if (! fread((char *) hmm->end, sizeof(float), hmm->M+1, hmmfp->f)) goto FAILURE; - - /* main model */ - for (k = 1; k <= hmm->M; k++) - if (! fread((char *) hmm->mat[k], sizeof(float), Alphabet_size, hmmfp->f)) goto FAILURE; - for (k = 1; k < hmm->M; k++) - if (! fread((char *) hmm->ins[k], sizeof(float), Alphabet_size, hmmfp->f)) goto FAILURE; - for (k = 1; k < hmm->M; k++) - if (! fread((char *) hmm->t[k], sizeof(float), 7, hmmfp->f)) goto FAILURE; - - /* byteswapping - */ - if (hmmfp->byteswap) { - for (x = 0; x < Alphabet_size; x++) - byteswap((char *) &(hmm->null[x]), sizeof(float)); - byteswap((char *)&(hmm->p1), sizeof(float)); - byteswap((char *)&(hmm->tbd1), sizeof(float)); - - for (k = 1; k <= hmm->M; k++) - { - for (x = 0; x < Alphabet_size; x++) - byteswap((char *)&(hmm->mat[k][x]), sizeof(float)); - if (k < hmm->M) - for (x = 0; x < Alphabet_size; x++) - byteswap((char *)&(hmm->ins[k][x]), sizeof(float)); - byteswap((char *)&(hmm->begin[k]), sizeof(float)); - byteswap((char *)&(hmm->end[k]), sizeof(float)); - if (k < hmm->M) - for (x = 0; x < 7; x++) - byteswap((char *)&(hmm->t[k][x]), sizeof(float)); - } - } - - - /* set flags and return - */ - hmm->flags |= PLAN7_HASPROB; /* probabilities are valid */ - hmm->flags &= ~PLAN7_HASBITS; /* scores are not yet valid */ - *ret_hmm = hmm; - return 1; - -FAILURE: - if (hmm != NULL) FreePlan7(hmm); - *ret_hmm = NULL; - return 1; -} - - - - - -/* Function: read_asc19hmm() - * Date: Tue Apr 7 17:11:29 1998 [StL] - * - * Purpose: Read ASCII-format tabular (1.9 and later) save files. 
- * - * HMMER 1.9 was only used internally at WashU, as far as - * I know, so this code shouldn't be terribly important - * to anyone. - */ -static int -read_asc19hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) -{ - struct plan7_s *hmm; - FILE *fp; - char buffer[512]; - char *s; - int M; /* length of model */ - int k; /* state number */ - int x; /* symbol number */ - int atype; /* Alphabet type */ - - hmm = NULL; - fp = hmmfp->f; - if (feof(fp) || fgets(buffer, 512, fp) == NULL) return 0; - if (strncmp(buffer, "HMMER v1.9", 10) != 0) goto FAILURE; - - hmm = AllocPlan7Shell(); - /* read M from first line */ - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; M = atoi(s); /* model length */ - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; /* ignore alphabet size */ - if ((s = Getword(fp, sqdARG_STRING)) == NULL) goto FAILURE; Plan7SetName(hmm, s); /* name */ - if ((s = Getword(fp, sqdARG_STRING)) == NULL) goto FAILURE; /* alphabet type */ - s2upper(s); - if (strcmp(s, "AMINO") == 0) atype = hmmAMINO; - else if (strcmp(s, "NUCLEIC") == 0) atype = hmmNUCLEIC; - else goto FAILURE; - - if (Alphabet_type == hmmNOTSETYET) SetAlphabet(atype); - else if (atype != Alphabet_type) - Die("Alphabet mismatch error.\nI thought we were working with %s, but tried to read a %s HMM.\n", AlphabetType2String(Alphabet_type), AlphabetType2String(atype)); - - /* read alphabet, make sure it's Plan7-compatible... */ - if ((s = Getword(fp, sqdARG_STRING)) == NULL) goto FAILURE; - if (strncmp(s, Alphabet, Alphabet_size) != 0) goto FAILURE; - - /* whether we have ref, cs info */ - if ((s = Getword(fp, sqdARG_STRING)) == NULL) goto FAILURE; - if (strcmp(s, "yes") == 0) hmm->flags |= PLAN7_RF; - if ((s = Getword(fp, sqdARG_STRING)) == NULL) goto FAILURE; - if (strcmp(s, "yes") == 0) hmm->flags |= PLAN7_CS; - - /* null model. 1.9 has emissions only. invent transitions. 
*/ - if ((s = Getword(fp, sqdARG_STRING)) == NULL) goto FAILURE; - if (strcmp(s, "null") != 0) goto FAILURE; - for (x = 0; x < Alphabet_size; x++) { - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - hmm->null[x] = ascii2prob(s, 1.0); - } - hmm->p1 = (Alphabet_type == hmmAMINO)? 350./351. : 1000./1001.; - - /* Done with header; check some stuff before proceeding - */ - if (feof(hmmfp->f)) goto FAILURE; - if (M < 1) goto FAILURE; - if (hmm->name == NULL) goto FAILURE; - if (Alphabet_type == hmmNOTSETYET) goto FAILURE; - - /* Allocate the model. Set up the probabilities that Plan9 - * doesn't set. - */ - AllocPlan7Body(hmm, M); - ZeroPlan7(hmm); - Plan7LSConfig(hmm); - - /* The zero row has: 4 or 20 unused scores for nonexistent M0 state - * then: B->M, tbd1, a B->I that Plan7 doesn't have; - * three unused D-> transitions; then three I0 transitions that Plan7 doesn't have; - * then two unused rf, cs annotations. - */ - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; /* position index ignored */ - for (x = 0; x < Alphabet_size; x++) - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; /* emissions ignored */ - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - hmm->begin[1] = ascii2prob(s, 1.0); - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - hmm->tbd1 = ascii2prob(s, 1.0); - /* renormalize */ - hmm->begin[1] = hmm->begin[1] / (hmm->begin[1] + hmm->tbd1); - hmm->tbd1 = hmm->tbd1 / (hmm->begin[1] + hmm->tbd1); - /* skip rest of line, seven integer fields, two char fields */ - for (x = 0; x < 7; x++) - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - if ((s = Getword(fp, sqdARG_STRING)) == NULL) goto FAILURE; - if ((s = Getword(fp, sqdARG_STRING)) == NULL) goto FAILURE; - - /* main model: table of emissions, transitions, annotation */ - for (k = 1; k <= hmm->M; k++) - { - /* position index ignored */ - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - /* match emissions */ - for (x = 0; x < Alphabet_size; 
x++) { - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - hmm->mat[k][x] = ascii2prob(s, hmm->null[x]); - } - /* nine transitions; two are ignored */ - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - if (k < hmm->M) hmm->t[k][TMM] = ascii2prob(s, 1.0); - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - if (k < hmm->M) hmm->t[k][TMD] = (k == hmm->M) ? 0.0 : ascii2prob(s, 1.0); - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - if (k < hmm->M) hmm->t[k][TMI] = ascii2prob(s, 1.0); - - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - if (k < hmm->M) hmm->t[k][TDM] = ascii2prob(s, 1.0); - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - if (k < hmm->M) hmm->t[k][TDD] = (k == hmm->M) ? 0.0 : ascii2prob(s, 1.0); - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE;/* TDI ignored. */ - - /* no insert state at k == M, be careful */ - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - if (k < hmm->M) hmm->t[k][TIM] = ascii2prob(s, 1.0); - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; /* TID ignored. 
*/ - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - if (k < hmm->M) hmm->t[k][TII] = ascii2prob(s, 1.0); - - /* annotations */ - if ((s = Getword(fp, sqdARG_STRING)) == NULL) goto FAILURE; - if (hmm->flags & PLAN7_RF) hmm->rf[k] = *s; - if ((s = Getword(fp, sqdARG_STRING)) == NULL) goto FAILURE; - if (hmm->flags & PLAN7_CS) hmm->cs[k] = *s; - } - /* table of insert emissions; - * Plan7 has no insert state at 0 or M */ - for (k = 0; k <= hmm->M; k++) - { - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; /* position index ignored */ - for (x = 0; x < Alphabet_size; x++) { - if ((s = Getword(fp, sqdARG_INT)) == NULL) goto FAILURE; - if (k > 0 && k < hmm->M) - hmm->ins[k][x] = ascii2prob(s, hmm->null[x]); - } - } - - /* Set flags and return - */ - hmm->flags |= PLAN7_HASPROB; /* probabilities are valid */ - hmm->flags &= ~PLAN7_HASBITS; /* scores are not valid */ - Plan7Renormalize(hmm); - hmm->comlog = Strdup("[converted from an old Plan9 HMM]"); - Plan7SetCtime(hmm); - *ret_hmm = hmm; - return 1; - -FAILURE: - if (hmm != NULL) FreePlan7(hmm); - *ret_hmm = NULL; - return 1; -} - -static int -read_bin19hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) -{ - unsigned int magic; - struct plan7_s *hmm; /* plan7 HMM */ - struct plan9_s *p9hmm; /* old style 1.x HMM */ - - /* Read the magic number; if we don't see it, then we - * must be out of data in the file. - */ - if (feof(hmmfp->f)) return 0; - if (! 
fread((char *) &magic, sizeof(unsigned int), 1, hmmfp->f)) return 0; - - p9hmm = read_plan9_binhmm(hmmfp->f, HMMER1_9B, hmmfp->byteswap); - if (p9hmm == NULL) { *ret_hmm = NULL; return 1; } - - Plan9toPlan7(p9hmm, &hmm); - - hmm->comlog = Strdup("[converted from an old Plan9 HMM]"); - Plan7SetCtime(hmm); - - P9FreeHMM(p9hmm); - *ret_hmm = hmm; - return 1; -} -static int -read_asc17hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) -{ - struct plan7_s *hmm; /* plan7 HMM */ - struct plan9_s *p9hmm; /* old style 1.x HMM */ - char buffer[512]; - - /* Read the magic header; if we don't see it, then - * we must be out of data in the file. - */ - if (feof(hmmfp->f) || fgets(buffer, 512, hmmfp->f) == NULL) return 0; - - p9hmm = read_plan9_aschmm(hmmfp->f, HMMER1_7F); - if (p9hmm == NULL) { *ret_hmm = NULL; return 1; } - - Plan9toPlan7(p9hmm, &hmm); - - hmm->comlog = Strdup("[converted from an old Plan9 HMM]"); - Plan7SetCtime(hmm); - - P9FreeHMM(p9hmm); - Plan7Renormalize(hmm); - *ret_hmm = hmm; - return 1; -} - -static int -read_bin17hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) -{ - unsigned int magic; - struct plan7_s *hmm; /* plan7 HMM */ - struct plan9_s *p9hmm; /* old style 1.x HMM */ - - /* Read the magic number; if we don't see it, then we - * must be out of data in the file. - */ - if (feof(hmmfp->f)) return 0; - if (! 
fread((char *) &magic, sizeof(unsigned int), 1, hmmfp->f)) return 0; - - p9hmm = read_plan9_binhmm(hmmfp->f, HMMER1_7B, hmmfp->byteswap); - if (p9hmm == NULL) { *ret_hmm = NULL; return 1; } - - Plan9toPlan7(p9hmm, &hmm); - - hmm->comlog = Strdup("[converted from an old Plan9 HMM]"); - Plan7SetCtime(hmm); - - P9FreeHMM(p9hmm); - *ret_hmm = hmm; - return 1; -} - -static int -read_asc11hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) -{ - Die("1.1 ASCII HMMs unsupported"); - return 1; -} -static int -read_bin11hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) -{ - unsigned int magic; - struct plan7_s *hmm; /* plan7 HMM */ - struct plan9_s *p9hmm; /* old style 1.x HMM */ - - /* Read the magic number; if we don't see it, then we - * must be out of data in the file. - */ - if (feof(hmmfp->f)) return 0; - if (! fread((char *) &magic, sizeof(unsigned int), 1, hmmfp->f)) return 0; - - p9hmm = read_plan9_binhmm(hmmfp->f, HMMER1_1B, hmmfp->byteswap); - if (p9hmm == NULL) { *ret_hmm = NULL; return 1; } - - Plan9toPlan7(p9hmm, &hmm); - - hmm->comlog = Strdup("[converted from an old Plan9 HMM]"); - Plan7SetCtime(hmm); - - P9FreeHMM(p9hmm); - *ret_hmm = hmm; - return 1; -} - -static int -read_asc10hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) -{ - Die("1.0 ASCII HMMs unsupported"); - return 1; -} - -static int -read_bin10hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) -{ - unsigned int magic; - struct plan7_s *hmm; /* plan7 HMM */ - struct plan9_s *p9hmm; /* old style 1.x HMM */ - - /* Read the magic number; if we don't see it, then we - * must be out of data in the file. - */ - if (feof(hmmfp->f)) return 0; - if (! 
fread((char *) &magic, sizeof(unsigned int), 1, hmmfp->f)) return 0; - - p9hmm = read_plan9_binhmm(hmmfp->f, HMMER1_0B, hmmfp->byteswap); - if (p9hmm == NULL) { *ret_hmm = NULL; return 1; } - - Plan9toPlan7(p9hmm, &hmm); - - hmm->comlog = Strdup("[converted from an old Plan9 HMM]"); - Plan7SetCtime(hmm); - - P9FreeHMM(p9hmm); - *ret_hmm = hmm; - return 1; -} - -/***************************************************************** - * Some miscellaneous utility functions - *****************************************************************/ - -/* Function: prob2ascii() - * - * Purpose: Format a probability for output to an ASCII save - * file. Returns a ptr to a static internal buffer. - * - */ -static char * -prob2ascii(float p, float null) -{ - static char buffer[8]; - - if (p == 0.0) return "*"; - sprintf(buffer, "%6d", Prob2Score(p, null)); - return buffer; -} - - -/* Function: ascii2prob() - * - * Purpose: Convert a saved string back to a probability. - */ -static float -ascii2prob(char *s, float null) -{ - return (*s == '*') ? 0. : Score2Prob(atoi(s), null); -} - -/* Function: byteswap() - * - * Purpose: Swap between big-endian and little-endian. - * For example: - * int foo = 0x12345678; - * byteswap((char *) &foo, sizeof(int)); - * printf("%x\n", foo) - * gives 78563412. - * - * I don't fully understand byte-swapping issues. - * However, I have tested this on chars through floats, - * on various machines: - * SGI IRIX 4.0.5, SunOS 4.1.3, DEC Alpha OSF/1, Alliant - * - * Note: this is only a partial solution to the problem of - * binary file portability. 32 bit integers are assumed by HMMER, - * for instance. This should be true for all UNIX, VAX, and WinNT - * platforms, I believe. 
- * - * Date: Sun Feb 12 10:26:22 1995 - */ -static void -byteswap(char *swap, int nbytes) -{ - int x; - char byte; - - for (x = 0; x < nbytes / 2; x++) - { - byte = swap[nbytes - x - 1]; - swap[nbytes - x - 1] = swap[x]; - swap[x] = byte; - } -} - -/* Function: write_bin_string() - * Date: SRE, Wed Oct 29 13:49:27 1997 [TWA 721 over Canada] - * - * Purpose: Write a string in binary save format: an integer - * for the string length (including \0), followed by - * the string. - */ -static void -write_bin_string(FILE *fp, char *s) -{ - int len; - if (s != NULL) - { - len = strlen(s) + 1; - fwrite((char *) &len, sizeof(int), 1, fp); - fwrite((char *) s, sizeof(char), len, fp); - } - else - { - len = 0; - fwrite((char *) &len, sizeof(int), 1, fp); - } -} - -/* Function: read_bin_string() - * Date: SRE, Wed Oct 29 14:03:23 1997 [TWA 721] - * - * Purpose: Read in a string from a binary file, where - * the first integer is the length (including '\0'). - * - * Args: fp - FILE to read from - * doswap - TRUE to byteswap - * ret_s - string to read into - * - * Return: 0 on failure. ret_s is malloc'ed here. - */ -static int -read_bin_string(FILE *fp, int doswap, char **ret_s) -{ - char *s; - int len; - - if (! fread((char *) &len, sizeof(int), 1, fp)) return 0; - if (doswap) byteswap((char *)&len, sizeof(int)); - s = MallocOrDie (sizeof(char) * (len)); - if (! fread((char *) s, sizeof(char), len, fp)) - { - free(s); - return 0; - } - - *ret_s = s; - return 1; -} - -/* Function: multiline() - * Date: Mon Jan 5 14:57:50 1998 [StL] - * - * Purpose: Given a record (like the comlog) that contains - * multiple lines, print it as multiple lines with - * a given prefix. e.g.: - * - * given: "COM ", "foo\nbar\nbaz" - * print: COM foo - * COM bar - * COM baz - * - * - * Used to print the command log to ASCII save files. 
- * - * Args: fp: FILE to print to - * pfx: prefix for each line - * s: line to break up and print; tolerates a NULL - * - * Return: (void) - */ -static void -multiline(FILE *fp, char *pfx, char *s) -{ - char *buf; - char *sptr; - - if (s == NULL) return; - buf = Strdup(s); - sptr = strtok(buf, "\n"); - while (sptr != NULL) - { - fprintf(fp, "%s%s\n", pfx, sptr); - sptr = strtok(NULL, "\n"); - } - free(buf); -} - - -/***************************************************************** - * HMMER 1.x save file reading functions, modified from the - * corpse of 1.9m. - *****************************************************************/ - - -/* Function: read_plan9_binhmm() - * - * Read old (Plan9) binary HMM save files from HMMER 1.9 and earlier. - * V1.0 saved regularizer and sympvec info, which V1.1 ignores. - * V1.7 and later may include optional ref, cs annotation lines. - * V1.9 added name, null model. - * - * Returns pointer to the HMM on success; NULL - * on failure. Sets global alphabet information based on - * whether it reads 4 or 20 as alphabet size (don't rely - * on ancient HMMER macro definitions). - */ -static struct plan9_s * -read_plan9_binhmm(FILE *fp, int version, int swapped) -{ - struct plan9_s *hmm; - int M; /* length of model */ - int k; /* state number */ - int x; /* symbol or transition number */ - int len; /* length of variable length string */ - int asize; /* alphabet size */ - int atype; /* alphabet type (read but ignored) */ - char abet[20]; /* alphabet (read but ignored) */ - - /* read M and alphabet size */ - if (! fread((char *) &(M), sizeof(int), 1, fp)) return NULL; - if (! 
fread((char *) &asize, sizeof(int), 1, fp)) return NULL; - if (swapped) { - byteswap((char *) &M, sizeof(int)); - byteswap((char *) &asize, sizeof(int)); - } - - /* Set global alphabet information - */ - if (asize == 4) atype = hmmNUCLEIC; - else if (asize == 20) atype = hmmAMINO; - else Die("A nonbiological alphabet size of %d; so I can't convert plan9 to plan7", asize); - if (Alphabet_type == hmmNOTSETYET) SetAlphabet(atype); - else if (atype != Alphabet_type) - Die("Alphabet mismatch error.\nI thought we were working with %s, but tried to read a %s HMM.\n", AlphabetType2String(Alphabet_type), AlphabetType2String(atype)); - - /* now, create space for hmm */ - if ((hmm = P9AllocHMM(M)) == NULL) - Die("malloc failed for reading hmm in\n"); - - /* version 1.9+ files have a name */ - if (version == HMMER1_9B) { - if (! fread((char *) &len, sizeof(int), 1, fp)) return NULL; - if (swapped) byteswap((char *) &len, sizeof(int)); - hmm->name = (char *) ReallocOrDie (hmm->name, sizeof(char) * (len+1)); - if (! fread((char *) hmm->name, sizeof(char), len, fp)) return NULL; - hmm->name[len] = '\0'; - } - - /* read alphabet_type and alphabet, but ignore: we've already set them */ - if (! fread((char *) &atype, sizeof(int), 1, fp)) return NULL; - if (! fread((char *) abet, sizeof(char), Alphabet_size, fp)) return NULL; - - /* skip the random symbol frequencies in V1.0 */ - if (version == HMMER1_0B) - fseek(fp, (long) (sizeof(float) * Alphabet_size), SEEK_CUR); - - /* Get optional info in V1.7 and later - */ - if (version == HMMER1_7B || version == HMMER1_9B) - { - if (! fread((char *) &(hmm->flags), sizeof(int), 1, fp)) return NULL; - if (swapped) byteswap((char *) &hmm->flags, sizeof(int)); - if ((hmm->flags & HMM_REF) && - ! fread((char *) hmm->ref, sizeof(char), hmm->M+1, fp)) return NULL; - hmm->ref[hmm->M+1] = '\0'; - if ((hmm->flags & HMM_CS) && - ! 
fread((char *) hmm->cs, sizeof(char), hmm->M+1, fp)) return NULL; - hmm->cs[hmm->M+1] = '\0'; - } - - /* Get the null model in V1.9 and later - */ - if (version == HMMER1_9B) - { - if (! fread((char *) hmm->null, sizeof(float), Alphabet_size, fp)) return NULL; - if (swapped) - for (x = 0; x < Alphabet_size; x++) - byteswap((char *) &(hmm->null[x]), sizeof(float)); - } - else P9DefaultNullModel(hmm->null); - - /* everything else is states */ - for (k = 0; k <= hmm->M; k++) - { - /* get match state info */ - if (! fread((char *) &(hmm->mat[k].t[MATCH]), sizeof(float), 1, fp)) return NULL; - if (! fread((char *) &(hmm->mat[k].t[DELETE]), sizeof(float), 1, fp)) return NULL; - if (! fread((char *) &(hmm->mat[k].t[INSERT]), sizeof(float), 1, fp)) return NULL; - if (! fread((char *) hmm->mat[k].p, sizeof(float), Alphabet_size, fp)) return NULL -; - if (swapped) { - byteswap((char *) &(hmm->mat[k].t[MATCH]), sizeof(float)); - byteswap((char *) &(hmm->mat[k].t[DELETE]), sizeof(float)); - byteswap((char *) &(hmm->mat[k].t[INSERT]), sizeof(float)); - for (x = 0; x < Alphabet_size; x++) - byteswap((char *) &(hmm->mat[k].p[x]), sizeof(float)); - } - - /* skip the regularizer info in V1.0 */ - if (version == HMMER1_0B) - fseek(fp, (long)(sizeof(float) * (3 + Alphabet_size)), SEEK_CUR); - - /* get delete state info */ - if (! fread((char *) &(hmm->del[k].t[MATCH]), sizeof(float), 1, fp)) return NULL; - if (! fread((char *) &(hmm->del[k].t[DELETE]), sizeof(float), 1, fp)) return NULL; - if (! fread((char *) &(hmm->del[k].t[INSERT]), sizeof(float), 1, fp)) return NULL; - if (swapped) { - byteswap((char *) &(hmm->del[k].t[MATCH]), sizeof(float)); - byteswap((char *) &(hmm->del[k].t[DELETE]), sizeof(float)); - byteswap((char *) &(hmm->del[k].t[INSERT]), sizeof(float)); - } - - /* skip the regularizer info in V1.0 */ - if (version == HMMER1_0B) - fseek(fp, (long)(sizeof(float) * 3), SEEK_CUR); - - /* get insert state info */ - if (! 
fread((char *) &(hmm->ins[k].t[MATCH]), sizeof(float), 1, fp)) return NULL; - if (! fread((char *) &(hmm->ins[k].t[DELETE]), sizeof(float), 1, fp)) return NULL; - if (! fread((char *) &(hmm->ins[k].t[INSERT]), sizeof(float), 1, fp)) return NULL; - if (! fread((char *) hmm->ins[k].p, sizeof(float), Alphabet_size, fp)) return NULL -; - if (swapped) { - byteswap((char *) &(hmm->ins[k].t[MATCH]), sizeof(float)); - byteswap((char *) &(hmm->ins[k].t[DELETE]), sizeof(float)); - byteswap((char *) &(hmm->ins[k].t[INSERT]), sizeof(float)); - for (x = 0; x < Alphabet_size; x++) - byteswap((char *) &(hmm->ins[k].p[x]), sizeof(float)); - } - - /* skip the regularizer info in V1.0 */ - if (version == HMMER1_0B) - fseek(fp, (long)(sizeof(float) * (3 + Alphabet_size)), SEEK_CUR); - } - P9Renormalize(hmm); - return hmm; -} - - -/* Function: read_plan9_aschmm() - * - * Purpose: Read ASCII-format save files from 1.8.4 and earlier. - * V1.0 contained sympvec and regularizers; these are ignored - * in V1.1 and later - * V1.7 and later contain ref and cs annotation. - * - * Args: fp - open save file, header has been read already - * version - HMMER1_7F, for instance - * - * Returns ptr to the (allocated) new HMM on success, - * or NULL on failure. 
- */ -static struct plan9_s * -read_plan9_aschmm(FILE *fp, int version) -{ - struct plan9_s *hmm; - int M; /* length of model */ - char buffer[512]; - char *statetype; - char *s; - int k; /* state number */ - int i; /* symbol number */ - int asize; /* Alphabet size */ - int atype; /* Alphabet type */ - - /* read M from first line */ - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - if (!isdigit((int) (*s))) return NULL; - M = atoi(s); - /* read alphabet_length */ - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - if (!isdigit((int) (*s))) return NULL; - asize = atoi(s); - - /* Set global alphabet information - */ - if (asize == 4) atype = hmmNUCLEIC; - else if (asize == 20) atype = hmmAMINO; - else Die("A nonbiological alphabet size of %d; so I can't convert plan9 to plan7", asize); - if (Alphabet_type == hmmNOTSETYET) SetAlphabet(atype); - else if (atype != Alphabet_type) - Die("Alphabet mismatch error.\nI thought we were working with %s, but tried to read a %s HMM.\n", AlphabetType2String(Alphabet_type), AlphabetType2String(atype)); - - /* now, create space for hmm */ - if ((hmm = P9AllocHMM(M)) == NULL) - Die("malloc failed for reading hmm in\n"); - - /* read alphabet_type but ignore */ - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - if (!isdigit((int) (*s))) return NULL; - /* read alphabet but ignore */ - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - - /* skip the random symbol frequencies in V1.0 files. 
now unused */ - if (version == HMMER1_0F) - for (i = 0; i < Alphabet_size; i++) - if (fgets(buffer, 512, fp) == NULL) return NULL; - - /* V1.7 has lines for whether we have valid ref, cs info - */ - if (version == HMMER1_7F) - { - if (fgets(buffer, 512, fp) == NULL) return NULL; - if (strncmp(buffer, "yes", 3) == 0) hmm->flags |= HMM_REF; - if (fgets(buffer, 512, fp) == NULL) return NULL; - if (strncmp(buffer, "yes", 3) == 0) hmm->flags |= HMM_CS; - } - - /* everything else is states */ - while (fgets(buffer, 512, fp) != NULL) - { - /* get state type and index info */ - if ((statetype = strtok(buffer, " \t\n")) == NULL) return NULL; - if ((s = strtok((char *) NULL, " \t\n")) == NULL) return NULL; - if (!isdigit((int) (*s))) return NULL; - k = atoi(s); - if (k < 0 || k > hmm->M+1) return NULL; - - if (strcmp(statetype, "###MATCH_STATE") == 0) - { - /* V1.7: get ref, cs info: */ - /* ###MATCH_STATE 16 (x) (H) */ - if (version == HMMER1_7F) - { - s = strtok(NULL, "\n"); - while (*s != '(' && *s != '\0') s++; - if (*s != '(') return NULL; - hmm->ref[k] = *(s+1); - while (*s != '(' && *s != '\0') s++; - if (*s != '(') return NULL; - hmm->cs[k] = *(s+1); - } - - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - hmm->mat[k].t[MATCH] = (float) atof(s); - - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - hmm->mat[k].t[DELETE] = (float) atof(s); - - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - hmm->mat[k].t[INSERT] = (float) atof(s); - - for (i = 0; i < Alphabet_size; i++) - { - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - hmm->mat[k].p[i] = (float) atof(s); - } - - /* Skip all regularizer info for V1.0 */ - if (version == HMMER1_0F) - for (i = 0; i < Alphabet_size + 3; i++) - if (fgets(buffer, 512, fp) == NULL) return NULL; - - } 
- else if (strcmp(statetype, "###INSERT_STATE") == 0) - { - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - hmm->ins[k].t[MATCH] = (float) atof(s); - - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - hmm->ins[k].t[DELETE] = (float) atof(s); - - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - hmm->ins[k].t[INSERT] = (float) atof(s); - - for (i = 0; i < Alphabet_size; i++) - { - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - hmm->ins[k].p[i] = (float) atof(s); - } - - /* Skip all regularizer info in V1.0 files */ - if (version == HMMER1_0F) - for (i = 0; i < Alphabet_size + 3; i++) - if (fgets(buffer, 512, fp) == NULL) return NULL; - - } - else if (strcmp(statetype, "###DELETE_STATE") == 0) - { - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - hmm->del[k].t[MATCH] = (float) atof(s); - - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - hmm->del[k].t[DELETE] = (float) atof(s); - - if (fgets(buffer, 512, fp) == NULL) return NULL; - if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; - hmm->del[k].t[INSERT] = (float) atof(s); - - /* Skip all regularizer info in V1.0 files*/ - if (version == HMMER1_0F) - for (i = 0; i < 3; i++) - if (fgets(buffer, 512, fp) == NULL) return NULL; - } - else - return NULL; - } - - P9DefaultNullModel(hmm->null); - P9Renormalize(hmm); - return hmm; -} diff --git a/forester/archive/RIO/others/hmmer/src/hmmpfam-pvm.c b/forester/archive/RIO/others/hmmer/src/hmmpfam-pvm.c deleted file mode 100644 index ea75d20..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmpfam-pvm.c +++ /dev/null @@ -1,229 +0,0 @@ -/************************************************************ - * HMMER - 
Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -#ifdef HMMER_PVM - -/* hmmslave-pvm.c - * SRE, Sun Jul 12 17:15:36 1998 - * - * PVM slave for hmmpfam-pvm and hmmsearch-pvm. - * RCS $Id: hmmpfam-pvm.c,v 1.1.1.1 2005/03/22 08:34:15 cmzmasek Exp $ - */ - -#include -#include -#include - -#include "version.h" -#include "structs.h" /* data structures, macros, #define's */ -#include "config.h" /* compile-time configuration constants */ -#include "funcs.h" /* function declarations */ -#include "globals.h" /* alphabet global variables */ -#include "squid.h" /* general sequence analysis library */ - -static void leave_pvm(void); - -int -main(void) -{ - struct p7trace_s *tr; /* traceback of an alignment */ - int master_tid; /* PVM TID of our master */ - char *hmmfile; /* file to read HMM(s) from */ - HMMFILE *hmmfp; /* opened hmmfile for reading */ - struct plan7_s *hmm; - char *seq; - char *dsq; - int len; - int nhmm; /* number of HMM to work on */ - float sc; - int my_idx = -1; /* my index, 0..nslaves-1 */ - double pvalue; /* Z*pvalue = Evalue */ - double evalue; /* upper bound on evalue */ - struct threshold_s thresh; /* threshold settings */ - int send_trace; /* TRUE if score is significant */ - int do_xnu; /* TRUE to do XNU filter on seq */ - int do_forward; /* TRUE to use Forward() scores not Viterbi */ - int do_null2; /* TRUE to correct scores w/ ad hoc null2 */ - int alphatype; /* alphabet type, hmmAMINO or hmmNUCLEIC */ - int code; /* return code after initialization */ - - - SQD_DPRINTF1(("a slave reporting for duty!\n")); - - /* Register leave_pvm() cleanup function so any exit() call - * first calls pvm_exit(). 
- */ - if (atexit(leave_pvm) != 0) { pvm_exit(); Die("slave couldn't register leave_pvm()"); } - - /***************************************************************** - * initialization. - * Master broadcasts to us: - * 1) len of HMM file name (int) - * 2) name of HMM file (string) - * 3) length of sequence string (int) - * 4) sequence (string) - * 5) globT threshold - * 6) globE threshold - * 7) Z - * 8) autocut setting - * 9) do_xnu flag - * 10) do_forward flag - * 11) do_null2 flag - * 12) alphabet type - * We receive the broadcast and open the files. - ******************************************************************/ - - master_tid = pvm_parent(); /* who's our master? */ - SQD_DPRINTF1(("I know my master is %d\n", master_tid)); - - pvm_recv(master_tid, HMMPVM_INIT); - pvm_upkint(&len, 1, 1); - hmmfile = MallocOrDie(sizeof(char *) * (len+1)); - pvm_upkstr(hmmfile); - pvm_upkint(&len, 1, 1); - seq = MallocOrDie(sizeof(char *) * (len+1)); - pvm_upkstr(seq); - pvm_upkfloat(&(thresh.globT), 1, 1); - pvm_upkdouble(&(thresh.globE), 1, 1); - pvm_upkint(&(thresh.Z), 1, 1); - pvm_upkint((int *) &(thresh.autocut), 1, 1); - pvm_upkint(&do_xnu, 1, 1); - pvm_upkint(&do_forward, 1, 1); - pvm_upkint(&do_null2, 1, 1); - pvm_upkint(&alphatype, 1, 1); - SQD_DPRINTF1(("My master has told me how to initialize, and I am happy.\n")); - - SetAlphabet(alphatype); - /* Open HMM file (maybe in HMMERDB) */ - code = HMMPVM_OK; - if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) - code = HMMPVM_NO_HMMFILE; - else if (hmmfp->ssi == NULL) - code = HMMPVM_NO_INDEX; - - /* report our status. 
- */ - pvm_initsend(PvmDataDefault); - pvm_pkint(&code, 1, 1); - PVMPackString(RELEASE); /* proofing against bug#1 */ - pvm_send(master_tid, HMMPVM_RESULTS); - SQD_DPRINTF1(("I have told my master my initialization status and I await his command.\n")); - - dsq = DigitizeSequence(seq, len); - if (do_xnu) XNU(dsq, len); - - /***************************************************************** - * Main loop. - * Receive an integer 0..nhmm-1 for which HMM to search against. - * If we receive a -1, we shut down. - *****************************************************************/ - - for (;;) - { - pvm_recv(master_tid, HMMPVM_WORK); - pvm_upkint(&nhmm, 1, 1); - if (my_idx < 0) my_idx = nhmm; /* first time thru, remember what index we are. */ - - if (nhmm == -1) { /* shutdown signal */ - SQD_DPRINTF1(("I've been told to shut down.")); - break; - } - - /* move to our assigned HMM in the HMM file, and read it - */ - SQD_DPRINTF1(("The master says to do HMM #%d - I hear and obey\n", nhmm)); - if (! HMMFilePositionByIndex(hmmfp, nhmm)) Die("didn't position the HMM file"); - if (! HMMFileRead(hmmfp, &hmm)) Die("unexpected end of HMM file"); - if (hmm == NULL) Die("unexpected failure to parse HMM file"); - P7Logoddsify(hmm, TRUE); - - /* set Pfam specific score thresholds if needed */ - if (! SetAutocuts(&thresh, hmm)) - Die("HMM %s doesn't have the score cutoffs you wanted", hmm->name); - - /* Score sequence, do alignment (Viterbi), recover trace - */ - if (P7ViterbiSize(len, hmm->M) <= RAMLIMIT) - { - SQD_DPRINTF1(("P7Viterbi(): Estimated size %d Mb\n", P7ViterbiSize(len, hmm->M))); - sc = P7Viterbi(dsq, len, hmm, &tr); - } - else - { - SQD_DPRINTF1(("P7SmallViterbi() called; %d Mb > %d\n", P7ViterbiSize(len, hmm->M), RAMLIMIT)); - sc = P7SmallViterbi(dsq, len, hmm, &tr); - } - - /* The Forward score override. - * See comments in hmmpfam.c in serial version. 
- */ - if (do_forward) { - sc = P7Forward(dsq, len, hmm, NULL); - if (do_null2) sc -= TraceScoreCorrection(hmm, tr, dsq); - } - - pvalue = PValue(hmm, sc); - evalue = thresh.Z ? (double) thresh.Z * pvalue : (double) nhmm * pvalue; - send_trace = (sc >= thresh.globT && evalue <= thresh.globE) ? 1 : 0; - - /* return output - */ - pvm_initsend(PvmDataDefault); - pvm_pkint(&my_idx, 1, 1); /* tell master who we are */ - pvm_pkstr(hmm->name); /* double check that we did the right thing */ - pvm_pkfloat(&sc, 1, 1); - pvm_pkdouble(&pvalue, 1, 1); - pvm_pkint(&send_trace, 1, 1); /* flag for whether a trace structure is coming */ - if (send_trace) PVMPackTrace(tr); - pvm_send(master_tid, HMMPVM_RESULTS); - - /* cleanup - */ - FreePlan7(hmm); - P7FreeTrace(tr); - } - - /*********************************************** - * Cleanup, return. - ***********************************************/ - - HMMFileClose(hmmfp); - free(seq); - free(dsq); - free(hmmfile); - return 0; -} - - -/* Function: leave_pvm() - * - * Purpose: Cleanup function, to deal with crashes. We register - * this function using atexit() so it gets called before - * the slave dies. - */ -static void leave_pvm(void) -{ - SQD_DPRINTF1(("slave leaving PVM.\n")); - pvm_exit(); -} - - - -#else /* if HMMER_PVM not defined: include a dummy */ - -#include -int main(void) -{ - printf("hmmpfam-slave is disabled. 
PVM support was not compiled into HMMER.\n"); - exit(0); -} - -#endif - diff --git a/forester/archive/RIO/others/hmmer/src/hmmpfam.c b/forester/archive/RIO/others/hmmer/src/hmmpfam.c deleted file mode 100644 index 4d49f71..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmpfam.c +++ /dev/null @@ -1,1094 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* hmmpfam.c - * SRE, Mon Aug 25 17:03:14 1997 [Denver] - * - * Search a single sequence against an HMM database. - * Conditionally includes PVM parallelization when HMMER_PVM is defined - * at compile time; hmmpfam --pvm runs the PVM version. - * - * CVS $Id: hmmpfam.c,v 1.1.1.1 2005/03/22 08:34:13 cmzmasek Exp $ - */ - -#include -#include -#include -#include -#include -#ifdef HMMER_THREADS -#include -#endif -#ifdef HMMER_PVM -#include -#endif - -#include "squid.h" /* general sequence analysis library */ -#include "config.h" /* compile-time configuration constants */ -#include "structs.h" /* data structures, macros, #define's */ -#include "funcs.h" /* function declarations */ -#include "globals.h" /* alphabet global variables */ -#include "version.h" /* version info */ - -static char banner[] = "hmmpfam - search one or more sequences against HMM database"; - -static char usage[] = "\ -Usage: hmmpfam [-options] \n\ - Available options are:\n\ - -h : help; print brief help on version and usage\n\ - -n : nucleic acid models/sequence (default protein)\n\ - -A : sets alignment output limit to best domain alignments\n\ - -E : sets E value cutoff (globE) to ; default 10\n\ - -T : sets T bit threshold (globT) to ; no threshold by default\n\ 
- -Z : sets Z (# models) for E-value calculation\n\ -"; - -static char experts[] = "\ - --acc : use HMM accession numbers instead of names in output\n\ - --compat : make best effort to use last version's output style\n\ - --cpu : run threads in parallel (if threaded)\n\ - --cut_ga : use Pfam GA gathering threshold cutoffs\n\ - --cut_nc : use Pfam NC noise threshold cutoffs\n\ - --cut_tc : use Pfam TC trusted threshold cutoffs\n\ - --domE : sets domain Eval cutoff (2nd threshold) to \n\ - --domT : sets domain T bit thresh (2nd threshold) to \n\ - --forward : use the full Forward() algorithm instead of Viterbi\n\ - --informat : sequence file is in format , not FASTA\n\ - --null2 : turn OFF the post hoc second null model\n\ - --pvm : run on a PVM (Parallel Virtual Machine) cluster\n\ - --xnu : turn ON XNU filtering of query protein sequence\n\ -\n"; - - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-n", TRUE, sqdARG_NONE }, - { "-A", TRUE, sqdARG_INT }, - { "-E", TRUE, sqdARG_FLOAT}, - { "-T", TRUE, sqdARG_FLOAT}, - { "-Z", TRUE, sqdARG_INT }, - { "--acc", FALSE, sqdARG_NONE }, - { "--compat", FALSE, sqdARG_NONE }, - { "--cpu", FALSE, sqdARG_INT }, - { "--cut_ga", FALSE, sqdARG_NONE }, - { "--cut_nc", FALSE, sqdARG_NONE }, - { "--cut_tc", FALSE, sqdARG_NONE }, - { "--domE", FALSE, sqdARG_FLOAT}, - { "--domT", FALSE, sqdARG_FLOAT}, - { "--forward", FALSE, sqdARG_NONE }, - { "--informat",FALSE, sqdARG_STRING}, - { "--null2", FALSE, sqdARG_NONE }, - { "--pvm", FALSE, sqdARG_NONE }, - { "--xnu", FALSE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - - - -#ifdef HMMER_THREADS -/* POSIX threads version: - * the threads share a workpool_s structure amongst themselves, - * for obtaining locks on input HMM file and output histogram and - * tophits structures. 
- */ -struct workpool_s { - /* Shared configuration resources that don't change: - */ - char *hmmfile; /* name of HMM file */ - char *dsq; /* digitized query sequence */ - char *seqname; /* sequence name */ - int L; /* length of dsq */ - int do_forward; /* TRUE to score using Forward */ - int do_null2; /* TRUE to apply null2 correction */ - struct threshold_s *thresh; /* score/evalue cutoff information */ - - /* Shared (mutex-protected) input resources: - */ - HMMFILE *hmmfp; /* ptr to open HMM file */ - int nhmm; /* number of HMMs searched so far */ - pthread_mutex_t input_lock; /* mutex for locking input */ - - /* Shared (mutex-protected) output resources: - */ - struct tophit_s *ghit; /* per-sequence top hits */ - struct tophit_s *dhit; /* per-domain top hits */ - pthread_mutex_t output_lock; /* mutex for locking output */ - - /* Thread pool information - */ - pthread_t *thread; /* our pool of threads */ - int num_threads; /* number of threads */ -}; - -static struct workpool_s *workpool_start(char *hmmfile, HMMFILE *hmmfp, - char *dsq, char *seqname, int L, - int do_forward, int do_null2, - struct threshold_s *thresh, - struct tophit_s *ghit, struct tophit_s *dhit, - int num_threads); -static void workpool_stop(struct workpool_s *wpool); -static void workpool_free(struct workpool_s *wpool); -static void *worker_thread(void *ptr); -#endif /* HMMER_THREADS */ - - -#ifdef HMMER_PVM -static void main_loop_pvm(char *hmmfile, HMMFILE *hmmfp, char *seq, SQINFO *sqinfo, - struct threshold_s *thresh, int do_xnu, int do_forward, int do_null2, - struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nhmm); -#endif -static void main_loop_serial(char *hmmfile, HMMFILE *hmmfp, char *seq, SQINFO *sqinfo, - struct threshold_s *thresh, int do_xnu, int do_forward, int do_null2, - int num_threads, - struct tophit_s *ghit, struct tophit_s *dhit, int *nhmm); - -int -main(int argc, char **argv) -{ - char *hmmfile; /* file to read HMMs from */ - HMMFILE *hmmfp; /* opened hmmfile for 
reading */ - char *seqfile; /* file to read target sequence from */ - SQFILE *sqfp; /* opened seqfile for reading */ - int format; /* format of seqfile */ - char *seq; /* target sequence */ - SQINFO sqinfo; /* optional info for seq */ - struct fancyali_s *ali; /* an alignment for display */ - struct tophit_s *ghit; /* list of top hits and alignments for seq */ - struct tophit_s *dhit; /* list of top hits/alignments for domains */ - - float sc; /* log-odds score in bits */ - double pvalue; /* pvalue of an HMM score */ - double evalue; /* evalue of an HMM score */ - double motherp; /* pvalue of a whole seq HMM score */ - float mothersc; /* score of a whole seq parent of domain */ - int sqfrom, sqto; /* coordinates in sequence */ - int hmmfrom, hmmto; /* coordinate in HMM */ - char *name, *acc, *desc; /* hit HMM name, accession, description */ - int hmmlen; /* length of HMM hit */ - int nhmm; /* number of HMMs searched */ - int domidx; /* number of this domain */ - int ndom; /* total # of domains in this seq */ - int namewidth; /* max width of printed HMM name */ - int descwidth; /* max width of printed description */ - - int Alimit; /* A parameter limiting output alignments */ - struct threshold_s thresh; /* contains all threshold (cutoff) info */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - int do_forward; /* TRUE to use Forward() not Viterbi() */ - int do_nucleic; /* TRUE to do DNA/RNA instead of protein */ - int do_null2; /* TRUE to adjust scores with null model #2 */ - int do_pvm; /* TRUE to run on PVM */ - int do_xnu; /* TRUE to do XNU filtering */ - int be_backwards; /* TRUE to be backwards-compatible in output*/ - int show_acc; /* TRUE to sub HMM accessions for names */ - int i; - int nreported; - - int num_threads; /* number of worker threads */ - - /*********************************************** - * Parse command line - 
***********************************************/ - - format = SQFILE_UNKNOWN; /* default: autodetect format w/ Babelfish */ - do_forward = FALSE; - do_nucleic = FALSE; - do_null2 = TRUE; - do_pvm = FALSE; - do_xnu = FALSE; - be_backwards= FALSE; - show_acc = FALSE; - - Alimit = INT_MAX; /* no limit on alignment output */ - thresh.globE = 10.0; /* use a reasonable Eval threshold; */ - thresh.globT = -FLT_MAX; /* but no bit threshold, */ - thresh.domT = -FLT_MAX; /* no domain bit threshold, */ - thresh.domE = FLT_MAX; /* and no domain Eval threshold. */ - thresh.autocut = CUT_NONE; /* and no Pfam cutoffs used. */ - thresh.Z = 0; /* Z not preset, so determined by # of HMMs */ - -#ifdef HMMER_THREADS - num_threads = ThreadNumber(); /* only matters if we're threaded */ -#else - num_threads = 0; -#endif - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-n") == 0) do_nucleic = TRUE; - else if (strcmp(optname, "-A") == 0) Alimit = atoi(optarg); - else if (strcmp(optname, "-E") == 0) thresh.globE = atof(optarg); - else if (strcmp(optname, "-T") == 0) thresh.globT = atof(optarg); - else if (strcmp(optname, "-Z") == 0) thresh.Z = atoi(optarg); - else if (strcmp(optname, "--acc") == 0) show_acc = TRUE; - else if (strcmp(optname, "--compat") == 0) be_backwards = TRUE; - else if (strcmp(optname, "--cpu") == 0) num_threads = atoi(optarg); - else if (strcmp(optname, "--cut_ga") == 0) thresh.autocut = CUT_GA; - else if (strcmp(optname, "--cut_nc") == 0) thresh.autocut = CUT_NC; - else if (strcmp(optname, "--cut_tc") == 0) thresh.autocut = CUT_TC; - else if (strcmp(optname, "--domE") == 0) thresh.domE = atof(optarg); - else if (strcmp(optname, "--domT") == 0) thresh.domT = atof(optarg); - else if (strcmp(optname, "--forward") == 0) do_forward = TRUE; - else if (strcmp(optname, "--null2") == 0) do_null2 = FALSE; - else if (strcmp(optname, "--pvm") == 0) do_pvm = TRUE; - else if (strcmp(optname, "--xnu") == 0) do_xnu = 
TRUE; - else if (strcmp(optname, "--informat") == 0) { - format = String2SeqfileFormat(optarg); - if (format == SQFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 2) - Die("Incorrect number of arguments.\n%s\n", usage); - - hmmfile = argv[optind++]; - seqfile = argv[optind++]; - -#ifndef HMMER_PVM - if (do_pvm) Die("PVM support is not compiled into HMMER; --pvm doesn't work."); -#endif -#ifndef HMMER_THREADS - if (num_threads) Die("Posix threads support is not compiled into HMMER; --cpu doesn't have any effect"); -#endif - - /*********************************************** - * Open sequence database (must be in curr directory); - * get target sequence. - ***********************************************/ - - if (do_nucleic) SetAlphabet(hmmNUCLEIC); - else SetAlphabet(hmmAMINO); - - if (do_nucleic && do_xnu) - Die("You can't use -n and --xnu together: I can't xnu DNA data."); - - if ((sqfp = SeqfileOpen(seqfile, format, NULL)) == NULL) - Die("Failed to open sequence file %s\n%s\n", seqfile, usage); - - /*********************************************** - * Open HMM database (might be in HMMERDB or current directory) - ***********************************************/ - - if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) - Die("Failed to open HMM database %s\n%s", hmmfile, usage); - - /*********************************************** - * Show the banner - ***********************************************/ - - Banner(stdout, banner); - printf( "HMM file: %s\n", hmmfile); - printf( "Sequence file: %s\n", seqfile); - if (do_pvm) - printf( "PVM: ACTIVE\n"); - printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n"); - - /*********************************************** - * Search each HMM against each sequence - ***********************************************/ - - while (ReadSeq(sqfp, 
format, &seq, &sqinfo)) - { - ghit = AllocTophits(20); /* keeps full seq scores */ - dhit = AllocTophits(20); /* keeps domain scores */ - - /* 1. Search sequence against all HMMs. - * Significant scores+alignments accumulate in ghit, dhit. - */ - if (!do_pvm) - main_loop_serial(hmmfile, hmmfp, seq, &sqinfo, - &thresh, do_xnu, do_forward, do_null2, num_threads, - ghit, dhit, &nhmm); -#ifdef HMMER_PVM - else if (do_pvm) - { - SQD_DPRINTF1(("Entering PVM main loop\n")); - main_loop_pvm(hmmfile, hmmfp, seq, &sqinfo, - &thresh, do_xnu, do_forward, do_null2, - ghit, dhit, &nhmm); - } -#endif - else Die("wait. that can't happen. I didn't do anything."); - - /* set Z for good now that we're done */ - if (!thresh.Z) thresh.Z = nhmm; - - /* 2. (Done searching all HMMs for this query seq; start output) - * Report the overall sequence hits, sorted by significance. - */ - if (be_backwards) - { - printf("Query: %s %s\n", sqinfo.name, - sqinfo.flags & SQINFO_DESC ? sqinfo.desc : ""); - } - else - { - printf("\nQuery sequence: %s\n", sqinfo.name); - printf("Accession: %s\n", sqinfo.flags &SQINFO_ACC ? sqinfo.acc : "[none]"); - printf("Description: %s\n", sqinfo.flags &SQINFO_DESC? sqinfo.desc : "[none]"); - } - /* We'll now sort the global hit list by evalue... - * (not score! that was bug #12. in hmmpfam, score and evalue are not - * monotonic.) - */ - FullSortTophits(ghit); - namewidth = MAX(8, TophitsMaxName(ghit)); /* must print whole name, no truncation */ - descwidth = MAX(52-namewidth, 11); /* may truncate desc, but avoid neg len! 
*/ - - printf("\nScores for sequence family classification (score includes all domains):\n"); - printf("%-*s %-*s %7s %10s %3s\n", namewidth, "Model", descwidth, "Description", "Score", "E-value", " N "); - printf("%-*s %-*s %7s %10s %3s\n", namewidth, "--------", descwidth, "-----------", "-----", "-------", "---"); - for (i = 0, nreported = 0; i < ghit->num; i++) - { - char *safedesc; - GetRankedHit(ghit, i, - &pvalue, &sc, NULL, NULL, - &name, &acc, &desc, - NULL, NULL, NULL, /* seq positions */ - NULL, NULL, NULL, /* HMM positions */ - NULL, &ndom, /* domain info */ - NULL); /* alignment info*/ - - evalue = pvalue * (double) thresh.Z; - - /* safedesc is a workaround for an apparent Linux printf() - * bug with the *.*s format. dbmalloc crashes with a memchr() ptr out of bounds - * flaw if the malloc'ed space for desc is short. The workaround - * is to make sure the ptr for *.* has a big malloc space. - */ - if (desc != NULL && strlen(desc) < 80) - { - safedesc = MallocOrDie(sizeof(char) * 80); - strcpy(safedesc, desc); - } - else safedesc = Strdup(desc); - - /* sneaky trick warning: - * if we're using dynamic Pfam score cutoffs (GA, TC, NC), - * then the list of hits is already correct and does not - * need any score cutoffs. Unset the thresholds. They'll - * be reset in the main_loop if we still have sequences - * to process. - */ - if (thresh.autocut != CUT_NONE) { - thresh.globE = thresh.domE = FLT_MAX; - thresh.globT = thresh.domT = -FLT_MAX; - } - - if (evalue <= thresh.globE && sc >= thresh.globT) - { - printf("%-*s %-*.*s %7.1f %10.2g %3d\n", - namewidth, - (show_acc && acc != NULL) ? acc : name, - descwidth, descwidth, safedesc != NULL ? safedesc : "", - sc, evalue, ndom); - nreported++; - } - free(safedesc); - } - if (nreported == 0) printf("\t[no hits above thresholds]\n"); - - /* 3. 
Report domain hits (sorted on sqto coordinate) - */ - FullSortTophits(dhit); - namewidth = MAX(8, TophitsMaxName(dhit)); /* must print whole name, no truncation */ - - printf("\nParsed for domains:\n"); - printf("%-*s %7s %5s %5s %5s %5s %7s %8s\n", - namewidth, "Model", "Domain ", "seq-f", "seq-t", "hmm-f", "hmm-t", "score", "E-value"); - printf("%-*s %7s %5s %5s %5s %5s %7s %8s\n", - namewidth, "--------", "-------", "-----", "-----", "-----", "-----", "-----", "-------"); - - for (i = 0, nreported = 0; i < dhit->num; i++) - { - GetRankedHit(dhit, i, - &pvalue, &sc, &motherp, &mothersc, - &name, &acc, NULL, - &sqfrom, &sqto, NULL, - &hmmfrom, &hmmto, &hmmlen, - &domidx, &ndom, - NULL); - evalue = pvalue * (double) thresh.Z; - - /* Does the "mother" (complete) sequence satisfy global thresholds? */ - if (motherp * (double)thresh. Z > thresh.globE || mothersc < thresh.globT) - continue; - else if (evalue <= thresh.domE && sc >= thresh.domT) { - printf("%-*s %3d/%-3d %5d %5d %c%c %5d %5d %c%c %7.1f %8.2g\n", - namewidth, - (show_acc && acc != NULL) ? acc : name, - domidx, ndom, - sqfrom, sqto, - sqfrom == 1 ? '[' : '.', sqto == sqinfo.len ? ']' : '.', - hmmfrom, hmmto, - hmmfrom == 1 ? '[':'.', hmmto == hmmlen ? ']' : '.', - sc, evalue); - nreported++; - } - } - if (nreported == 0) printf("\t[no hits above thresholds]\n"); - - - /* 3. Alignment output, also by domain. - * dhits is already sorted and namewidth is set, from above code. - * Number of displayed alignments is limited by Alimit parameter; - * also by domE (evalue threshold), domT (score theshold). 
- */ - if (Alimit != 0) - { - printf("\nAlignments of top-scoring domains:\n"); - for (i = 0, nreported = 0; i < dhit->num; i++) - { - if (nreported == Alimit) break; /* limit to Alimit output alignments */ - GetRankedHit(dhit, i, - &pvalue, &sc, &motherp, &mothersc, - &name, &acc, NULL, - &sqfrom, &sqto, NULL, /* seq position info */ - &hmmfrom, &hmmto, &hmmlen, /* HMM position info */ - &domidx, &ndom, /* domain info */ - &ali); /* alignment info */ - evalue = pvalue * (double) thresh.Z; - - if (motherp * (double) thresh.Z > thresh.globE || mothersc < thresh.globT) - continue; - else if (evalue <= thresh.domE && sc >= thresh.domT) - { - printf("%s: domain %d of %d, from %d to %d: score %.1f, E = %.2g\n", - (show_acc && acc != NULL) ? acc : name, - domidx, ndom, sqfrom, sqto, sc, evalue); - PrintFancyAli(stdout, ali); - nreported++; - } - } - if (nreported == 0) printf("\t[no hits above thresholds]\n"); - if (nreported == Alimit) printf("\t[output cut off at A = %d top alignments]\n", Alimit); - } - - - printf("//\n"); - FreeSequence(seq, &sqinfo); - FreeTophits(ghit); - FreeTophits(dhit); - - HMMFileRewind(hmmfp); - } - - /*********************************************** - * Clean-up and exit. - ***********************************************/ - SeqfileClose(sqfp); - HMMFileClose(hmmfp); - SqdClean(); - - return 0; -} - - -/* Function: main_loop_serial() - * Date: SRE, Fri Aug 7 13:46:48 1998 [St. Louis] - * - * Purpose: Search a sequence against an HMM database; - * main loop for the serial (non-PVM, non-threads) - * version. - * - * On return, ghit and dhit contain info for all hits - * that satisfy the set thresholds. If an evalue - * cutoff is used at all, the lists will be overestimated -- - * because the evalue will be underestimated until - * we know the final Z. (Thus the main program must recheck - * thresholds before printing any results.) 
If only - * score cutoffs are used, then the lists are correct, - * and may be printed exactly as they come (after - * appropriate sorting, anyway). This is especially - * important for dynamic thresholding using Pfam - * score cutoffs -- the main caller cannot afford to - * rescan the HMM file just to get the GA/TC/NC cutoffs - * back out for each HMM, and neither do I want to - * burn the space to store them as I make a pass thru - * Pfam. - * - * Args: hmmfile - name of HMM file - * hmmfp - open HMM file (and at start of file) - * dsq - digitized sequence - * sqinfo - ptr to SQINFO optional info for dsq - * thresh - score/evalue threshold information - * do_xnu - TRUE to apply XNU filter to sequence - * do_forward - TRUE to use Forward() scores - * do_null2 - TRUE to adjust scores w/ ad hoc null2 model - * num_threads- number of threads, if threaded - * ghit - global hits list - * dhit - domain hits list - * ret_nhmm - number of HMMs searched. - * - * Returns: (void) - */ -static void -main_loop_serial(char *hmmfile, HMMFILE *hmmfp, char *seq, SQINFO *sqinfo, - struct threshold_s *thresh, int do_xnu, int do_forward, int do_null2, - int num_threads, - struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nhmm) -{ - char *dsq; /* digitized sequence */ - int nhmm; /* number of HMMs searched */ -#ifdef HMMER_THREADS - struct workpool_s *wpool; /* pool of worker threads */ -#endif - struct plan7_s *hmm; /* current HMM to search with */ - struct p7trace_s *tr; /* traceback of alignment */ - float sc; /* an alignment score */ - double pvalue; /* pvalue of an HMM score */ - double evalue; /* evalue of an HMM score */ - - /* Prepare sequence. 
- */ - dsq = DigitizeSequence(seq, sqinfo->len); - if (do_xnu && Alphabet_type == hmmAMINO) XNU(dsq, sqinfo->len); - -#ifdef HMMER_THREADS - if (num_threads > 0) { - wpool = workpool_start(hmmfile, hmmfp, dsq, sqinfo->name, sqinfo->len, - do_forward, do_null2, thresh, - ghit, dhit, num_threads); - workpool_stop(wpool); - nhmm = wpool->nhmm; - workpool_free(wpool); - - free(dsq); - *ret_nhmm = nhmm; - return; - } -#endif - /* unthreaded code: */ - nhmm = 0; - while (HMMFileRead(hmmfp, &hmm)) { - if (hmm == NULL) - Die("HMM file %s may be corrupt or in incorrect format; parse failed", hmmfile); - P7Logoddsify(hmm, !(do_forward)); - - if (! SetAutocuts(thresh, hmm)) - Die("HMM %s did not contain the GA, TC, or NC cutoffs you needed", - hmm->name); - - /* Score sequence, do alignment (Viterbi), recover trace - */ - if (P7ViterbiSize(sqinfo->len, hmm->M) <= RAMLIMIT) - sc = P7Viterbi(dsq, sqinfo->len, hmm, &tr); - else - sc = P7SmallViterbi(dsq, sqinfo->len, hmm, &tr); - - /* Implement do_forward; we'll override the whole_sc with a P7Forward() - * calculation. - * HMMER is so trace- (alignment-) dependent that this gets a bit hacky. - * Some important implications: - * 1) if --do_forward is selected, the domain (Viterbi) scores do not - * necessarily add up to the whole sequence (Forward) score. - * 2) The implementation of null2 for a Forward score is undefined, - * since the null2 correction is trace-dependent. As a total hack, - * we use a null2 correction derived from the whole trace - * (which was the behavior of HMMER 2.1.1 and earlier, anyway). - * This could put the sum of domain scores and whole seq score even - * further in disagreement. - * - * Note that you can't move the Forward calculation into - * PostprocessSignificantHit(). The Forward score will exceed the - * Viterbi score, so you can't apply thresholds until you - * know the Forward score. 
Also, since PostprocessSignificantHit() - * is wrapped by a mutex in the threaded implementation, - * you'd destroy all useful parallelism if PostprocessSignificantHit() - * did anything compute intensive. - */ - if (do_forward) { - sc = P7Forward(dsq, sqinfo->len, hmm, NULL); - if (do_null2) sc -= TraceScoreCorrection(hmm, tr, dsq); - } - - /* Store scores/pvalue for each HMM aligned to this sequence, overall - */ - pvalue = PValue(hmm, sc); - evalue = thresh->Z ? (double) thresh->Z * pvalue : (double) nhmm * pvalue; - if (sc >= thresh->globT && evalue <= thresh->globE) { - PostprocessSignificantHit(ghit, dhit, - tr, hmm, dsq, sqinfo->len, - sqinfo->name, NULL, NULL, /* won't need acc or desc even if we have 'em */ - do_forward, sc, - do_null2, - thresh, - TRUE); /* TRUE -> hmmpfam mode */ - } - P7FreeTrace(tr); - FreePlan7(hmm); - nhmm++; - } - - free(dsq); - *ret_nhmm = nhmm; - return; -} - - -#ifdef HMMER_PVM -/***************************************************************** - * PVM specific functions - ****************************************************************/ - -/* Function: main_loop_pvm() - * Date: SRE, Fri Aug 7 13:58:34 1998 [St. Louis] - * - * Purpose: Search a sequence against an HMM database; - * main loop for the PVM version. - * - * Args: hmmfile - name of HMM file - * hmmfp - open HMM file (and at start of file) - * seq - sequence to search against - * sqinfo - ptr to SQINFO optional info for dsq - * thresh - score/evalue threshold settings - * do_xnu - TRUE to apply XNU filter to sequence - * do_forward - TRUE to use Forward() scores - * do_null2 - TRUE to adjust scores w/ ad hoc null2 model - * ghit - global hits list - * dhit - domain hits list - * nhmm - number of HMMs searched. 
- * - * Returns: (void) - */ -static void -main_loop_pvm(char *hmmfile, HMMFILE *hmmfp, char *seq, SQINFO *sqinfo, - struct threshold_s *thresh, int do_xnu, int do_forward, int do_null2, - struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nhmm) -{ - struct plan7_s *hmm; /* HMM that was searched with */ - struct p7trace_s *tr; /* a traceback structure */ - char *dsq; /* digitized sequence */ - float sc; /* score of an HMM match */ - int master_tid; /* master's ID */ - int *slave_tid; /* array of slave IDs */ - int *hmmlist; /* array of hmm indexes being worked on by slaves */ - int nslaves; /* number of slaves in virtual machine */ - int nhmm; /* number of HMMs searched */ - int slaveidx; /* index of a slave wanting work */ - int slave, msg; - int sent_trace; /* TRUE if slave sent us a trace */ - char slavename[32]; /* name of HMM that slave actually did */ - double pvalue; /* pvalue of HMM score */ - int arglen; - - /* Sanity checks. - */ - if (hmmfp->ssi == NULL) - Die("HMM file %s needs an SSI index to use PVM. See: hmmindex.", hmmfile); - - /* Prepare sequence. 
- */ - dsq = DigitizeSequence(seq, sqinfo->len); - if (do_xnu && Alphabet_type == hmmAMINO) XNU(dsq, sqinfo->len); - - /* Initialize PVM - */ - master_tid = pvm_mytid(); -#if DEBUGLEVEL >= 1 - pvm_catchout(stderr); /* catch output for debugging */ -#endif - SQD_DPRINTF1(("Spawning slaves...\n")); - PVMSpawnSlaves("hmmpfam-pvm", &slave_tid, &nslaves); - hmmlist = MallocOrDie(sizeof(int) * nslaves); - SQD_DPRINTF1(("Spawned a total of %d slaves...\n", nslaves)); - - /* Initialize the slaves - */ - SQD_DPRINTF1(("Broadcasting to %d slaves...\n", nslaves)); - pvm_initsend(PvmDataDefault); - arglen = strlen(hmmfile); - pvm_pkint(&arglen, 1, 1); - pvm_pkstr(hmmfile); - pvm_pkint(&(sqinfo->len), 1, 1); - pvm_pkstr(seq); - pvm_pkfloat(&(thresh->globT), 1, 1); - pvm_pkdouble(&(thresh->globE), 1, 1); - pvm_pkint(&(thresh->Z), 1, 1); - pvm_pkint((int *)&(thresh->autocut), 1, 1); - pvm_pkint(&do_xnu, 1, 1); - pvm_pkint(&do_forward, 1, 1); - pvm_pkint(&do_null2, 1, 1); - pvm_pkint(&Alphabet_type, 1, 1); - pvm_mcast(slave_tid, nslaves, HMMPVM_INIT); - SQD_DPRINTF1(("Slaves should be ready...\n")); - /* get their OK codes. */ - PVMConfirmSlaves(slave_tid, nslaves); - SQD_DPRINTF1(("Slaves confirm that they're ok...\n")); - - /* Load the slaves. - * For efficiency reasons, we don't want the master to - * load HMMs from disk until she absolutely needs them. - */ - for (nhmm = 0; nhmm < nslaves && nhmm < hmmfp->ssi->nprimary; nhmm++) { - pvm_initsend(PvmDataDefault); - pvm_pkint(&nhmm, 1, 1); /* side effect: also tells him what number he is. 
*/ - pvm_send(slave_tid[nhmm], HMMPVM_WORK); - hmmlist[nhmm] = nhmm; - } - SQD_DPRINTF1(("%d slaves are loaded\n", nhmm)); - - - /* Receive/send loop - */ - for (; nhmm < hmmfp->ssi->nprimary; nhmm++) - { - /* check slaves before blocking */ - PVMCheckSlaves(slave_tid, nslaves); - /* receive output */ - SQD_DPRINTF1(("Waiting for a slave to give me output...\n")); - pvm_recv(-1, HMMPVM_RESULTS); - pvm_upkint(&slaveidx, 1, 1); /* # of slave who's sending us stuff */ - pvm_upkstr(slavename); /* name of HMM that slave did */ - pvm_upkfloat(&sc, 1, 1); /* score */ - pvm_upkdouble(&pvalue, 1, 1); /* P-value */ - pvm_upkint(&sent_trace, 1, 1); /* TRUE if trace is coming */ - tr = (sent_trace) ? PVMUnpackTrace() : NULL; - SQD_DPRINTF1(("Slave %d finished %s for me...\n", slaveidx, slavename)); - - /* send new work */ - pvm_initsend(PvmDataDefault); - pvm_pkint(&nhmm, 1, 1); - pvm_send(slave_tid[slaveidx], HMMPVM_WORK); - SQD_DPRINTF1(("Assigned %d -> slave %d\n", nhmm, slaveidx)); - - /* process output */ - /* 1b. Store scores/pvalue for each HMM aligned to this sequence, overall - */ - SQD_DPRINTF1(("%15s : %2d : %f\n", slavename, slaveidx, sc)); - if (sent_trace) - { - /* now load the HMM, because the hit is significant */ - HMMFilePositionByIndex(hmmfp, hmmlist[slaveidx]); - if (!HMMFileRead(hmmfp, &hmm)) - { pvm_exit(); Die("Unexpected failure to read HMM file %s", hmmfile); } - if (hmm == NULL) - { pvm_exit(); Die("HMM file %s may be corrupt; parse failed", hmmfile); } - P7Logoddsify(hmm, TRUE); - if (! SetAutocuts(thresh, hmm)) - Die("HMM %s did not contain your GA, NC, or TC cutoffs", hmm->name); - - PostprocessSignificantHit(ghit, dhit, - tr, hmm, dsq, sqinfo->len, - sqinfo->name, - sqinfo->flags & SQINFO_ACC ? sqinfo->acc : NULL, - sqinfo->flags & SQINFO_DESC ? sqinfo->desc : NULL, - do_forward, sc, - do_null2, - thresh, - TRUE); /* TRUE -> hmmpfam mode */ - - FreePlan7(hmm); - P7FreeTrace(tr); - } - hmmlist[slaveidx] = nhmm; - } - - /* Collect the output. 
all n slaves are still working, so wait for them. - */ - for (slave = 0; slave < nslaves && slave < nhmm; slave++) - { - /* don't check slaves (they're exiting normally); - window of vulnerability here to slave crashes */ - /* receive output */ - pvm_recv(-1, HMMPVM_RESULTS); - pvm_upkint(&slaveidx, 1, 1); /* slave who's sending us stuff */ - pvm_upkstr(slavename); - pvm_upkfloat(&sc, 1, 1); /* one score */ - pvm_upkdouble(&pvalue, 1, 1); /* P-value */ - pvm_upkint(&sent_trace, 1, 1); /* TRUE if trace is coming */ - tr = (sent_trace) ? PVMUnpackTrace() : NULL; - - /* process output */ - SQD_DPRINTF1(("%15s : %2d : %f\n", slavename, slaveidx, sc)); - if (sent_trace) - { - /* now load the HMM, because the hit is significant */ - HMMFilePositionByIndex(hmmfp, hmmlist[slaveidx]); - if (!HMMFileRead(hmmfp, &hmm)) - { pvm_exit(); Die("Unexpected failure to read HMM file %s", hmmfile);} - if (hmm == NULL) - { pvm_exit(); Die("HMM file %s may be corrupt; parse failed", hmmfile); } - P7Logoddsify(hmm, TRUE); - if (! SetAutocuts(thresh, hmm)) - Die("HMM %s did not contain your GA, NC, or TC cutoffs", hmm->name); - - PostprocessSignificantHit(ghit, dhit, - tr, hmm, dsq, sqinfo->len, - sqinfo->name, NULL, NULL, /* won't need acc or desc even if we have 'em */ - do_forward, sc, - do_null2, - thresh, - TRUE); /* TRUE -> hmmpfam mode */ - - FreePlan7(hmm); - P7FreeTrace(tr); - } - /* send cleanup/shutdown flag */ - pvm_initsend(PvmDataDefault); - msg = -1; - pvm_pkint(&msg, 1, 1); - pvm_send(slave_tid[slaveidx], HMMPVM_WORK); - } - - /* Cleanup; quit the VM; and return - */ - free(slave_tid); - free(hmmlist); - free(dsq); - pvm_exit(); - *ret_nhmm = nhmm; - return; -} - -#endif /*HMMER_PVM*/ - - -#ifdef HMMER_THREADS -/***************************************************************** - * POSIX threads implementation. - * - * API: - * workpool_start() (makes a workpool_s structure. Starts calculations.) - * workpool_stop() (waits for threads to finish.) 
- * workpool_free() (destroys the structure) - * - * Threads: - * worker_thread() (the actual parallelized worker thread). - *****************************************************************/ - -/* Function: workpool_start() - * Date: SRE, Mon Sep 28 11:10:58 1998 [St. Louis] - * - * Purpose: Initialize a workpool_s structure, and return it. - * - * Args: hmmfile - name of HMM file - * hmmfp - open HMM file, at start - * dsq - ptr to sequence to search - * seqname - ptr to name of dsq - * L - length of dsq - * do_forward - TRUE to score using Forward - * do_null2 - TRUE to apply null2 ad hoc correction - * threshold - evalue/score threshold settings - * ghit - per-seq hit list - * dhit - per-domain hit list - * num_threads- number of worker threads to run. - * - * Returns: ptr to struct workpool_s. - * Caller must wait for threads to finish with workpool_stop(), - * then free the structure with workpool_free(). - */ -static struct workpool_s * -workpool_start(char *hmmfile, HMMFILE *hmmfp, char *dsq, char *seqname, int L, - int do_forward, int do_null2, struct threshold_s *thresh, - struct tophit_s *ghit, struct tophit_s *dhit, - int num_threads) -{ - struct workpool_s *wpool; - pthread_attr_t attr; - int i; - int rtn; - - wpool = MallocOrDie(sizeof(struct workpool_s)); - wpool->thread = MallocOrDie(num_threads * sizeof(pthread_t)); - wpool->hmmfile = hmmfile; - wpool->dsq = dsq; - wpool->L = L; - wpool->seqname = seqname; - wpool->do_forward = do_forward; - wpool->do_null2 = do_null2; - wpool->thresh = thresh; - - wpool->hmmfp = hmmfp; - wpool->nhmm = 0; - if ((rtn = pthread_mutex_init(&(wpool->input_lock), NULL)) != 0) - Die("pthread_mutex_init FAILED; %s\n", strerror(rtn)); - - wpool->ghit = ghit; - wpool->dhit = dhit; - if ((rtn = pthread_mutex_init(&(wpool->output_lock), NULL)) != 0) - Die("pthread_mutex_init FAILED; %s\n", strerror(rtn)); - - wpool->num_threads= num_threads; - - /* Create slave threads. 
See comments in hmmcalibrate.c at - * this step regarding concurrency and system scope. - */ - pthread_attr_init(&attr); -#ifndef __sgi -#ifdef HAVE_PTHREAD_ATTR_SETSCOPE - pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); -#endif -#endif -#ifdef HAVE_PTHREAD_SETCONCURRENCY - pthread_setconcurrency(num_threads+1); -#endif - for (i = 0; i < num_threads; i++) - if ((rtn = pthread_create(&(wpool->thread[i]), &attr, - worker_thread , (void *) wpool)) != 0) - Die("Failed to create thread %d; return code %d\n", i, rtn); - - pthread_attr_destroy(&attr); - return wpool; -} - -/* Function: workpool_stop() - * Date: SRE, Thu Jul 16 11:20:16 1998 [St. Louis] - * - * Purpose: Waits for threads in a workpool to finish. - * - * Args: wpool -- ptr to the workpool structure - * - * Returns: (void) - */ -static void -workpool_stop(struct workpool_s *wpool) -{ - int i; - /* wait for threads to stop */ - for (i = 0; i < wpool->num_threads; i++) - if (pthread_join(wpool->thread[i],NULL) != 0) - Die("pthread_join failed"); - return; -} - -/* Function: workpool_free() - * Date: SRE, Thu Jul 16 11:26:27 1998 [St. Louis] - * - * Purpose: Free a workpool_s structure, after the threads - * have finished. - * - * Args: wpool -- ptr to the workpool. - * - * Returns: (void) - */ -static void -workpool_free(struct workpool_s *wpool) -{ - free(wpool->thread); - free(wpool); - return; -} - - -/* Function: worker_thread() - * Date: SRE, Mon Sep 28 10:48:29 1998 [St. Louis] - * - * Purpose: The procedure executed by the worker threads. - * - * Args: ptr - (void *) that is recast to a pointer to - * the workpool. 
- * - * Returns: (void *) - */ -void * -worker_thread(void *ptr) -{ - struct workpool_s *wpool; /* our working threads structure */ - struct plan7_s *hmm; /* an HMM to search with */ - struct p7trace_s *tr; /* traceback from an alignment */ - float sc; /* score of an alignment */ - int rtn; /* a return code from pthreads lib */ - double pvalue; /* P-value of score */ - double evalue; /* E-value of a score */ - struct threshold_s thresh; /* a local copy of thresholds */ - - wpool = (struct workpool_s *) ptr; - /* Because we might dynamically change the thresholds using - * Pfam GA/NC/TC cutoffs, we make a local copy of the threshold - * structure in this thread. - */ - thresh.globT = wpool->thresh->globT; - thresh.globE = wpool->thresh->globE; - thresh.domT = wpool->thresh->domT; - thresh.domE = wpool->thresh->domE; - thresh.autocut = wpool->thresh->autocut; - thresh.Z = wpool->thresh->Z; - for (;;) { - - /* 1. acquire lock on HMM input, and get - * the next HMM to work on. - */ - /* acquire a lock */ - if ((rtn = pthread_mutex_lock(&(wpool->input_lock))) != 0) - Die("pthread_mutex_lock failure: %s\n", strerror(rtn)); - wpool->nhmm++; - - if (! HMMFileRead(wpool->hmmfp, &hmm)) - { /* we're done. release lock, exit thread */ - if ((rtn = pthread_mutex_unlock(&(wpool->input_lock))) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); - pthread_exit(NULL); - } - SQD_DPRINTF1(("a thread is working on %s\n", hmm->name)); - /* release the lock */ - if ((rtn = pthread_mutex_unlock(&(wpool->input_lock))) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); - - if (hmm == NULL) - Die("HMM file %s may be corrupt or in incorrect format; parse failed", wpool->hmmfile); - P7Logoddsify(hmm, !(wpool->do_forward)); - - if (!SetAutocuts(&thresh, hmm)) - Die("HMM %s did not have the right GA, NC, or TC cutoffs", hmm->name); - - /* 2. We have an HMM in score form. - * Score the sequence. 
- */ - if (P7ViterbiSize(wpool->L, hmm->M) <= RAMLIMIT) - sc = P7Viterbi(wpool->dsq, wpool->L, hmm, &tr); - else - sc = P7SmallViterbi(wpool->dsq, wpool->L, hmm, &tr); - - /* The Forward score override (see comments in serial vers) - */ - if (wpool->do_forward) { - sc = P7Forward(wpool->dsq, wpool->L, hmm, NULL); - if (wpool->do_null2) sc -= TraceScoreCorrection(hmm, tr, wpool->dsq); - } - - /* 3. Save the output in tophits structures, after acquiring a lock - */ - if ((rtn = pthread_mutex_lock(&(wpool->output_lock))) != 0) - Die("pthread_mutex_lock failure: %s\n", strerror(rtn)); - SQD_DPRINTF1(("model %s scores %f\n", hmm->name, sc)); - - pvalue = PValue(hmm, sc); - evalue = thresh.Z ? (double) thresh.Z * pvalue : (double) wpool->nhmm * pvalue; - if (sc >= thresh.globT && evalue <= thresh.globE) - { - PostprocessSignificantHit(wpool->ghit, wpool->dhit, - tr, hmm, wpool->dsq, wpool->L, - wpool->seqname, - NULL, NULL, /* won't need seq's acc or desc */ - wpool->do_forward, sc, - wpool->do_null2, - &thresh, - TRUE); /* TRUE -> hmmpfam mode */ - } - if ((rtn = pthread_mutex_unlock(&(wpool->output_lock))) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); - - P7FreeTrace(tr); - FreePlan7(hmm); - - } /* end 'infinite' loop over HMMs in this thread */ -} - -#endif /* HMMER_THREADS */ diff --git a/forester/archive/RIO/others/hmmer/src/hmmpostal.c b/forester/archive/RIO/others/hmmer/src/hmmpostal.c deleted file mode 100644 index 3e56af5..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmpostal.c +++ /dev/null @@ -1,1108 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- ************************************************************/ - -/* Derived from code developed by Ian Holmes (Sanger Centre and UC Berkeley) - * Copyright (C) 1998 Ian Holmes - * Distributed under the GNU General Public License - */ - -#include -#include -#include - -#include "structs.h" /* data structures, macros, #define's */ -#include "config.h" /* compile-time configuration constants */ -#include "funcs.h" /* function declarations */ -#include "globals.h" /* alphabet global variables */ -#include "squid.h" /* general sequence analysis library */ - -static char banner[] = "hmmbuild - build a hidden Markov model from an alignment"; - -static char usage[] = "\ -Usage: hmmbuildpost [-options] \n\ - Available options are:\n\ - -h : help; print brief help on version and usage\n\ - -n : name; name this HMM \n\ - -r : read HMM from instead of building\n\ - -m : save HMM to \n\ - -o : re-save annotated alignment to \n\ - -A : append; append this HMM to \n\ - -F : force; allow overwriting of \n\ -\n\ - Alternative search algorithm styles: (default: hmmls domain alignment)\n\ - -f : multi-hit local (hmmfs style)\n\ - -g : global alignment (hmms style, Needleman/Wunsch)\n\ - -s : local alignment (hmmsw style, Smith/Waterman)\n\ -"; - -static char experts[] = "\ - Optional re-alignment of sequences to model:\n\ - --viterbi : standard max-likelihood (Viterbi) algorithm\n\ - --optacc : optimal accuracy algorithm\n\ -\n\ - Alternative model construction strategies: (default: MAP)\n\ - --fast : Krogh/Haussler fast heuristic construction (see --gapmax)\n\ - --hand : manual construction (requires SELEX file, #=RF annotation)\n\ -\n\ - Expert customization of parameters and priors:\n\ - --null : read null (random sequence) model from \n\ - --pam : heuristic PAM-based prior, using BLAST PAM matrix in \n\ - --prior : read Dirichlet prior parameters from \n\ -\n\ - Alternative sequence weighting strategies: (default: GSC weights)\n\ - --wblosum : Henikoff simple filter weights 
(see --idlevel)\n\ - --wgsc : Gerstein/Sonnhammer/Chothia tree weights (default)\n\ - --wme : maximum entropy (ME)\n\ - --wvoronoi : Sibbald/Argos Voronoi weights\n\ - --wnone : don't do any weighting\n\ - --noeff : don't use effective sequence number; just use nseq\n\ -\n\ - Forcing an alphabet: (normally autodetected)\n\ - --amino : override autodetection, assert that seqs are protein\n\ - --nucleic : override autodetection, assert that seqs are DNA/RNA\n\ -\n\ - Other expert options:\n\ - --archpri : set architecture size prior to {0.85} [0..1]\n\ - --binary : save the model in binary format, not ASCII text\n\ - --cfile : save count vectors to \n\ - --gapmax : max fraction of gaps in mat column {0.50} [0..1]\n\ - --idlevel : set frac. id level used by eff. nseq and --wblosum {0.62}\n\ - --informat : input alignment is in format , not Stockholm\n\ - --pamwgt : set weight on PAM-based prior to {20.}[>=0]\n\ - --swentry : set S/W aggregate entry prob. to {0.5}\n\ - --swexit : set S/W aggregate exit prob. 
to {0.5}\n\ - --verbose : print a lot of boring information\n\ -\n"; - -static struct opt_s OPTIONS[] = { - { "-f", TRUE, sqdARG_NONE }, - { "-g", TRUE, sqdARG_NONE }, - { "-h", TRUE, sqdARG_NONE }, - { "-n", TRUE, sqdARG_STRING}, - { "-r", TRUE, sqdARG_STRING}, - { "-m", TRUE, sqdARG_STRING}, - { "-o", TRUE, sqdARG_STRING}, - { "-s", TRUE, sqdARG_NONE }, - { "-A", TRUE, sqdARG_NONE }, - { "-F", TRUE, sqdARG_NONE }, - { "--amino", FALSE, sqdARG_NONE }, - { "--archpri", FALSE, sqdARG_FLOAT }, - { "--binary", FALSE, sqdARG_NONE }, - { "--cfile", FALSE, sqdARG_STRING}, - { "--fast", FALSE, sqdARG_NONE}, - { "--gapmax", FALSE, sqdARG_FLOAT }, - { "--hand", FALSE, sqdARG_NONE}, - { "--idlevel", FALSE, sqdARG_FLOAT }, - { "--informat",FALSE, sqdARG_STRING }, - { "--noeff", FALSE, sqdARG_NONE }, - { "--nucleic", FALSE, sqdARG_NONE }, - { "--null", FALSE, sqdARG_STRING }, - { "--optacc", FALSE, sqdARG_NONE }, - { "--pam", FALSE, sqdARG_STRING }, - { "--pamwgt", FALSE, sqdARG_FLOAT }, - { "--prior", FALSE, sqdARG_STRING }, - { "--swentry", FALSE, sqdARG_FLOAT }, - { "--swexit", FALSE, sqdARG_FLOAT }, - { "--verbose", FALSE, sqdARG_NONE }, - { "--viterbi", FALSE, sqdARG_NONE }, - { "--wgsc", FALSE, sqdARG_NONE }, - { "--wblosum", FALSE, sqdARG_NONE }, - { "--wme", FALSE, sqdARG_NONE }, - { "--wnone", FALSE, sqdARG_NONE }, - { "--wvoronoi",FALSE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -static void save_model(struct plan7_s *hmm, char *hmmfile, int do_append, int do_binary); -static void print_all_scores(FILE *fp, struct plan7_s *hmm, - AINFO *ainfo, char **dsq, int nseq, - struct p7trace_s **tr); -static void save_countvectors(char *cfile, struct plan7_s *hmm); -static void position_average_score(struct plan7_s *hmm, char **seq, float *wgt, - int nseq, struct p7trace_s **tr, float *pernode, - float *ret_avg); -static float frag_trace_score(struct plan7_s *hmm, char *dsq, struct p7trace_s *tr, - float *pernode, float expected); -static 
void maximum_entropy(struct plan7_s *hmm, char **dsq, AINFO *ainfo, - int nseq, float eff_nseq, - struct p7prior_s *prior, struct p7trace_s **tr); - -extern void Postcode(int L, struct dpmatrix_s *mx, struct p7trace_s *tr); - -int -main(int argc, char **argv) -{ - char *seqfile; /* seqfile to read alignment from */ - int format; /* format of seqfile */ - MSAFILE *afp; /* open alignment file */ - MSA *msa; /* a multiple sequence alignment */ - char **dsq; /* digitized unaligned aseq's */ - struct plan7_s *hmm; /* constructed HMM; written to hmmfile */ - struct p7prior_s *pri; /* Dirichlet priors to use */ - struct p7trace_s **tr; /* fake tracebacks for aseq's */ - char *readfile; /* file to read HMM from */ - HMMFILE *hmmfp; /* opened hmmfile for reading */ - char *hmmfile; /* file to write HMM to */ - FILE *fp; /* OUTPUT file handle (misc.) */ - char *name; /* name of the HMM */ - int idx; /* counter for sequences */ - float randomseq[MAXABET]; /* null sequence model */ - float p1; /* null sequence model p1 transition */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - enum p7_construction c_strategy; /* construction strategy choice */ - enum p7_weight { /* weighting strategy */ - WGT_NONE, WGT_GSC, WGT_BLOSUM, WGT_VORONOI, WGT_ME} w_strategy; - enum p7_config { /* algorithm configuration strategy */ - P7_BASE_CONFIG, P7_LS_CONFIG, P7_FS_CONFIG, P7_SW_CONFIG } cfg_strategy; - float gapmax; /* max frac gaps in mat col for -k */ - int overwrite_protect; /* TRUE to prevent overwriting HMM file */ - enum realignment_strategy { /* re-alignment strategy */ - REALIGN_NONE, REALIGN_VITERBI, REALIGN_OPTACC } r_strategy; - int verbose; /* TRUE to show a lot of output */ - char *align_ofile; /* name of output alignment file */ - char *rndfile; /* random sequence model file to read */ - char *prifile; /* Dirichlet prior file to read */ - char *pamfile; /* PAM matrix file for 
heuristic prior */ - char *cfile; /* output file for count vectors */ - float archpri; /* "architecture" prior on model size */ - float pamwgt; /* weight on PAM for heuristic prior */ - int do_append; /* TRUE to append to hmmfile */ - int do_binary; /* TRUE to write in binary format */ - float blosumlevel; /* BLOSUM frac id filtering level [0.62] */ - float swentry; /* S/W aggregate entry probability */ - float swexit; /* S/W aggregate exit probability */ - int do_eff; /* TRUE to set an effective seq number */ - float eff_nseq; /* effective sequence number */ - int checksum; - int len; - - struct dpmatrix_s *forward_mx; /* Forward matrix */ - struct dpmatrix_s *backward_mx; /* Backward matrix */ - struct dpmatrix_s *posterior_mx; /* Posterior matrix */ - struct dpmatrix_s *optacc_mx; /* Optimal accuracy matrix */ - - /*********************************************** - * Parse command line - ***********************************************/ - - format = MSAFILE_UNKNOWN; - c_strategy = P7_MAP_CONSTRUCTION; - w_strategy = WGT_GSC; - blosumlevel = 0.62; - cfg_strategy = P7_LS_CONFIG; - gapmax = 0.5; - overwrite_protect = TRUE; - r_strategy = REALIGN_NONE; - verbose = FALSE; - readfile = NULL; - hmmfile = NULL; - align_ofile = NULL; - rndfile = NULL; - prifile = NULL; - pamfile = NULL; - cfile = NULL; - archpri = 0.85; - pamwgt = 20.; - Alphabet_type = hmmNOTSETYET; /* initially unknown */ - name = NULL; - do_append = FALSE; - swentry = 0.5; - swexit = 0.5; - do_eff = TRUE; - do_binary = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-f") == 0) cfg_strategy = P7_FS_CONFIG; - else if (strcmp(optname, "-g") == 0) cfg_strategy = P7_BASE_CONFIG; - else if (strcmp(optname, "-n") == 0) name = Strdup(optarg); - else if (strcmp(optname, "-r") == 0) readfile = optarg; - else if (strcmp(optname, "-m") == 0) hmmfile = optarg; - else if (strcmp(optname, "-o") == 0) align_ofile = optarg; - else if (strcmp(optname, 
"-r") == 0) rndfile = optarg; - else if (strcmp(optname, "-s") == 0) cfg_strategy = P7_SW_CONFIG; - else if (strcmp(optname, "-A") == 0) do_append = TRUE; - else if (strcmp(optname, "-F") == 0) overwrite_protect = FALSE; - else if (strcmp(optname, "--amino") == 0) SetAlphabet(hmmAMINO); - else if (strcmp(optname, "--archpri") == 0) archpri = atof(optarg); - else if (strcmp(optname, "--binary") == 0) do_binary = TRUE; - else if (strcmp(optname, "--cfile") == 0) cfile = optarg; - else if (strcmp(optname, "--fast") == 0) c_strategy = P7_FAST_CONSTRUCTION; - else if (strcmp(optname, "--hand") == 0) c_strategy = P7_HAND_CONSTRUCTION; - else if (strcmp(optname, "--gapmax") == 0) gapmax = atof(optarg); - else if (strcmp(optname, "--idlevel") == 0) blosumlevel = atof(optarg); - else if (strcmp(optname, "--noeff") == 0) do_eff = FALSE; - else if (strcmp(optname, "--nucleic") == 0) SetAlphabet(hmmNUCLEIC); - else if (strcmp(optname, "--optacc") == 0) r_strategy = REALIGN_OPTACC; - else if (strcmp(optname, "--pam") == 0) pamfile = optarg; - else if (strcmp(optname, "--pamwgt") == 0) pamwgt = atof(optarg); - else if (strcmp(optname, "--prior") == 0) prifile = optarg; - else if (strcmp(optname, "--swentry") == 0) swentry = atof(optarg); - else if (strcmp(optname, "--swexit") == 0) swexit = atof(optarg); - else if (strcmp(optname, "--verbose") == 0) verbose = TRUE; - else if (strcmp(optname, "--viterbi") == 0) r_strategy = REALIGN_VITERBI; - else if (strcmp(optname, "--wgsc") == 0) w_strategy = WGT_GSC; - else if (strcmp(optname, "--wblosum") == 0) w_strategy = WGT_BLOSUM; - else if (strcmp(optname, "--wme") == 0) w_strategy = WGT_ME; - else if (strcmp(optname, "--wnone") == 0) w_strategy = WGT_NONE; - else if (strcmp(optname, "--wvoronoi")== 0) w_strategy = WGT_VORONOI; - else if (strcmp(optname, "--informat") == 0) { - format = String2SeqfileFormat(optarg); - if (format == MSAFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - if (! 
IsAlignmentFormat(format)) - Die("%s is an unaligned format, can't read as an alignment", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 1) - Die("Incorrect number of arguments.\n%s\n", usage); - - seqfile = argv[optind++]; - - if (readfile != NULL && r_strategy == REALIGN_NONE) - r_strategy = REALIGN_VITERBI; - - if (gapmax < 0. || gapmax > 1.) - Die("--gapmax must be a value from 0 to 1\n%s\n", usage); - if (archpri < 0. || archpri > 1.) - Die("--archpri must be a value from 0 to 1\n%s\n", usage); - if (overwrite_protect && hmmfile && !do_append && FileExists(hmmfile)) - Die("HMM file %s already exists. Rename or delete it.", hmmfile); - if (overwrite_protect && align_ofile != NULL && FileExists(align_ofile)) - Die("Alignment resave file %s exists. Rename or delete it.", align_ofile); - - /*********************************************** - * Get sequence data - ***********************************************/ - - /* Open the alignment */ - if ((afp = MSAFileOpen(seqfile, format, NULL)) == NULL) - Die("Alignment file %s could not be opened for reading", seqfile); - - /* read the alignment from file */ - if ((msa = MSAFileRead(afp)) == NULL) - Die("Failed to read aligned sequence file %s", seqfile); - for (idx = 0; idx < msa->nseq; idx++) - s2upper(msa->aseq[idx]); - MSAFileClose(afp); - /* Set up the alphabet globals */ - if (Alphabet_type == hmmNOTSETYET) - DetermineAlphabet(msa->aseq, msa->nseq); - - /* Set up Dirichlet priors */ - if (prifile == NULL) pri = P7DefaultPrior(); - else pri = P7ReadPrior(prifile); - - if (pamfile != NULL) PAMPrior(pamfile, pri, pamwgt); - - /* Set up the null/random seq model */ - if (rndfile == NULL) P7DefaultNullModel(randomseq, &p1); - else P7ReadNullModel(rndfile, randomseq, &p1); - - /* Prepare sequences for internal use */ - DigitizeAlignment(msa, &dsq); - - /* In some respects we treat DNA more crudely... 
*/ - if (Alphabet_type == hmmNUCLEIC) - { - do_eff = FALSE; /* don't do effective seq #; it's calibrated for protein */ - } - - /*********************************************** - * Either read in an HMM or build from alignment, - * depending on user specifications. - ***********************************************/ - - if (readfile != NULL) { - - /*********************************************** - * Open HMM file (might be in HMMERDB or current directory). - * Read a single HMM from it. - ***********************************************/ - - if ((hmmfp = HMMFileOpen(readfile, "HMMERDB")) == NULL) - Die("Failed to open HMM file %s\n%s", readfile, usage); - if (!HMMFileRead(hmmfp, &hmm)) - Die("Failed to read any HMMs from %s\n", readfile); - HMMFileClose(hmmfp); - if (hmm == NULL) - Die("HMM file %s corrupt or in incorrect format? Parse failed", readfile); - - tr = (struct p7trace_s **) MallocOrDie (sizeof(struct p7trace_s *) * msa->nseq); - for (idx = 0; idx < msa->nseq; idx++) - tr[idx] = 0; - - } else { - - /*********************************************** - * Build an HMM - ***********************************************/ - - /* Determine the effective sequence number to use (optional) - */ - eff_nseq = (float) msa->nseq; - if (do_eff) - { - float *wgt; - printf("%-40s ... ", "Determining effective sequence number"); - fflush(stdout); - /* dummy weights array to feed BlosumWeights*/ - wgt = MallocOrDie(sizeof(float) * msa->nseq); - BlosumWeights(msa->aseq, msa->nseq, msa->alen, blosumlevel, wgt); - eff_nseq = FSum(wgt, msa->nseq); - - free(wgt); - printf("done. [%.0f]\n", eff_nseq); - } - - - /* Weight the sequences (optional), - */ - /* Weight the sequences (optional), - */ - if (w_strategy == WGT_GSC || - w_strategy == WGT_BLOSUM || - w_strategy == WGT_VORONOI) - { - printf("%-40s ... 
", "Weighting sequences heuristically"); - fflush(stdout); - - if (w_strategy == WGT_GSC) - GSCWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt); - else if (w_strategy == WGT_BLOSUM) - BlosumWeights(msa->aseq, msa->nseq, msa->alen, blosumlevel, msa->wgt); - else if (w_strategy == WGT_VORONOI) - VoronoiWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt); - - printf("done.\n"); - } - - /* Set the effective sequence number (if do_eff is FALSE, eff_nseq - * was set to nseq). - */ - FNorm(msa->wgt, msa->nseq); - FScale(msa->wgt, msa->nseq, eff_nseq); - - - /* Build a model architecture. - * If we're not doing MD or ME, that's all we need to do. - * We get an allocated, counts-based HMM back. - */ - printf("%-40s ... ", "Constructing model architecture"); - fflush(stdout); - checksum = GCGMultchecksum(msa->aseq, msa->nseq); - if (c_strategy == P7_FAST_CONSTRUCTION) - P7Fastmodelmaker(msa, dsq, gapmax, &hmm, &tr); - else if (c_strategy == P7_HAND_CONSTRUCTION) - P7Handmodelmaker(msa, dsq, &hmm, &tr); - else - P7Maxmodelmaker(msa, dsq, gapmax, - pri, randomseq, p1, archpri, &hmm, &tr); - hmm->checksum = checksum; - printf("done.\n"); - - /* Save the count vectors if asked. Used primarily for - * making the data files for training priors. - */ - if (cfile != NULL) - { - save_countvectors(cfile, hmm); - } - - /* Record the null model in the HMM; - * add prior contributions in pseudocounts and renormalize. - */ - Plan7SetNullModel(hmm, randomseq, p1); - P7PriorifyHMM(hmm, pri); - - - /* Model configuration, temporary. - * hmmbuild assumes that it's given an alignment of single domains, - * and the alignment may contain fragments. So, for the purpose of - * scoring the sequences (or, optionally, MD/ME weighting), - * configure the model into hmmsw mode. Later we'll - * configure the model according to how the user wants to - * use it. - */ - Plan7SWConfig(hmm, 0.5, 0.5); - - /* Do model-dependent "weighting" strategies. 
- */ - /* - if (w_strategy == WGT_ME) - { - maximum_entropy(hmm, dsq, &ainfo, ainfo.nseq, eff_nseq, pri, tr); - } - */ - - /* Give the model a name; by default, the name of the alignment file - * without any filename extension (i.e. "globins.slx" becomes "globins" - */ - if (name == NULL) name = FileTail(seqfile, TRUE); - Plan7SetName(hmm, name); - Plan7ComlogAppend(hmm, argc, argv); - Plan7SetCtime(hmm); - hmm->nseq = msa->nseq; - free(name); - - /* Configure the model for chosen algorithm - */ - switch (cfg_strategy) { - case P7_BASE_CONFIG: Plan7GlobalConfig(hmm); break; - case P7_SW_CONFIG: Plan7SWConfig(hmm, swentry, swexit); break; - case P7_LS_CONFIG: Plan7LSConfig(hmm); break; - case P7_FS_CONFIG: Plan7FSConfig(hmm, swentry, swexit); break; - default: Die("bogus configuration choice"); - } - - } - - /* Optionally save new HMM to disk: open a file for appending or writing. - */ - P7Logoddsify(hmm, TRUE); - if (hmmfile) - save_model(hmm, hmmfile, do_append, do_binary); - - /* Display posterior probabilities for each sequence, - re-aligning them to the model if user requested that - */ - - for (idx = 0; idx < msa->nseq; idx++) { - printf ("#\n# Sequence %d: %s\n#\n", idx + 1, msa->sqname[idx]); - - len = DealignedLength(msa->aseq[idx]); - if (P7ViterbiSize(len, hmm->M) * 2 > RAMLIMIT) - Die("insufficient memory"); - - (void) P7Forward (dsq[idx], len, hmm, &forward_mx); - (void) P7Backward (dsq[idx],len, hmm, &backward_mx); - - if (r_strategy == REALIGN_VITERBI) { - - if (tr[idx]) P7FreeTrace (tr[idx]); - - if (P7ViterbiSize(len, hmm->M) * 3 <= RAMLIMIT) - (void) P7Viterbi(dsq[idx], len, hmm, &(tr[idx])); - else - (void) P7SmallViterbi(dsq[idx], len, hmm, &(tr[idx])); - - } else if (r_strategy == REALIGN_OPTACC) { - - if (tr[idx]) P7FreeTrace (tr[idx]); - - if (P7ViterbiSize(len, hmm->M) * 4 > RAMLIMIT) - Die("insufficient memory"); - - posterior_mx = AllocPlan7Matrix (len + 1, hmm->M, 0, 0, 0, 0); - P7EmitterPosterior (len, hmm, forward_mx, backward_mx, - 
posterior_mx); - - optacc_mx = AllocPlan7Matrix (len + 1, hmm->M, 0, 0, 0, 0); - (void) P7FillOptimalAccuracy (len, hmm->M, posterior_mx, - optacc_mx, &(tr[idx])); - - FreePlan7Matrix (posterior_mx); - FreePlan7Matrix (optacc_mx); - - } - - posterior_mx = AllocPlan7Matrix (len + 1, hmm->M, 0, 0, 0, 0); - P7EmitterPosterior (len, hmm, forward_mx, backward_mx, - posterior_mx); - - Postcode(len, posterior_mx, tr[idx]); - /* DisplayPlan7Matrix(dsq[idx], len, hmm, posterior_mx); */ - - - /* DisplayPlan7PostAlign (len, hmm, - forward_mx, backward_mx, - &(tr[idx]), 1); - */ - - FreePlan7Matrix (backward_mx); - FreePlan7Matrix (forward_mx); - - } - - /* the annotated alignment may be resaved */ - if (align_ofile != NULL) { - MSA *new_msa; - SQINFO *sqinfo; - - sqinfo = MSAToSqinfo(msa); - new_msa = P7Traces2Alignment(dsq, sqinfo, msa->wgt, msa->nseq, - hmm->M, tr, FALSE); - if ((fp = fopen(align_ofile, "w")) == NULL) { - Warn("Failed to open alignment resave file %s; using stdout instead", - align_ofile); - fp = stdout; - } - WriteStockholm(fp, new_msa); - MSAFree(new_msa); - for (idx = 0; idx < msa->nseq; idx++) - FreeSequence(NULL, &(sqinfo[idx])); - free(sqinfo); - if (fp != stdout) fclose(fp); - } - - /* Verbose output; show scores for each sequence - */ - /* - if (verbose) - print_all_scores(stdout, hmm, dsq, msq, tr); - */ - - /* Clean up and exit - */ - for (idx = 0; idx < msa->nseq; idx++) P7FreeTrace(tr[idx]); - free(tr); - FreePlan7(hmm); - P7FreePrior(pri); - Free2DArray((void **) dsq, msa->nseq); - MSAFree(msa); - SqdClean(); - - return 0; -} - -/* Function: save_model() - * - * Purpose: Save the new model to a file. 
- * - * Args: hmm - model to save - * hmmfile - file to save to (if NULL, use stdout) - * do_append - TRUE to append to file - * do_binary - TRUE to write a binary file - * - * Return: (void) - */ -static void -save_model(struct plan7_s *hmm, char *hmmfile, int do_append, int do_binary) -{ - FILE *fp; - - if (hmmfile == NULL) - fp = stdout; - else if (do_append) - { - /* check that it looks like an HMM file */ -#ifdef REMOVED /* This code induces an unresolved Linux/SGI NFS bug! */ - if (FileExists(hmmfile)) - { - HMMFILE *hmmfp; - hmmfp = HMMFileOpen(hmmfile, NULL); - if (hmmfp == NULL) { - Warn("%s not an HMM file; can't append to it; using stdout instead", - hmmfile); - fp = stdout; - puts(""); /* do a newline before stdout HMM starts */ - } else { - HMMFileClose(hmmfp); - } - } -#endif - - if ((fp = fopen(hmmfile, "a")) == NULL) { - Warn("hey, where'd your HMM file go? Using stdout instead."); - fp = stdout; - puts(""); /* do a newline before stdout HMM starts */ - } - } - else - { - if ((fp = fopen(hmmfile, "w")) == NULL) { - Warn("Failed to open HMM save file %s; using stdout instead", hmmfile); - fp = stdout; - puts(""); /* do a newline before stdout HMM starts */ - } - } - - if (do_binary) WriteBinHMM(fp, hmm); - else WriteAscHMM(fp, hmm); - - if (fp != stdout) fclose(fp); - return; -} - - - - - -/* Function: print_all_scores() - * - * Purpose: For each training sequence, print its score under - * the final model. - * - * Args: fp - where to print the output (usu. stdout) - * hmm - newly constructed HMM, with prob's. - * ainfo- info with aseq - * dsq - digitized unaligned training sequences. 
- * nseq - number of training sequences - * tr - array of tracebacks - * - * Return: (void) - */ -static void -print_all_scores(FILE *fp, struct plan7_s *hmm, - AINFO *ainfo, char **dsq, int nseq, struct p7trace_s **tr) -{ - int idx; /* counter for sequences */ - - /* make sure model scores are ready */ - P7Logoddsify(hmm, TRUE); - /* header */ - fputs("**\n", fp); - fputs("Individual training sequence scores:\n", fp); - /* score for each sequence */ - for (idx = 0; idx < nseq; idx++) - { - fprintf(fp, "%7.2f %-12s %s\n", - P7TraceScore(hmm, dsq[idx], tr[idx]), - ainfo->sqinfo[idx].name, - (ainfo->sqinfo[idx].flags & SQINFO_DESC) ? - ainfo->sqinfo[idx].desc : ""); - P7PrintTrace(fp, tr[idx], hmm, dsq[idx]); - } - fputs("\n", fp); -} - - - -/* Function: save_countvectors() - * - * Purpose: Save emission/transition count vectors to a file. - * Used for gathering the data on which to train a - * prior (e.g. mixture Dirichlet, etc.) - * - * The format of the file is one vector per line: - * M ...: 20 match emission counts in order AC..WY. - * I ...: 20 insert emission counts in order AC..WY. - * T ...: 7 transition counts in order TMM, TMI, TMD, - * TIM, TII, TDM, TDD. 
(see structs.h) - * - * Args: cfile - counts file to make - * hmm - counts-based HMM - */ -static void -save_countvectors(char *cfile, struct plan7_s *hmm) -{ - FILE *fp; - int k, x; - - if ((fp = fopen(cfile, "w")) == NULL) - Die("failed to open count vector file %s for writing", cfile); - - /* match emission vectors */ - for (k = 1; k <= hmm->M; k++) - { - fputs("M ", fp); - for (x = 0; x < Alphabet_size; x++) - fprintf(fp, "%.2f ", hmm->mat[k][x]); - fputs("\n", fp); - } - /* insert emission vectors */ - for (k = 1; k < hmm->M; k++) - { - fputs("I ", fp); - for (x = 0; x < Alphabet_size; x++) - fprintf(fp, "%.2f ", hmm->ins[k][x]); - fputs("\n", fp); - } - /* transition vectors */ - for (k = 1; k < hmm->M; k++) - { - fputs("T ", fp); - for (x = 0; x < 7; x++) - fprintf(fp, "%.2f ", hmm->t[k][x]); - fputs("\n", fp); - } - - fclose(fp); -} - - -/* Function: position_average_score() - * Date: Wed Dec 31 09:36:35 1997 [StL] - * - * Purpose: Calculate scores from tracebacks, keeping them - * in a position specific array. The final array - * is normalized position-specifically too, according - * to how many sequences contributed data to this - * position. Used for compensating for sequence - * fragments in ME and MD score optimization. - * Very much ad hoc. - * - * Code related to (derived from) TraceScore(). - * - * Args: hmm - HMM structure, scores valid - * dsq - digitized unaligned sequences - * wgt - weights on the sequences - * nseq - number of sequences - * tr - array of nseq tracebacks that aligns each dsq to hmm - * pernode - RETURN: [0]1..M array of position-specific avg scores - * ret_avg - RETURN: overall average full-length, one-domain score - * - * Return: 1 on success, 0 on failure. - * pernode is malloc'ed [0]1..M by CALLER and filled here. 
- */ -static void -position_average_score(struct plan7_s *hmm, - char **dsq, - float *wgt, - int nseq, - struct p7trace_s **tr, - float *pernode, - float *ret_avg) -{ - int pos; /* position in seq */ - int sym; - int tpos; /* position in trace/state sequence */ - float *counts; /* counts at each position */ - float avg; /* RETURN: average overall */ - int k; /* counter for model position */ - int idx; /* counter for sequence number */ - - /* Allocations - */ - counts = MallocOrDie ((hmm->M+1) * sizeof(float)); - FSet(pernode, hmm->M+1, 0.); - FSet(counts, hmm->M+1, 0.); - - /* Loop over traces, accumulate weighted scores per position - */ - for (idx = 0; idx < nseq; idx++) - for (tpos = 0; tpos < tr[idx]->tlen; tpos++) - { - pos = tr[idx]->pos[tpos]; - sym = (int) dsq[idx][tr[idx]->pos[tpos]]; - k = tr[idx]->nodeidx[tpos]; - - /* Counts: how many times did we use this model position 1..M? - * (weighted) - */ - if (tr[idx]->statetype[tpos] == STM || tr[idx]->statetype[tpos] == STD) - counts[k] += wgt[idx]; - - /* Emission scores. - */ - if (tr[idx]->statetype[tpos] == STM) - pernode[k] += wgt[idx] * Scorify(hmm->msc[sym][k]); - else if (tr[idx]->statetype[tpos] == STI) - pernode[k] += wgt[idx] * Scorify(hmm->isc[sym][k]); - - /* Transition scores. 
- */ - if (tr[idx]->statetype[tpos] == STM || - tr[idx]->statetype[tpos] == STD || - tr[idx]->statetype[tpos] == STI) - pernode[k] += wgt[idx] * - Scorify(TransitionScoreLookup(hmm, tr[idx]->statetype[tpos], tr[idx]->nodeidx[tpos], - tr[idx]->statetype[tpos+1],tr[idx]->nodeidx[tpos+1])); - } - - /* Divide accumulated scores by accumulated weighted counts - */ - avg = 0.; - for (k = 1; k <= hmm->M; k++) - { - pernode[k] /= counts[k]; - avg += pernode[k]; - } - - free(counts); - *ret_avg = avg; - return; -} - - -/* Function: frag_trace_score() - * Date: SRE, Wed Dec 31 10:03:47 1997 [StL] - * - * Purpose: Allow MD/ME optimization to be used for alignments - * that include fragments and multihits -- estimate a full-length - * per-domain score. - * - * - * - * Return: "corrected" score. - */ -static float -frag_trace_score(struct plan7_s *hmm, char *dsq, struct p7trace_s *tr, - float *pernode, float expected) -{ - float sc; /* corrected score */ - float fragexp; /* expected score for a trace like this */ - int tpos; /* position in trace */ - - /* get uncorrected score */ - sc = P7TraceScore(hmm, dsq, tr); - - /* calc expected score for trace like this */ - fragexp = 0.; - for (tpos = 0; tpos < tr->tlen; tpos++) - if (tr->statetype[tpos] == STM || tr->statetype[tpos] == STD) - fragexp += pernode[tr->nodeidx[tpos]]; - - /* correct for multihits */ - fragexp /= (float) TraceDomainNumber(tr); - - /* extrapolate to full-length, one-hit score */ - sc = sc * expected / fragexp; - return sc; -} - - -/* Function: maximum_entropy() - * Date: SRE, Fri Jan 2 10:56:00 1998 [StL] - * - * Purpose: Optimizes a model according to maximum entropy weighting. - * See Krogh and Mitchison (1995). - * - * [Actually, we do minimum relative entropy, rather than - * maximum entropy. Same thing, though we refer to "ME" - * weights and models. The optimization is a steepest - * descents minimization of the relative entropy.] 
- * - * Expects to be called shortly after a Maxmodelmaker() - * or Handmodelmaker(), so that both a new model architecture - * (with MAP parameters) and fake tracebacks are available. - * - * Prints a summary of optimization progress to stdout. - * - * Args: hmm - model. allocated, set with initial MAP parameters. - * dsq - dealigned digitized seqs the model is based on - * ainfo - extra info for aseqs - * nseq - number of aseqs - * eff_nseq- effective sequence number; weights normalize up to this. - * prior - prior distributions for parameterizing model - * tr - array of fake traces for each sequence - * - * Return: (void) - * hmm changed to an ME HMM - * ainfo changed, contains ME weights - */ -static void -maximum_entropy(struct plan7_s *hmm, char **dsq, AINFO *ainfo, int nseq, - float eff_nseq, struct p7prior_s *prior, struct p7trace_s **tr) -{ - float *wgt; /* current best set of ME weights */ - float *new_wgt; /* new set of ME weights to try */ - float *sc; /* log-odds score of each sequence */ - float *grad; /* gradient */ - float epsilon; /* steepness of descent */ - float relative_entropy; /* current best relative entropy */ - float new_entropy; /* relative entropy at new weights */ - float last_new_entropy; /* last new_entropy we calc'ed */ - float use_epsilon; /* current epsilon value in use */ - int idx; /* counter over sequences */ - int i1, i2; /* counters for iterations */ - - float converge_criterion; - float minw, maxw; /* min, max weight */ - int posw, highw; /* number of positive weights */ - float mins, maxs, avgs; /* min, max, avg score */ - float *pernode; /* expected score per node of HMM */ - float expscore; /* expected score of complete HMM */ - int max_iter; /* bulletproof against infinite loop bugs */ - - epsilon = 0.2; /* works fine */ - max_iter = 666; - - /* Allocations - */ - sc = MallocOrDie (sizeof(float) * nseq); - wgt = MallocOrDie (sizeof(float) * nseq); - new_wgt = MallocOrDie (sizeof(float) * nseq); - grad = MallocOrDie 
(sizeof(float) * nseq); - pernode = MallocOrDie (sizeof(float) * (hmm->M+1)); - - /* Initialization. Start with all weights == 1.0. - * Find relative entropy and gradient. - */ - Plan7SWConfig(hmm, 0.5, 0.5); - P7Logoddsify(hmm, TRUE); - - FSet(wgt, nseq, 1.0); - position_average_score(hmm, dsq, wgt, nseq, tr, pernode, &expscore); - for (idx = 0; idx < nseq; idx++) - sc[idx] = frag_trace_score(hmm, dsq[idx], tr[idx], pernode, expscore); - relative_entropy = FSum(sc, nseq) / (float) nseq; - for (idx = 0; idx < nseq; idx++) - grad[idx] = relative_entropy - sc[idx]; - - - /* - * printf statements commented out: - * - * printf("iter avg-sc min-sc max-sc min-wgt max-wgt +wgt ++wgt rel.ent convergence\n"); - * printf("---- ------ ------ ------ ------- ------- ---- ----- ------- -----------\n"); - * - */ - mins = maxs = avgs = sc[0]; - for (idx = 1; idx < nseq; idx++) - { - if (sc[idx] < mins) mins = sc[idx]; - if (sc[idx] > maxs) maxs = sc[idx]; - avgs += sc[idx]; - } - avgs /= nseq; - - /* - * printf statement commented out: - * - * printf("%4d %6.1f %6.1f %6.1f %7.2f %7.2f %4d %5d %7.2f %8s\n", - * 0, avgs, mins, maxs, 1.0, 1.0, nseq, 0, relative_entropy, "-"); - * - */ - - - /* Steepest descents optimization; - * iterate until relative entropy converges. - */ - i1 = 0; - while (++i1 < max_iter) - { - /* Gradient gives us a line of steepest descents. - * (Roughly speaking, anyway. We actually have a constraint - * that weights are nonnegative and normalized, and the - * gradient doesn't take these into account.) - * Look along this line, a distance of epsilon * gradient: - * if new point is better, accept; if new point is worse, - * move back along the line by half the distance and re-evaluate. 
- */ - use_epsilon = epsilon; - new_entropy = relative_entropy + 1.0; /* just ensure new > old */ - - i2 = 0; - while (new_entropy > relative_entropy && ++i2 < max_iter) - { - last_new_entropy = new_entropy; - - /* find a new point in weight space */ - for (idx = 0; idx < nseq; idx++) - { - new_wgt[idx] = wgt[idx] + use_epsilon * grad[idx]; - if (new_wgt[idx] < 0.) new_wgt[idx] = 0.0; - } - FNorm(new_wgt, nseq); - FScale(new_wgt, nseq, (float) nseq); - - /* Make new HMM using these weights */ - ZeroPlan7(hmm); - for (idx = 0; idx < nseq; idx++) - P7TraceCount(hmm, dsq[idx], new_wgt[idx], tr[idx]); - P7PriorifyHMM(hmm, prior); - - - /* Evaluate new point */ - Plan7SWConfig(hmm, 0.5, 0.5); - P7Logoddsify(hmm, TRUE); - position_average_score(hmm, dsq, new_wgt, nseq, tr, pernode, &expscore); - for (idx = 0; idx < nseq; idx++) - sc[idx] = frag_trace_score(hmm, dsq[idx], tr[idx], pernode, expscore); - new_entropy = FDot(sc, new_wgt, nseq) / nseq; - - use_epsilon /= 2.0; - /* Failsafe: we're not converging. Set epsilon to zero, - * do one more round. - */ - if (use_epsilon < 1e-6) use_epsilon = 0.0; - if (use_epsilon == 0.0) break; - - /* Failsafe: avoid infinite loops. Sometimes the - new entropy converges without ever being better - than the previous point, probably as a result - of minor roundoff error. */ - if (last_new_entropy == new_entropy) break; - } - /* - * printf statement commented out: - * - * if (i2 == max_iter) printf(" -- exceeded maximum iterations; giving up --\n"); - * - */ - - /* Evaluate convergence before accepting the new weights; - * then, accept the new point and evaluate the gradient there. - */ - converge_criterion = fabs((relative_entropy-new_entropy)/relative_entropy); - relative_entropy = new_entropy; - FCopy(wgt, new_wgt, nseq); - for (idx = 0; idx < nseq; idx++) - grad[idx] = relative_entropy - sc[idx]; - - /* Print some statistics about this iteration - */ - mins = maxs = avgs = sc[0]; - minw = maxw = wgt[0]; - posw = (wgt[0] > 0.0) ? 
1 : 0; - highw = (wgt[0] > 1.0) ? 1 : 0; - for (idx = 1; idx < nseq; idx++) - { - if (sc[idx] < mins) mins = sc[idx]; - if (sc[idx] > maxs) maxs = sc[idx]; - if (wgt[idx] < minw) minw = wgt[idx]; - if (wgt[idx] > maxw) maxw = wgt[idx]; - if (wgt[idx] > 0.0) posw++; - if (wgt[idx] > 1.0) highw++; - avgs += sc[idx]; - } - avgs /= nseq; - - - /* - * printf statement commented out: - * - * printf("%4d %6.1f %6.1f %6.1f %7.2f %7.2f %4d %5d %7.2f %8.5f\n", - * i1, - * avgs, mins, maxs, - * minw, maxw, posw, highw, - * relative_entropy, converge_criterion); - * - */ - - if (converge_criterion < 1e-5) break; - } - /* - * printf statement commented out: - * - * if (i1 == max_iter) printf(" -- exceeded maximum iterations; giving up --\n"); - * - */ - - /* Renormalize weights to sum to eff_nseq, and save. - */ - FNorm(wgt, nseq); - FScale(wgt, nseq, (float) eff_nseq); - FCopy(ainfo->wgt, wgt, nseq); - /* Make final HMM using these adjusted weights */ - ZeroPlan7(hmm); - for (idx = 0; idx < nseq; idx++) - P7TraceCount(hmm, dsq[idx], wgt[idx], tr[idx]); - P7PriorifyHMM(hmm, prior); - - /* Cleanup and return - */ - free(pernode); - free(new_wgt); - free(grad); - free(wgt); - free(sc); - return; -} diff --git a/forester/archive/RIO/others/hmmer/src/hmmsearch-pvm.c b/forester/archive/RIO/others/hmmer/src/hmmsearch-pvm.c deleted file mode 100644 index 7acde9b..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmsearch-pvm.c +++ /dev/null @@ -1,180 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -#ifdef HMMER_PVM - -/* hmmsearch-pvm.c - * SRE, Wed Sep 23 09:30:53 1998 - * - * PVM slave for hmmsearch. 
- * RCS $Id: hmmsearch-pvm.c,v 1.1.1.1 2005/03/22 08:34:12 cmzmasek Exp $ - */ - -#include -#include -#include -#include - -#include "version.h" -#include "structs.h" /* data structures, macros, #define's */ -#include "config.h" /* compile-time configuration constants */ -#include "funcs.h" /* function declarations */ -#include "globals.h" /* alphabet global variables */ -#include "squid.h" /* general sequence analysis library */ - -static void leave_pvm(void); - -int -main(void) -{ - struct plan7_s *hmm; /* HMM to search with */ - struct p7trace_s *tr; /* trace structure for a Viterbi alignment */ - int master_tid; /* PVM TID of our master */ - int alphatype; /* alphabet type */ - int code; /* status code for whether we're ok */ - int my_idx; /* my slave index: 0..nslaves-1, master assigns */ - int L; /* length of sequence */ - char *dsq; /* digitized sequence 1..L */ - float sc; /* log odds score for seq + HMM */ - double pvalue; /* P-value of sc */ - double evalue; /* bounded E-value of sc (we don't know nseq yet) */ - int do_forward; /* TRUE to score using Forward() */ - int do_null2; /* TRUE to use null2 ad hoc correction */ - float globT; /* T parameter: keep only hits > globT bits */ - double globE; /* E parameter: keep hits < globE E-value */ - int Z; /* nseq to base E value calculation on */ - int nseq; /* actual nseq so far (master keeps updating this) */ - int send_trace; /* TRUE if sc looks significant and we return tr */ - - /* Register leave_pvm() cleanup function so any exit() call - * first calls pvm_exit(). - */ - if (atexit(leave_pvm) != 0) { pvm_exit(); Die("slave couldn't register leave_pvm()"); } - - /***************************************************************** - * Initialization. - * Master broadcasts the problem to us: - * globT, globE, Z, do_forward, do_null2, alphabet type, HMM, - ******************************************************************/ - - master_tid = pvm_parent(); /* who's our master? 
*/ - my_idx = -1; - - /* wait for a HMMPVM_INIT message, and unpack it; - * get options, set alphabet type, get HMM. - */ - pvm_recv(master_tid, HMMPVM_INIT); - pvm_upkfloat(&globT, 1, 1); - pvm_upkdouble(&globE, 1, 1); - pvm_upkint(&Z, 1, 1); - pvm_upkint(&do_forward, 1, 1); - pvm_upkint(&do_null2, 1, 1); - pvm_upkint(&alphatype, 1, 1); - SetAlphabet(alphatype); - hmm = PVMUnpackHMM(); - - P7Logoddsify(hmm, TRUE); - - /* tell the master we're OK and ready to go (or not) - */ - code = HMMPVM_OK; - if (hmm == NULL) code = HMMPVM_BAD_INIT; - pvm_initsend(PvmDataDefault); - pvm_pkint(&code, 1, 1); - PVMPackString(RELEASE); - pvm_send(master_tid, HMMPVM_RESULTS); - - /***************************************************************** - * Main loop. - * Receive a digitized sequence to search against. - *****************************************************************/ - - for (;;) - { - SQD_DPRINTF1(("Slave about to do a blocking receive, waiting for input.\n")); - pvm_recv(master_tid, HMMPVM_WORK); - pvm_upkint(&nseq, 1, 1); - if (nseq == -1) break; /* shutdown signal */ - if (my_idx == -1) my_idx = nseq; - pvm_upkint(&L, 1, 1); - SQD_DPRINTF1(("Slave received nseq=%d L=%d my_idx=%d\n", nseq, L, my_idx)); - dsq = MallocOrDie(sizeof(char) * (L + 2)); - pvm_upkbyte(dsq, L+2, 1); - SQD_DPRINTF1(("Slave unpacked a seq of %d bytes; beginning processing\n", L+2)); - - /* Score sequence, do alignment (Viterbi), recover trace - */ - if (P7ViterbiSize(L, hmm->M) <= RAMLIMIT) - { - SQD_DPRINTF1(("Slave doing Viterbi after estimating %d MB\n", (P7ViterbiSize(L, hmm->M)))); - sc = P7Viterbi(dsq, L, hmm, &tr); - } - else - { - SQD_DPRINTF1(("Slave going small after estimating %d MB\n", (P7ViterbiSize(L, hmm->M)))); - sc = P7SmallViterbi(dsq, L, hmm, &tr); - } - - if (do_forward) sc = P7Forward(dsq, L, hmm, NULL); - if (do_null2) sc -= TraceScoreCorrection(hmm, tr, dsq); - - pvalue = PValue(hmm, sc); - evalue = Z ? 
(double) Z * pvalue : (double) nseq * pvalue; - send_trace = (sc >= globT && evalue <= globE) ? 1 : 0; - - /* return output - */ - SQD_DPRINTF1(("Slave has a result (sc = %.1f); sending back to master\n", sc)); - pvm_initsend(PvmDataDefault); - pvm_pkint (&my_idx, 1, 1); - pvm_pkfloat (&sc, 1, 1); - pvm_pkdouble(&pvalue, 1, 1); - pvm_pkint(&send_trace, 1, 1); /* flag for whether a trace structure is coming */ - if (send_trace) PVMPackTrace(tr); - pvm_send(master_tid, HMMPVM_RESULTS); - - /* cleanup - */ - free(dsq); - P7FreeTrace(tr); - } - - /*********************************************** - * Cleanup, return. - ***********************************************/ - - SQD_DPRINTF1(("Slave is done; performing a normal exit.\n")); - FreePlan7(hmm); - exit(0); /* pvm_exit() gets called by atexit() registration. */ -} - -/* Function: leave_pvm() - * - * Purpose: Cleanup function, to deal with crashes. We register - * this function using atexit() so it gets called before - * the slave dies. - */ -void leave_pvm(void) -{ - SQD_DPRINTF1(("slave leaving PVM.\n")); - pvm_exit(); -} - - -#else /* if HMMER_PVM not defined: include a dummy */ - -#include -int main(void) -{ - printf("hmmsearch-pvm is disabled. PVM support was not compiled into HMMER.\n"); - exit(0); -} - -#endif diff --git a/forester/archive/RIO/others/hmmer/src/hmmsearch.c b/forester/archive/RIO/others/hmmer/src/hmmsearch.c deleted file mode 100644 index 1c5ba7a..0000000 --- a/forester/archive/RIO/others/hmmer/src/hmmsearch.c +++ /dev/null @@ -1,1101 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- ************************************************************/ - -/* hmmsearch.c - * SRE, Tue Jan 7 17:19:20 1997 [St. Louis] - * - * Search a sequence database with a profile HMM. - * Conditionally includes PVM parallelization when HMMER_PVM is defined - * at compile time; hmmsearch --pvm runs the PVM version. - * - * CVS $Id: hmmsearch.c,v 1.1.1.1 2005/03/22 08:34:05 cmzmasek Exp $ - */ - -#include -#include -#include -#include -#include -#ifdef HMMER_THREADS -#include -#endif -#ifdef HMMER_PVM -#include -#endif - -#include "squid.h" /* general sequence analysis library */ -#include "config.h" /* compile-time configuration constants */ -#include "structs.h" /* data structures, macros, #define's */ -#include "funcs.h" /* function declarations */ -#include "globals.h" /* alphabet global variables */ -#include "version.h" /* version info */ - -static char banner[] = "hmmsearch - search a sequence database with a profile HMM"; - -static char usage[] = "\ -Usage: hmmsearch [-options] \n\ - Available options are:\n\ - -h : help; print brief help on version and usage\n\ - -A : sets alignment output limit to best domain alignments\n\ - -E : sets E value cutoff (globE) to <= x\n\ - -T : sets T bit threshold (globT) to >= x\n\ - -Z : sets Z (# seqs) for E-value calculation\n\ -"; - -static char experts[] = "\ - --compat : make best effort to use last version's output style\n\ - --cpu : run threads in parallel (if threaded)\n\ - --cut_ga : use Pfam GA gathering threshold cutoffs\n\ - --cut_nc : use Pfam NC noise threshold cutoffs\n\ - --cut_tc : use Pfam TC trusted threshold cutoffs\n\ - --domE : sets domain Eval cutoff (2nd threshold) to <= x\n\ - --domT : sets domain T bit thresh (2nd threshold) to >= x\n\ - --forward : use the full Forward() algorithm instead of Viterbi\n\ - --informat : sequence file is in format , not FASTA\n\ - --null2 : turn OFF the post hoc second null model\n\ - --pvm : run on a Parallel Virtual Machine (PVM)\n\ - --xnu : turn ON XNU filtering of 
target protein sequences\n\ -"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-A", TRUE, sqdARG_INT }, - { "-E", TRUE, sqdARG_FLOAT}, - { "-T", TRUE, sqdARG_FLOAT}, - { "-Z", TRUE, sqdARG_INT }, - { "--compat", FALSE, sqdARG_NONE }, - { "--cpu", FALSE, sqdARG_INT }, - { "--cut_ga", FALSE, sqdARG_NONE }, - { "--cut_nc", FALSE, sqdARG_NONE }, - { "--cut_tc", FALSE, sqdARG_NONE }, - { "--domE", FALSE, sqdARG_FLOAT}, - { "--domT", FALSE, sqdARG_FLOAT}, - { "--forward", FALSE, sqdARG_NONE }, - { "--informat",FALSE, sqdARG_STRING}, - { "--null2", FALSE, sqdARG_NONE }, - { "--pvm", FALSE, sqdARG_NONE }, - { "--xnu", FALSE, sqdARG_NONE }, - -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - - -#ifdef HMMER_THREADS -/* POSIX threads version: - * the threads share a workpool_s structure amongst themselves, - * for obtaining locks on input HMM file and output histogram and - * tophits structures. - */ -struct workpool_s { - /* Shared configuration resources which don't change: - */ - struct plan7_s *hmm; /* HMM to search with */ - int do_xnu; /* TRUE to apply XNU filter */ - int do_forward; /* TRUE to score using Forward */ - int do_null2; /* TRUE to apply null2 ad hoc correction */ - struct threshold_s *thresh; /* score/evalue threshold info */ - - /* Shared (mutex-protected) input resources: - */ - SQFILE *sqfp; /* ptr to open sequence file */ - int nseq; /* number of seqs searched so far */ - pthread_mutex_t input_lock; /* mutex for locking input */ - - /* Shared (mutex-protected) output resources: - */ - struct tophit_s *ghit; /* per-sequence top hits */ - struct tophit_s *dhit; /* per-domain top hits */ - struct histogram_s *hist; /* histogram of scores */ - pthread_mutex_t output_lock; /* mutex for locking output */ - - /* Thread pool information - */ - pthread_t *thread; /* our pool of threads */ - int num_threads; /* number of threads */ -}; -static struct workpool_s *workpool_start(struct plan7_s *hmm, SQFILE *sqfp, - int 
do_xnu, int do_forward, int do_null2, - struct threshold_s *thresh, - struct tophit_s *ghit, struct tophit_s *dhit, - struct histogram_s *hist, int num_threads); -static void workpool_stop(struct workpool_s *wpool); -static void workpool_free(struct workpool_s *wpool); -static void *worker_thread(void *ptr); -#endif /* HMMER_THREADS */ - -static void main_loop_serial(struct plan7_s *hmm, SQFILE *sqfp, struct threshold_s *thresh, int do_forward, - int do_null2, int do_xnu, int num_threads, - struct histogram_s *histogram, struct tophit_s *ghit, - struct tophit_s *dhit, int *ret_nseq); -#ifdef HMMER_PVM -static void main_loop_pvm(struct plan7_s *hmm, SQFILE *sqfp, struct threshold_s *thresh, int do_forward, - int do_null2, int do_xnu, struct histogram_s *histogram, - struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nseq); -#endif - - -int -main(int argc, char **argv) -{ - char *hmmfile; /* file to read HMM(s) from */ - HMMFILE *hmmfp; /* opened hmmfile for reading */ - char *seqfile; /* file to read target sequence(s) from */ - SQFILE *sqfp; /* opened seqfile for reading */ - int format; /* format of seqfile */ - int i; - struct plan7_s *hmm; /* HMM to search with */ - struct histogram_s *histogram;/* histogram of all scores */ - struct fancyali_s *ali; /* displayed alignment info */ - struct tophit_s *ghit; /* list of top hits for whole sequences */ - struct tophit_s *dhit; /* list of top hits for domains */ - - float sc; /* score of an HMM search */ - double pvalue; /* pvalue of an HMM score */ - double evalue; /* evalue of an HMM score */ - double motherp; /* pvalue of a whole seq HMM score */ - float mothersc; /* score of a whole seq parent of domain */ - int sqfrom, sqto; /* coordinates in sequence */ - int hmmfrom, hmmto; /* coordinate in HMM */ - char *name, *acc, *desc; /* hit sequence name and description */ - int sqlen; /* length of seq that was hit */ - int nseq; /* number of sequences searched */ - int Z; /* # of seqs for purposes of E-val calc */ - 
int domidx; /* number of this domain */ - int ndom; /* total # of domains in this seq */ - int namewidth; /* max width of sequence name */ - int descwidth; /* max width of description */ - int nreported; /* # of hits reported in a list */ - - int Alimit; /* A parameter limiting output alignments */ - struct threshold_s thresh; /* contains all threshold (cutoff) info */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - int do_null2; /* TRUE to adjust scores with null model #2 */ - int do_forward; /* TRUE to use Forward() not Viterbi() */ - int do_xnu; /* TRUE to filter sequences thru XNU */ - int do_pvm; /* TRUE to run on Parallel Virtual Machine */ - int be_backwards; /* TRUE to be backwards-compatible in output*/ - int num_threads; /* number of worker threads */ - - /*********************************************** - * Parse command line - ***********************************************/ - - format = SQFILE_UNKNOWN; /* default: autodetect seq file format w/ Babelfish */ - do_forward = FALSE; - do_null2 = TRUE; - do_xnu = FALSE; - do_pvm = FALSE; - Z = 0; - be_backwards= FALSE; - - Alimit = INT_MAX; /* no limit on alignment output */ - thresh.globE = 10.0; /* use a reasonable Eval threshold; */ - thresh.globT = -FLT_MAX; /* but no bit threshold, */ - thresh.domT = -FLT_MAX; /* no domain bit threshold, */ - thresh.domE = FLT_MAX; /* and no domain Eval threshold. 
*/ - thresh.autocut = CUT_NONE; /* and no Pfam cutoffs used */ - thresh.Z = 0; /* Z not preset; use actual # of seqs */ - -#ifdef HMMER_THREADS - num_threads = ThreadNumber(); /* only matters if we're threaded */ -#else - num_threads = 0; -#endif - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-A") == 0) Alimit = atoi(optarg); - else if (strcmp(optname, "-E") == 0) thresh.globE = atof(optarg); - else if (strcmp(optname, "-T") == 0) thresh.globT = atof(optarg); - else if (strcmp(optname, "-Z") == 0) thresh.Z = atoi(optarg); - else if (strcmp(optname, "--compat") == 0) be_backwards = TRUE; - else if (strcmp(optname, "--cpu") == 0) num_threads = atoi(optarg); - else if (strcmp(optname, "--cut_ga") == 0) thresh.autocut = CUT_GA; - else if (strcmp(optname, "--cut_nc") == 0) thresh.autocut = CUT_NC; - else if (strcmp(optname, "--cut_tc") == 0) thresh.autocut = CUT_TC; - else if (strcmp(optname, "--domE") == 0) thresh.domE = atof(optarg); - else if (strcmp(optname, "--domT") == 0) thresh.domT = atof(optarg); - else if (strcmp(optname, "--forward") == 0) do_forward = TRUE; - else if (strcmp(optname, "--null2") == 0) do_null2 = FALSE; - else if (strcmp(optname, "--pvm") == 0) do_pvm = TRUE; - else if (strcmp(optname, "--xnu") == 0) do_xnu = TRUE; - else if (strcmp(optname, "--informat") == 0) { - format = String2SeqfileFormat(optarg); - if (format == SQFILE_UNKNOWN) - Die("unrecognized sequence file format \"%s\"", optarg); - } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 2) - Die("Incorrect number of arguments.\n%s\n", usage); - - hmmfile = argv[optind++]; - seqfile = argv[optind++]; - -#ifndef HMMER_PVM - if (do_pvm) Die("PVM support is not compiled into your HMMER software; --pvm doesn't work."); -#endif -#ifndef HMMER_THREADS - if (num_threads) Die("Posix threads support is not compiled into HMMER; --cpu doesn't 
have any effect"); -#endif - - - /*********************************************** - * Open sequence database (might be in BLASTDB or current directory) - ***********************************************/ - - if ((sqfp = SeqfileOpen(seqfile, format, "BLASTDB")) == NULL) - Die("Failed to open sequence database file %s\n%s\n", seqfile, usage); - - /*********************************************** - * Open HMM file (might be in HMMERDB or current directory). - * Read a single HMM from it. (Config HMM, if necessary). - * Alphabet globals are set by reading the HMM. - ***********************************************/ - - if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) - Die("Failed to open HMM file %s\n%s", hmmfile, usage); - if (!HMMFileRead(hmmfp, &hmm)) - Die("Failed to read any HMMs from %s\n", hmmfile); - if (hmm == NULL) - Die("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile); - P7Logoddsify(hmm, !do_forward); - - if (do_xnu && Alphabet_type == hmmNUCLEIC) - Die("The HMM is a DNA model, and you can't use the --xnu filter on DNA data"); - - /***************************************************************** - * Set up optional Pfam score thresholds. - * Can do this before starting any searches, since we'll only use 1 HMM. - *****************************************************************/ - - if (! 
SetAutocuts(&thresh, hmm)) - Die("HMM %s did not contain the GA, TC, or NC cutoffs you needed", - hmm->name); - - /*********************************************** - * Show the banner - ***********************************************/ - - Banner(stdout, banner); - printf( "HMM file: %s [%s]\n", hmmfile, hmm->name); - printf( "Sequence database: %s\n", seqfile); - if (do_pvm) - printf( "PVM: ACTIVE\n"); - printf( "per-sequence score cutoff: "); - if (thresh.globT == -FLT_MAX) printf("[none]\n"); - else { - printf(">= %.1f", thresh.globT); - if (thresh.autocut == CUT_GA) printf(" [GA1]\n"); - else if (thresh.autocut == CUT_NC) printf(" [NC1]\n"); - else if (thresh.autocut == CUT_TC) printf(" [TC1]\n"); - else printf("\n"); - } - printf( "per-domain score cutoff: "); - if (thresh.domT == -FLT_MAX) printf("[none]\n"); - else { - printf(">= %.1f", thresh.domT); - if (thresh.autocut == CUT_GA) printf(" [GA2]\n"); - else if (thresh.autocut == CUT_NC) printf(" [NC2]\n"); - else if (thresh.autocut == CUT_TC) printf(" [TC2]\n"); - else printf("\n"); - } - printf( "per-sequence Eval cutoff: "); - if (thresh.globE == FLT_MAX) printf("[none]\n"); - else printf("<= %-10.2g\n", thresh.globE); - - printf( "per-domain Eval cutoff: "); - if (thresh.domE == FLT_MAX) printf("[none]\n"); - else printf("<= %10.2g\n", thresh.domE); - printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n"); - - /*********************************************** - * Search HMM against each sequence - ***********************************************/ - - /* set up structures for storing output */ - histogram = AllocHistogram(-200, 200, 100); /* keeps full histogram */ - ghit = AllocTophits(200); /* per-seq hits: 200=lumpsize */ - dhit = AllocTophits(200); /* domain hits: 200=lumpsize */ - - if (! 
do_pvm) - main_loop_serial(hmm, sqfp, &thresh, do_forward, do_null2, do_xnu, num_threads, - histogram, ghit, dhit, &nseq); -#ifdef HMMER_PVM - else - main_loop_pvm(hmm, sqfp, &thresh, do_forward, do_null2, do_xnu, - histogram, ghit, dhit, &nseq); -#endif - - /*********************************************** - * Process hit lists, produce text output - ***********************************************/ - - /* Set the theoretical EVD curve in our histogram using - * calibration in the HMM, if available. - */ - if (hmm->flags & PLAN7_STATS) - ExtremeValueSetHistogram(histogram, hmm->mu, hmm->lambda, - histogram->lowscore, histogram->highscore, 0); - if (!thresh.Z) thresh.Z = nseq; /* set Z for good now that we're done. */ - - /* Format and report our output - */ - /* 1. Report overall sequence hits (sorted on E-value) */ - if (be_backwards) - { - printf("\nQuery HMM: %s|%s|%s\n", - hmm->name, - hmm->flags & PLAN7_ACC ? hmm->acc : "", - hmm->flags & PLAN7_DESC ? hmm->desc : ""); - } - else - { - printf("\nQuery HMM: %s\n", hmm->name); - printf("Accession: %s\n", hmm->flags & PLAN7_ACC ? hmm->acc : "[none]"); - printf("Description: %s\n", hmm->flags & PLAN7_DESC ? hmm->desc : "[none]"); - } - - if (hmm->flags & PLAN7_STATS) - printf(" [HMM has been calibrated; E-values are empirical estimates]\n"); - else - printf(" [No calibration for HMM; E-values are upper bounds]\n"); - - FullSortTophits(ghit); - namewidth = MAX(8, TophitsMaxName(ghit)); /* cannot truncate name. 
*/ - descwidth = MAX(52-namewidth, 11);/* may truncate desc, but need strlen("Description") */ - - printf("\nScores for complete sequences (score includes all domains):\n"); - printf("%-*s %-*s %7s %10s %3s\n", namewidth, "Sequence", descwidth, "Description", "Score", "E-value", " N "); - printf("%-*s %-*s %7s %10s %3s\n", namewidth, "--------", descwidth, "-----------", "-----", "-------", "---"); - for (i = 0, nreported = 0; i < ghit->num; i++) - { - char *safedesc; - GetRankedHit(ghit, i, - &pvalue, &sc, NULL, NULL, - &name, NULL, &desc, - NULL, NULL, NULL, /* sequence positions */ - NULL, NULL, NULL, /* HMM positions */ - NULL, &ndom, /* domain info */ - NULL); /* alignment info */ - evalue = pvalue * (double) thresh.Z; - - /* safedesc is a workaround for an apparent Linux printf() - * bug with the *.*s format. dbmalloc crashes with a memchr() ptr out of bounds - * flaw if the malloc'ed space for desc is short. The workaround - * is to make sure the ptr for *.* has a big malloc space. - */ - if (desc != NULL && strlen(desc) < 80) - { - safedesc = MallocOrDie(sizeof(char) * 80); - strcpy(safedesc, desc); - } - else safedesc = Strdup(desc); - - if (evalue <= thresh.globE && sc >= thresh.globT) { - printf("%-*s %-*.*s %7.1f %10.2g %3d\n", - namewidth, name, - descwidth, descwidth, safedesc != NULL ? safedesc : "", - sc, evalue, ndom); - nreported++; - } - free(safedesc); - } - if (nreported == 0) printf("\t[no hits above thresholds]\n"); - - - /* 2. 
Report domain hits (also sorted on E-value) */ - FullSortTophits(dhit); - namewidth = MAX(8, TophitsMaxName(dhit)); - - printf("\nParsed for domains:\n"); - printf("%-*s %7s %5s %5s %5s %5s %7s %8s\n", - namewidth, "Sequence", "Domain ", "seq-f", "seq-t", "hmm-f", "hmm-t", "score", "E-value"); - printf("%-*s %7s %5s %5s %5s %5s %7s %8s\n", - namewidth, "--------", "-------", "-----", "-----", "-----", "-----", "-----", "-------"); - - for (i = 0, nreported = 0; i < dhit->num; i++) - { - GetRankedHit(dhit, i, - &pvalue, &sc, &motherp, &mothersc, - &name, NULL, NULL, - &sqfrom, &sqto, &sqlen, /* seq position info */ - &hmmfrom, &hmmto, NULL, /* HMM position info */ - &domidx, &ndom, /* domain info */ - NULL); /* alignment info */ - evalue = pvalue * (double) thresh.Z; - - if (motherp * (double) thresh.Z > thresh.globE || mothersc < thresh.globT) - continue; - else if (evalue <= thresh.domE && sc >= thresh.domT) { - printf("%-*s %3d/%-3d %5d %5d %c%c %5d %5d %c%c %7.1f %8.2g\n", - namewidth, name, - domidx, ndom, - sqfrom, sqto, - sqfrom == 1 ? '[' : '.', sqto == sqlen ? ']' : '.', - hmmfrom, hmmto, - hmmfrom == 1 ? '[':'.', hmmto == hmm->M ? ']' : '.', - sc, evalue); - nreported++; - } - } - if (nreported == 0) printf("\t[no hits above thresholds]\n"); - - - /* 3. Alignment output, also by domain. - * dhits is already sorted and namewidth is set, from above code. - * Number of displayed alignments is limited by Alimit parameter; - * also by domE (evalue threshold), domT (score theshold). 
- */ - if (Alimit != 0) - { - printf("\nAlignments of top-scoring domains:\n"); - for (i = 0, nreported = 0; i < dhit->num; i++) - { - if (nreported == Alimit) break; /* limit to Alimit output alignments */ - GetRankedHit(dhit, i, - &pvalue, &sc, &motherp, &mothersc, - &name, NULL, NULL, - &sqfrom, &sqto, &sqlen, /* seq position info */ - &hmmfrom, &hmmto, NULL, /* HMM position info */ - &domidx, &ndom, /* domain info */ - &ali); /* alignment info */ - evalue = pvalue * (double) thresh.Z; - - if (motherp * (double) thresh.Z > thresh.globE || mothersc < thresh.globT) - continue; - else if (evalue <= thresh.domE && sc >= thresh.domT) - { - printf("%s: domain %d of %d, from %d to %d: score %.1f, E = %.2g\n", - name, domidx, ndom, sqfrom, sqto, sc, evalue); - PrintFancyAli(stdout, ali); - nreported++; - } - } - if (nreported == 0) printf("\t[no hits above thresholds]\n"); - if (nreported == Alimit) printf("\t[output cut off at A = %d top alignments]\n", Alimit); - } - - /* 4. Histogram output */ - printf("\nHistogram of all scores:\n"); - PrintASCIIHistogram(stdout, histogram); - - /* 5. Tophits summaries, while developing... - */ - printf("\nTotal sequences searched: %d\n", nseq); - printf("\nWhole sequence top hits:\n"); - TophitsReport(ghit, thresh.globE, nseq); - printf("\nDomain top hits:\n"); - TophitsReport(dhit, thresh.domE, nseq); - - /*********************************************** - * Clean-up and exit. - ***********************************************/ - - FreeHistogram(histogram); - HMMFileClose(hmmfp); - SeqfileClose(sqfp); - FreeTophits(ghit); - FreeTophits(dhit); - FreePlan7(hmm); - SqdClean(); - - return 0; -} - - -/* Function: main_loop_serial() - * Date: SRE, Wed Sep 23 10:20:49 1998 [St. Louis] - * - * Purpose: Search an HMM against a sequence database. - * main loop for the serial (non-PVM, non-threads) - * version. - * - * In: HMM and open sqfile, plus options - * Out: histogram, global hits list, domain hits list, nseq. 
- * - * Args: hmm - the HMM to search with. - * sqfp - open SQFILE for sequence database - * thresh - score/evalue threshold info - * do_forward - TRUE to score using Forward() - * do_null2 - TRUE to use ad hoc null2 score correction - * do_xnu - TRUE to apply XNU mask - * num_threads- number of worker threads to start, or 0 - * histogram - RETURN: score histogram - * ghit - RETURN: ranked global scores - * dhit - RETURN: ranked domain scores - * ret_nseq - RETURN: actual number of seqs searched - * - * Returns: (void) - */ -static void -main_loop_serial(struct plan7_s *hmm, SQFILE *sqfp, struct threshold_s *thresh, int do_forward, - int do_null2, int do_xnu, int num_threads, - struct histogram_s *histogram, - struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nseq) -{ -#ifdef HMMER_THREADS - struct workpool_s *wpool; /* pool of worker threads */ -#else - struct p7trace_s *tr; /* traceback */ - char *seq; /* target sequence */ - char *dsq; /* digitized target sequence */ - SQINFO sqinfo; /* optional info for seq */ - float sc; /* score of an HMM search */ - double pvalue; /* pvalue of an HMM score */ - double evalue; /* evalue of an HMM score */ -#endif - int nseq; /* number of sequences searched */ - -#ifdef HMMER_THREADS - wpool = workpool_start(hmm, sqfp, do_xnu, do_forward, do_null2, thresh, - ghit, dhit, histogram, num_threads); - workpool_stop(wpool); - nseq = wpool->nseq; - workpool_free(wpool); - -#else /* unthreaded code: */ - nseq = 0; - while (ReadSeq(sqfp, sqfp->format, &seq, &sqinfo)) - { - /* Silently skip length 0 seqs. - * What, you think this doesn't occur? Welcome to genomics, - * young grasshopper. - */ - if (sqinfo.len == 0) continue; - - nseq++; - dsq = DigitizeSequence(seq, sqinfo.len); - - if (do_xnu && Alphabet_type == hmmAMINO) XNU(dsq, sqinfo.len); - - /* 1. Recover a trace by Viterbi. 
- */ - if (P7ViterbiSize(sqinfo.len, hmm->M) <= RAMLIMIT) - sc = P7Viterbi(dsq, sqinfo.len, hmm, &tr); - else - sc = P7SmallViterbi(dsq, sqinfo.len, hmm, &tr); - - /* 2. If we're using Forward scores, calculate the - * whole sequence score; this overrides anything - * PostprocessSignificantHit() is going to do to the per-seq score. - */ - if (do_forward) { - sc = P7Forward(dsq, sqinfo.len, hmm, NULL); - if (do_null2) sc -= TraceScoreCorrection(hmm, tr, dsq); - } - -#if DEBUGLEVEL >= 2 - P7PrintTrace(stdout, tr, hmm, dsq); -#endif - - /* 2. Store score/pvalue for global alignment; will sort on score, - * which in hmmsearch is monotonic with E-value. - * Keep all domains in a significant sequence hit. - * We can only make a lower bound estimate of E-value since - * we don't know the final value of nseq yet, so the list - * of hits we keep in memory is >= the list we actually - * output. - */ - pvalue = PValue(hmm, sc); - evalue = thresh->Z ? (double) thresh->Z * pvalue : (double) nseq * pvalue; - if (sc >= thresh->globT && evalue <= thresh->globE) - { - PostprocessSignificantHit(ghit, dhit, - tr, hmm, dsq, sqinfo.len, - sqinfo.name, - sqinfo.flags & SQINFO_ACC ? sqinfo.acc : NULL, - sqinfo.flags & SQINFO_DESC ? sqinfo.desc : NULL, - do_forward, sc, - do_null2, - thresh, - FALSE); /* FALSE-> not hmmpfam mode, hmmsearch mode */ - } - AddToHistogram(histogram, sc); - FreeSequence(seq, &sqinfo); - P7FreeTrace(tr); - free(dsq); - } -#endif - - *ret_nseq = nseq; - return; -} - - - -#ifdef HMMER_PVM -/***************************************************************** - * PVM specific functions - ****************************************************************/ - -/* Function: main_loop_pvm() - * Date: SRE, Wed Sep 23 10:36:44 1998 [St. Louis] - * - * Purpose: Search an HMM against a sequence database. - * main loop for the PVM version. - * - * In: HMM and open sqfile, plus options - * Out: histogram, global hits list, domain hits list, nseq. 
- * - * Args: hmm - the HMM to search with. scoring form. - * sqfp - open SQFILE for sequence database - * thresh - score/evalue threshold information - * do_forward - TRUE to score using Forward() - * do_null2 - TRUE to use ad hoc null2 score correction - * do_xnu - TRUE to apply XNU mask - * histogram - RETURN: score histogram - * ghit - RETURN: ranked global scores - * dhit - RETURN: ranked domain scores - * ret_nseq - RETURN: actual number of seqs searched - * - * Returns: (void) - */ -static void -main_loop_pvm(struct plan7_s *hmm, SQFILE *sqfp, struct threshold_s *thresh, int do_forward, - int do_null2, int do_xnu, struct histogram_s *histogram, - struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nseq) -{ - char *seq; /* target sequence */ - char *dsq; /* digitized target seq */ - SQINFO sqinfo; /* optional info about target seq */ - int master_tid; /* master's (my) PVM TID */ - int *slave_tid; /* array of slave TID's */ - int nslaves; /* number of slaves */ - int code; /* status code rec'd from a slave */ - int nseq; /* number of sequences searched */ - int sent_trace; /* TRUE if slave gave us a trace */ - char **dsqlist; /* remember what seqs slaves are doing */ - char **namelist; /* remember what seq names slaves are doing */ - char **acclist ; /* remember what seq accessions slaves are doing */ - char **desclist; /* remember what seq desc's slaves are doing */ - int *lenlist; /* remember lengths of seqs slaves are doing */ - int slaveidx; /* counter for slaves */ - float sc; /* score of an alignment */ - double pvalue; /* P-value of a score of an alignment */ - struct p7trace_s *tr; /* Viterbi traceback of an alignment */ - int i; /* generic counter */ - - /* Initialize PVM. 
- */ - SQD_DPRINTF1(("Requesting master TID...\n")); - master_tid = pvm_mytid(); -#if DEBUGLEVEL >= 1 - pvm_catchout(stderr); /* catch output for debugging */ -#endif - SQD_DPRINTF1(("Spawning slaves...\n")); - PVMSpawnSlaves("hmmsearch-pvm", &slave_tid, &nslaves); - SQD_DPRINTF1(("Spawned a total of %d slaves...\n", nslaves)); - - /* Initialize the slaves by broadcast. - */ - SQD_DPRINTF1(("Broadcasting to %d slaves...\n", nslaves)); - pvm_initsend(PvmDataDefault); - pvm_pkfloat(&(thresh->globT), 1, 1); - pvm_pkdouble(&(thresh->globE), 1, 1); - pvm_pkint(&(thresh->Z), 1, 1); - pvm_pkint(&do_forward, 1, 1); - pvm_pkint(&do_null2, 1, 1); - pvm_pkint(&Alphabet_type, 1, 1); - PVMPackHMM(hmm); - pvm_mcast(slave_tid, nslaves, HMMPVM_INIT); - SQD_DPRINTF1(("Slaves should be ready...\n")); - - /* Confirm slaves' OK status. - */ - PVMConfirmSlaves(slave_tid, nslaves); - SQD_DPRINTF1(("Slaves confirm that they're ok...\n")); - - /* Alloc arrays for remembering what seq each - * slave was working on. - */ - namelist = MallocOrDie(sizeof(char *) * nslaves); - acclist = MallocOrDie(sizeof(char *) * nslaves); - desclist = MallocOrDie(sizeof(char *) * nslaves); - dsqlist = MallocOrDie(sizeof(char *) * nslaves); - lenlist = MallocOrDie(sizeof(int) * nslaves); - - /* Load the slaves. - * Give them all a sequence number and a digitized sequence - * to work on. - * A side effect of the seq number is that we assign each slave - * a number from 0..nslaves-1. - */ - for (nseq = 0; nseq < nslaves; nseq++) - { - if (! 
ReadSeq(sqfp, sqfp->format, &seq, &sqinfo)) break; - if (sqinfo.len == 0) { nseq--; continue; } - - dsq = DigitizeSequence(seq, sqinfo.len); - if (do_xnu && Alphabet_type == hmmAMINO) XNU(dsq, sqinfo.len); - - pvm_initsend(PvmDataDefault); - pvm_pkint(&nseq, 1, 1); - pvm_pkint(&(sqinfo.len), 1, 1); - pvm_pkbyte(dsq, sqinfo.len+2, 1); - pvm_send(slave_tid[nseq], HMMPVM_WORK); - SQD_DPRINTF1(("sent a dsq : %d bytes\n", sqinfo.len+2)); - - namelist[nseq] = Strdup(sqinfo.name); - acclist[nseq] = (sqinfo.flags & SQINFO_ACC) ? Strdup(sqinfo.acc) : NULL; - desclist[nseq] = (sqinfo.flags & SQINFO_DESC) ? Strdup(sqinfo.desc) : NULL; - lenlist[nseq] = sqinfo.len; - dsqlist[nseq] = dsq; - - FreeSequence(seq, &sqinfo); - } - SQD_DPRINTF1(("%d slaves are loaded\n", nseq)); - - /* main receive/send loop - */ - while (ReadSeq(sqfp, sqfp->format, &seq, &sqinfo)) - { - if (sqinfo.len == 0) { continue; } - nseq++; - /* check slaves before blocking */ - PVMCheckSlaves(slave_tid, nslaves); - - /* receive output */ - SQD_DPRINTF1(("Waiting for a slave to give me output...\n")); - pvm_recv(-1, HMMPVM_RESULTS); - pvm_upkint(&slaveidx, 1, 1); /* # of slave who's sending us stuff */ - pvm_upkfloat(&sc, 1, 1); /* score */ - pvm_upkdouble(&pvalue, 1, 1); /* P-value */ - pvm_upkint(&sent_trace, 1, 1); /* TRUE if trace is coming */ - tr = (sent_trace) ? 
PVMUnpackTrace() : NULL; - SQD_DPRINTF1(("Slave %d finished %s for me...\n", slaveidx, namelist[slaveidx])); - - /* send new work */ - dsq = DigitizeSequence(seq, sqinfo.len); - if (do_xnu) XNU(dsq, sqinfo.len); - - pvm_initsend(PvmDataDefault); - pvm_pkint(&nseq, 1, 1); - pvm_pkint(&(sqinfo.len), 1, 1); - pvm_pkbyte(dsq, sqinfo.len+2, 1); - pvm_send(slave_tid[slaveidx], HMMPVM_WORK); - - /* process output */ - if (sent_trace) - { - PostprocessSignificantHit(ghit, dhit, - tr, hmm, dsqlist[slaveidx], lenlist[slaveidx], - namelist[slaveidx], acclist[slaveidx], desclist[slaveidx], - do_forward, sc, - do_null2, - thresh, - FALSE); /* FALSE-> not hmmpfam mode, hmmsearch mode */ - P7FreeTrace(tr); - } - AddToHistogram(histogram, sc); - - /* record seq info for seq we just sent */ - free(namelist[slaveidx]); - if (acclist[slaveidx] != NULL) free(acclist[slaveidx]); - if (desclist[slaveidx] != NULL) free(desclist[slaveidx]); - free(dsqlist[slaveidx]); - - dsqlist[slaveidx] = dsq; - namelist[slaveidx] = Strdup(sqinfo.name); - acclist[slaveidx] = (sqinfo.flags & SQINFO_ACC) ? Strdup(sqinfo.acc) : NULL; - desclist[slaveidx] = (sqinfo.flags & SQINFO_DESC) ? Strdup(sqinfo.desc) : NULL; - lenlist[slaveidx] = sqinfo.len; - - FreeSequence(seq, &sqinfo); - } - SQD_DPRINTF1(("End of receive/send loop\n")); - - /* Collect the output. All n slaves are still working. - */ - for (i = 0; i < nslaves && i < nseq; i++) - { - /* don't check slaves (they're exiting normally); - window of vulnerability here to slave crashes */ - /* receive output */ - pvm_recv(-1, HMMPVM_RESULTS); - pvm_upkint(&slaveidx, 1, 1); /* # of slave who's sending us stuff */ - pvm_upkfloat(&sc, 1, 1); /* score */ - pvm_upkdouble(&pvalue, 1, 1); /* P-value */ - pvm_upkint(&sent_trace, 1, 1); /* TRUE if trace is coming */ - tr = (sent_trace) ? 
PVMUnpackTrace() : NULL; - SQD_DPRINTF1(("Slave %d finished %s for me...\n", slaveidx, namelist[slaveidx])); - - /* process output */ - if (sent_trace) - { - PostprocessSignificantHit(ghit, dhit, - tr, hmm, dsqlist[slaveidx], lenlist[slaveidx], - namelist[slaveidx], acclist[slaveidx], desclist[slaveidx], - do_forward, sc, - do_null2, - thresh, - FALSE); /* FALSE-> not hmmpfam mode, hmmsearch mode */ - P7FreeTrace(tr); - } - AddToHistogram(histogram, sc); - - /* free seq info */ - free(namelist[slaveidx]); - if (acclist[slaveidx] != NULL) free(acclist[slaveidx]); - if (desclist[slaveidx] != NULL) free(desclist[slaveidx]); - free(dsqlist[slaveidx]); - - /* send cleanup/shutdown flag to slave */ - pvm_initsend(PvmDataDefault); - code = -1; - pvm_pkint(&code, 1, 1); - pvm_send(slave_tid[slaveidx], HMMPVM_WORK); - } - - - /* Cleanup; quit the VM; and return - */ - free(slave_tid); - free(dsqlist); - free(namelist); - free(acclist); - free(desclist); - free(lenlist); - pvm_exit(); - *ret_nseq = nseq; - return; -} -#endif /* HMMER_PVM */ - -#ifdef HMMER_THREADS -/***************************************************************** - * POSIX threads implementation. - * - * API: - * workpool_start() (makes a workpool_s structure. Starts calculations.) - * workpool_stop() (waits for threads to finish.) - * workpool_free() (destroys the structure) - * - * Threads: - * worker_thread() (the actual parallelized worker thread). - *****************************************************************/ - -/* Function: workpool_start() - * Date: SRE, Mon Oct 5 16:44:53 1998 - * - * Purpose: Initialize a workpool_s structure, and return it. 
- * - * Args: sqfp - open sequence file, at start - * do_xnu - TRUE to apply XNU filter - * do_forward - TRUE to score using Forward - * do_null2 - TRUE to apply null2 ad hoc correction - * thresh - score/evalue threshold info - * ghit - per-seq hit list - * dhit - per-domain hit list - * hist - histogram (alloced but empty) - * num_threads- number of worker threads to run. - * - * Returns: ptr to struct workpool_s. - * Caller must wait for threads to finish with workpool_stop(), - * then free the structure with workpool_free(). - */ -static struct workpool_s * -workpool_start(struct plan7_s *hmm, SQFILE *sqfp, int do_xnu, - int do_forward, int do_null2, struct threshold_s *thresh, - struct tophit_s *ghit, struct tophit_s *dhit, - struct histogram_s *hist, int num_threads) -{ - struct workpool_s *wpool; - pthread_attr_t attr; - int i; - int rtn; - - wpool = MallocOrDie(sizeof(struct workpool_s)); - wpool->thread = MallocOrDie(num_threads * sizeof(pthread_t)); - wpool->hmm = hmm; - - wpool->do_xnu = do_xnu; - wpool->do_forward = do_forward; - wpool->do_null2 = do_null2; - wpool->thresh = thresh; - - wpool->sqfp = sqfp; - wpool->nseq = 0; - if ((rtn = pthread_mutex_init(&(wpool->input_lock), NULL)) != 0) - Die("pthread_mutex_init FAILED; %s\n", strerror(rtn)); - - wpool->ghit = ghit; - wpool->dhit = dhit; - wpool->hist = hist; - if ((rtn = pthread_mutex_init(&(wpool->output_lock), NULL)) != 0) - Die("pthread_mutex_init FAILED; %s\n", strerror(rtn)); - - wpool->num_threads= num_threads; - - /* Create slave threads. See comments in hmmcalibrate.c at this - * step, regarding concurrency, system scope, and portability - * amongst various UNIX implementations of pthreads. 
- */ - pthread_attr_init(&attr); -#ifndef __sgi -#ifdef HAVE_PTHREAD_ATTR_SETSCOPE - pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); -#endif -#endif -#ifdef HAVE_PTHREAD_SETCONCURRENCY - pthread_setconcurrency(num_threads+1); -#endif - /* pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); */ - for (i = 0; i < num_threads; i++) - if ((rtn = pthread_create(&(wpool->thread[i]), &attr, - worker_thread , (void *) wpool)) != 0) - Die("Failed to create thread %d; return code %d\n", i, rtn); - - pthread_attr_destroy(&attr); - return wpool; -} -/* Function: workpool_stop() - * Date: SRE, Thu Jul 16 11:20:16 1998 [St. Louis] - * - * Purpose: Waits for threads in a workpool to finish. - * - * Args: wpool -- ptr to the workpool structure - * - * Returns: (void) - */ -static void -workpool_stop(struct workpool_s *wpool) -{ - int i; - /* wait for threads to stop */ - for (i = 0; i < wpool->num_threads; i++) - if (pthread_join(wpool->thread[i],NULL) != 0) - Die("pthread_join failed"); - return; -} - -/* Function: workpool_free() - * Date: SRE, Thu Jul 16 11:26:27 1998 [St. Louis] - * - * Purpose: Free a workpool_s structure, after the threads - * have finished. - * - * Args: wpool -- ptr to the workpool. - * - * Returns: (void) - */ -static void -workpool_free(struct workpool_s *wpool) -{ - free(wpool->thread); - free(wpool); - return; -} - - -/* Function: worker_thread() - * Date: SRE, Mon Sep 28 10:48:29 1998 [St. Louis] - * - * Purpose: The procedure executed by the worker threads. - * - * Args: ptr - (void *) that is recast to a pointer to - * the workpool. 
- * - * Returns: (void *) - */ -void * -worker_thread(void *ptr) -{ - struct workpool_s *wpool; /* our working threads structure */ - char *seq; /* target sequence */ - SQINFO sqinfo; /* information assoc w/ seq */ - char *dsq; /* digitized sequence */ - struct p7trace_s *tr; /* traceback from an alignment */ - float sc; /* score of an alignment */ - int rtn; /* a return code from pthreads lib */ - double pvalue; /* P-value of score */ - double evalue; /* E-value of score */ - - wpool = (struct workpool_s *) ptr; - for (;;) { - - /* 1. acquire lock on sequence input, and get - * the next seq to work on. - */ - /* acquire a lock */ - if ((rtn = pthread_mutex_lock(&(wpool->input_lock))) != 0) - Die("pthread_mutex_lock failure: %s\n", strerror(rtn)); - if (! ReadSeq(wpool->sqfp, wpool->sqfp->format, &seq, &sqinfo)) - { /* we're done. release lock, exit thread */ - if ((rtn = pthread_mutex_unlock(&(wpool->input_lock))) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); - pthread_exit(NULL); - } - SQD_DPRINTF1(("a thread is working on %s\n", sqinfo.name)); - wpool->nseq++; - /* release the lock */ - if ((rtn = pthread_mutex_unlock(&(wpool->input_lock))) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); - - if (sqinfo.len == 0) continue; /* silent skip of len=0 seqs (wormpep!?!) */ - - dsq = DigitizeSequence(seq, sqinfo.len); - if (wpool->do_xnu) XNU(dsq, sqinfo.len); - - /* 1. Recover a trace by Viterbi. - */ - if (P7ViterbiSize(sqinfo.len, wpool->hmm->M) <= RAMLIMIT) - sc = P7Viterbi(dsq, sqinfo.len, wpool->hmm, &tr); - else - sc = P7SmallViterbi(dsq, sqinfo.len, wpool->hmm, &tr); - - /* 2. If we're using Forward scores, do another DP - * to get it; else, we already have a Viterbi score - * in sc. - */ - if (wpool->do_forward) sc = P7Forward(dsq, sqinfo.len, wpool->hmm, NULL); - if (wpool->do_null2) sc -= TraceScoreCorrection(wpool->hmm, tr, dsq); - - /* 3. 
Save the output in tophits and histogram structures, after acquiring a lock - */ - if ((rtn = pthread_mutex_lock(&(wpool->output_lock))) != 0) - Die("pthread_mutex_lock failure: %s\n", strerror(rtn)); - SQD_DPRINTF1(("seq %s scores %f\n", sqinfo.name, sc)); - - pvalue = PValue(wpool->hmm, sc); - evalue = wpool->thresh->Z ? (double) wpool->thresh->Z * pvalue : (double) wpool->nseq * pvalue; - - if (sc >= wpool->thresh->globT && evalue <= wpool->thresh->globE) - { - PostprocessSignificantHit(wpool->ghit, wpool->dhit, - tr, wpool->hmm, dsq, sqinfo.len, - sqinfo.name, - sqinfo.flags & SQINFO_ACC ? sqinfo.acc : NULL, - sqinfo.flags & SQINFO_DESC ? sqinfo.desc : NULL, - wpool->do_forward, sc, - wpool->do_null2, - wpool->thresh, - FALSE); /* FALSE-> not hmmpfam mode, hmmsearch mode */ - } - AddToHistogram(wpool->hist, sc); - if ((rtn = pthread_mutex_unlock(&(wpool->output_lock))) != 0) - Die("pthread_mutex_unlock failure: %s\n", strerror(rtn)); - - P7FreeTrace(tr); - FreeSequence(seq, &sqinfo); - free(dsq); - } /* end 'infinite' loop over seqs in this thread */ -} - -#endif /* HMMER_THREADS */ - diff --git a/forester/archive/RIO/others/hmmer/src/masks.c b/forester/archive/RIO/others/hmmer/src/masks.c deleted file mode 100644 index 68eb09f..0000000 --- a/forester/archive/RIO/others/hmmer/src/masks.c +++ /dev/null @@ -1,367 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* masks.c - * SRE, Tue Nov 18 10:12:28 1997 - * - * Sequence masking routines; corrections for biased composition - * target sequences. 
- * - * The Claverie/States XNU code is not used by default because I - * consider X'ing out sequence to be too black/white and too - * aggressive, but it's available as an option. - * - * The Wooton/Federhen SEG code was studied, but deemed too - * nonportable to include; it would've suffered the same drawback - * as XNU. - * - * The TraceScoreCorrection() code is the default. - * - * RCS $Id: masks.c,v 1.1.1.1 2005/03/22 08:34:02 cmzmasek Exp $ - */ - -#include -#include -#include - -#include "squid.h" -#include "config.h" -#include "structs.h" -#include "funcs.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -/* The PAM120 score matrix, in HMMER's AMINO_ALPHABET alphabetic order - */ -static int xpam120[23][23] = { - { 3, -3, 0, 0, -4, 1, -3, -1, -2, -3, -2, -1, 1, -1, -3, 1, 1, 0, -7, -4, 1, 0, 0 }, - {-3, 9, -7, -7, -6, -4, -4, -3, -7, -7, -6, -5, -4, -7, -4, 0, -3, -3, -8, -1, -4, -6, 0 }, - { 0, -7, 5, 3, -7, 0, 0, -3, -1, -5, -4, 2, -3, 1, -3, 0, -1, -3, -8, -5, 5, 3, 0 }, - { 0, -7, 3, 5, -7, -1, -1, -3, -1, -4, -3, 1, -2, 2, -3, -1, -2, -3, -8, -5, 3, 5, 0 }, - {-4, -6, -7, -7, 8, -5, -3, 0, -7, 0, -1, -4, -5, -6, -5, -3, -4, -3, -1, 4, -4, -5, 0 }, - { 1, -4, 0, -1, -5, 5, -4, -4, -3, -5, -4, 0, -2, -3, -4, 1, -1, -2, -8, -6, 1, -1, 0 }, - {-3, -4, 0, -1, -3, -4, 7, -4, -2, -3, -4, 2, -1, 3, 1, -2, -3, -3, -3, -1, 2, 2, 0 }, - {-1, -3, -3, -3, 0, -4, -4, 6, -3, 1, 1, -2, -3, -3, -2, -2, 0, 3, -6, -2, -2, -2, 0 }, - {-2, -7, -1, -1, -7, -3, -2, -3, 5, -4, 0, 1, -2, 0, 2, -1, -1, -4, -5, -5, 1, 0, 0 }, - {-3, -7, -5, -4, 0, -5, -3, 1, -4, 5, 3, -4, -3, -2, -4, -4, -3, 1, -3, -2, -3, -2, 0 }, - {-2, -6, -4, -3, -1, -4, -4, 1, 0, 3, 8, -3, -3, -1, -1, -2, -1, 1, -6, -4, -3, -1, 0 }, - {-1, -5, 2, 1, -4, 0, 2, -2, 1, -4, -3, 4, -2, 0, -1, 1, 0, -3, -4, -2, 4, 1, 0 }, - { 1, -4, -3, -2, -5, -2, -1, -3, -2, -3, -3, -2, 6, 0, -1, 1, -1, -2, -7, -6, -1, 0, 0 }, - {-1, -7, 1, 2, -6, -3, 3, -3, 0, -2, -1, 0, 0, 6, 1, -2, -2, -3, -6, -5, 1, 5, 0 }, - {-3, -4, 
-3, -3, -5, -4, 1, -2, 2, -4, -1, -1, -1, 1, 6, -1, -2, -3, 1, -5, -1, 0, 0 }, - { 1, 0, 0, -1, -3, 1, -2, -2, -1, -4, -2, 1, 1, -2, -1, 3, 2, -2, -2, -3, 1, 0, 0 }, - { 1, -3, -1, -2, -4, -1, -3, 0, -1, -3, -1, 0, -1, -2, -2, 2, 4, 0, -6, -3, 1, -1, 0 }, - { 0, -3, -3, -3, -3, -2, -3, 3, -4, 1, 1, -3, -2, -3, -3, -2, 0, 5, -8, -3, -2, -2, 0 }, - {-7, -8, -8, -8, -1, -8, -3, -6, -5, -3, -6, -4, -7, -6, 1, -2, -6, -8, 12, -2, -5, -6, 0 }, - {-4, -1, -5, -5, 4, -6, -1, -2, -5, -2, -4, -2, -6, -5, -5, -3, -3, -3, -2, 8, -2, -4, 0 }, - { 1, -4, 5, 3, -4, 1, 2, -2, 1, -3, -3, 4, -1, 1, -1, 1, 1, -2, -5, -2, 6, 4, 0 }, - { 0, -6, 3, 5, -5, -1, 2, -2, 0, -2, -1, 1, 0, 5, 0, 0, -1, -2, -6, -4, 4, 6, 0 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, -}; - - -/* Function: XNU() - * Date: 18 Nov 1997 [StL] - * - * Purpose: x-out of repetitive sequence. XNU tends to be - * good at x'ing out short period tandem repeats. - * - * Note: Apply /only/ to protein sequence. - * - * Args: dsq: 1..len digitized sequence - * len: length of dsq - * - * Return: number of characters x'ed out. 
- */ -int -XNU(char *dsq, int len) -{ - int i,k,off,sum,beg,end,top; - int topcut,fallcut; - double s0; - int noff = 4; /* maximum search offset */ - int mcut = 1; - double pcut = 0.01; - int *hit; - double lambda = 0.346574; - double K = 0.2; - double H = 0.664; - int xnum = 0; - - if (len == 0) return 0; - - hit = MallocOrDie(sizeof(int) * (len+1)); - for (i=1; i<=len; i++) hit[i]=0; - - /* - ** Determine the score cutoff so that pcut will be the fraction - ** of random sequence eliminated assuming lambda, K, and H are - ** characteristic of the database as a whole - */ - s0 = - log( pcut*H / (noff*K) ) / lambda; - if (s0>0) topcut = floor(s0 + log(s0)/lambda + 0.5); - else topcut = 0; - fallcut = (int)log(K/0.001)/lambda; - - for (off=mcut; off<=noff; off++) { - sum=top=0; - beg=off; - end=0; - - for (i=off+1; i<=len; i++) { - sum += xpam120[(int) dsq[i]][(int) dsq[i-off]]; - if (sum>top) { - top=sum; - end=i; - } - if (top>=topcut && top-sum>fallcut) { - for (k=beg; k<=end; k++) - hit[k] = hit[k-off] = 1; - sum=top=0; - beg=end=i+1; - } else if (top-sum>fallcut) { - sum=top=0; - beg=end=i+1; - } - if (sum<0) { - beg=end=i+1; - sum=top=0; - } - } - if (top>=topcut) { - for (k=beg; k<=end; k++) - hit[k] = hit[k-off] = 1; - } - } - - /* Now mask off detected repeats - */ - for (i=1; i<=len; i++) - if (hit[i]) { xnum++; dsq[i] = Alphabet_iupac-1;} /* e.g. 'X' */ - - free(hit); - return xnum; -} - - -/* Function: TraceScoreCorrection() - * Date: Sun Dec 21 12:05:47 1997 [StL] - * - * Purpose: Calculate a correction (in integer log_2 odds) to be - * applied to a sequence, using a second null model, - * based on a traceback. M/I emissions are corrected; - * C/N/J are not -- as if the nonmatching part and - * matching part were each generated by the best null model. - * The null model is constructed /post hoc/ as the - * average over all the M,I distributions used by the trace. - * - * Return: the log_2-odds score correction. 
- */ -float -TraceScoreCorrection(struct plan7_s *hmm, struct p7trace_s *tr, char *dsq) -{ - float p[MAXABET]; /* null model distribution */ - int sc[MAXCODE]; /* null model scores */ - int x; - int tpos; - int score; - - /* Set up model: average over the emission distributions of - * all M, I states that appear in the trace. Ad hoc? Sure, you betcha. - */ - FSet(p, Alphabet_size, 0.0); - for (tpos = 0; tpos < tr->tlen; tpos++) - if (tr->statetype[tpos] == STM) - FAdd(p, hmm->mat[tr->nodeidx[tpos]], Alphabet_size); - else if (tr->statetype[tpos] == STI) - FAdd(p, hmm->ins[tr->nodeidx[tpos]], Alphabet_size); - FNorm(p, Alphabet_size); - - for (x = 0; x < Alphabet_size; x++) - sc[x] = Prob2Score(p[x], hmm->null[x]); - /* could avoid this chunk if we knew - we didn't need any degenerate char scores */ - for (x = Alphabet_size; x < Alphabet_iupac; x++) - sc[x] = DegenerateSymbolScore(p, hmm->null, x); - - - /* Score all the M,I state emissions that appear in the trace. - */ - score = 0; - for (tpos = 0; tpos < tr->tlen; tpos++) - if (tr->statetype[tpos] == STM || tr->statetype[tpos] == STI) - score += sc[(int) dsq[tr->pos[tpos]]]; - - /* Apply an ad hoc 8 bit fudge factor penalty; - * interpreted as a prior, saying that the second null model is - * 1/2^8 (1/256) as likely as the standard null model - */ - score -= 8 * INTSCALE; - - /* Return the correction to the bit score. - */ - return Scorify(ILogsum(0, score)); -} - - -/* THE FOLLOWING CODE IS IN DEVELOPMENT. - * it is commented out of the current release deliberately. - * If you activate it, I'm not responsible for the consequences. - */ -#if MICHAEL_JORDAN_BUYS_THE_PACERS -/* Function: NewTraceScoreCorrection() - * Date: Wed Feb 17 14:32:45 1999 [StL] - * - * Purpose: Calculate a correction (in integer log_2 odds) to be - * applied to a sequence, using a second null model, - * based on sequence endpoints. 
M/I emissions are corrected; - * C/N/J are not -- as if the nonmatching part and - * matching part were each generated by the best null model. - * Each null model is constructed /post hoc/ from the - * sequence composition of each matching domain (e.g. - * a null2 model is constructed for each domain in a - * multihit trace). - * - * Constraints on the construction of this function include: - * 1) Paracel hardware can't deal with trace-dependent - * null2 models. Original implementation of - * TraceScoreCorrection() was dependent on traceback - * and could not be reproduced on GeneMatcher. - * GeneMatcher may be able to deal w/ sequence endpoint - * dependent rescoring, though. - * Although this function looks like it's trace- - * dependent (because it's being passed a p7trace_s - * structure), it's really not; only the sequence - * endpoints are being used. - * - * 2) It is desirable that for multihit traces, - * per-domain scores sum to the per-sequence score. - * Otherwise people see this as a "bug" (cf. - * bug #2, David Kerk, NRC). HMMER calculates the - * per-domain scores by going through a separate - * TraceScore() call for each one and separately - * correcting them with TraceScoreCorrection(), - * so we have to do each domain in a full trace - * by a similar mechanism -- even if this means that - * we're adopting a very dubiously post hoc - * null model. - * - * Return: the log_2-odds score correction. 
- */ -float -NewTraceScoreCorrection(struct plan7_s *hmm, struct p7trace_s *tr, char *dsq) -{ - float ct[MAXABET]; /* counts of observed residues */ - float p[MAXABET]; /* null2 model distribution (also counts) */ - float sc[MAXCODE]; /* null2 model scores (as floats not int) */ - - int x; - int tpos; - int score; /* tmp score for real HMM, integer logodds */ - float hmmscore; /* score for real HMM for this domain */ - float null2score; /* score for null2 model for this domain */ - - - float totscore; /* overall score for trace */ - float maxscore; /* best score so far for single domain */ - int in_domain; /* flag for whether we're counting this domain */ - int sym; /* digitized symbol in dsq */ - int ndom; /* number of domains counted towards score */ - - int nsym; /* number of symbols in this alignment */ - - totscore = 0.; - maxscore = -FLT_MAX; - in_domain = FALSE; - ndom = 0; - for (tpos = 0; tpos < tr->tlen; tpos++) - { - /* detect start of domain; start at N or J */ - if (tpos < tr->tlen-1 && tr->statetype[tpos+1] == STB) - { - FCopy(ct, hmm->null, Alphabet_size); /* simple Dirichlet prior */ - score = 0; - null2score = 0.; - nsym = 0; - in_domain = TRUE; - } - /* Count stuff in domain starting with N->B or J->B transition */ - if (in_domain) { - sym = (int) dsq[tr->pos[tpos]]; - - /* count emitted symbols in domain */ - if (tr->statetype[tpos] == STM || tr->statetype[tpos] == STI) - { - P7CountSymbol(ct, sym, 1.0); - nsym++; - } - - /* score emitted symbols in domain towards HMM */ - if (tr->statetype[tpos] == STM) - score += hmm->msc[sym][tr->nodeidx[tpos]]; - else if (tr->statetype[tpos] == STI) - score += hmm->isc[sym][tr->nodeidx[tpos]]; - /* score transitions in domain towards HMM */ - score += TransitionScoreLookup(hmm, - tr->statetype[tpos], tr->nodeidx[tpos], - tr->statetype[tpos+1], tr->nodeidx[tpos+1]); - } - - - if (tr->statetype[tpos] == STE) /* done w/ a domain; calc its score */ - { - /* convert counts to null2 prob distribution */ - FCopy(p, 
ct, Alphabet_size); - FNorm(p, Alphabet_size); - /* Convert probs to log-odds_e scores */ - /* p can't be zero, because of prior */ - for (x = 0; x < Alphabet_size; x++) - sc[x] = log(p[x] / hmm->null[x]); - /* null2 score = counts \dot scores */ - null2score = FDot(ct, sc, Alphabet_size); - - printf("NSYM = %d NULL2 = %.1f\n", nsym, null2score); - - /* Apply an ad hoc 12 bit fudge factor penalty, per domain. - * Interpreted probabilistically, saying that there's about - * a 1/256 probability to transition into the second null model. - */ - null2score -= 12.; - - /* Now correct score1 using the null2 score. - * If it's still > 0, add it to accumulated score. - */ - hmmscore = Scorify(score); - hmmscore -= 1.44269504 * LogSum(0, null2score); - if (hmmscore > 0.) { totscore += hmmscore; ndom++; } - if (hmmscore > maxscore) maxscore = hmmscore; - - in_domain = FALSE; - } - } - - /* Single domain special case. - */ - if (ndom == 0) totscore = maxscore; - - /* Return the correction to the bit score - */ - return (P7TraceScore(hmm, dsq, tr) - totscore); -} -#endif /*0*/ - - -float -SantaCruzCorrection(struct plan7_s *hmm, struct p7trace_s *tr, char *dsq) -{ - return 0.0; /* UNFINISHED CODE */ -} diff --git a/forester/archive/RIO/others/hmmer/src/mathsupport.c b/forester/archive/RIO/others/hmmer/src/mathsupport.c deleted file mode 100644 index 5938463..0000000 --- a/forester/archive/RIO/others/hmmer/src/mathsupport.c +++ /dev/null @@ -1,362 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - - -/* mathsupport.c - * SRE, Mon Nov 11 15:07:33 1996 - * - * Miscellaneous mathematical functions. 
- * General functions are in the SQUID library sre_math.c. - * These functions are too HMM-specific to warrant being in the - * SQUID library. - * - */ - - -#include -#include -#ifdef HMMER_THREADS -#include -#endif -#include "funcs.h" -#include "config.h" -#include "structs.h" -#include "squid.h" - -/* Function: Prob2Score() - * - * Purpose: Convert a probability to a scaled integer log_2 odds score. - * Round to nearest integer (i.e. note use of +0.5 and floor()) - * Return the score. - */ -int -Prob2Score(float p, float null) -{ - if (p == 0.0) return -INFTY; - else return (int) floor(0.5 + INTSCALE * sreLOG2(p/null)); -} - -/* Function: Score2Prob() - * - * Purpose: Convert an integer log_2 odds score back to a probability; - * needs the null model probability, if any, to do the conversion. - */ -float -Score2Prob(int sc, float null) -{ - if (sc == -INFTY) return 0.; - else return (null * sreEXP2((float) sc / INTSCALE)); -} - - -/* Function: Scorify() - * - * Purpose: Convert a scaled integer log-odds score to a floating - * point score for output. (could be a macro but who cares.) - */ -float -Scorify(int sc) -{ - return ((float) sc / INTSCALE); -} - - -/* Function: PValue() - * Date: SRE, Mon Oct 27 12:21:02 1997 [Sanger Centre, UK] - * - * Purpose: Convert an HMM score to a P-value. - * We know P(S>x) is bounded by 1 / (1 + exp_2^x) for a bit score of x. - * We can also use EVD parameters for a tighter bound if we have - * them available. - * - * Args: hmm - model structure, contains EVD parameters - * sc - score in bits - * - * Returns: P value for score significance. - */ -double -PValue(struct plan7_s *hmm, float sc) -{ - double pval; - double pval2; - /* the bound from Bayes */ - if (sc >= sreLOG2(DBL_MAX)) pval = 0.0; - else pval = 1. 
/ (1.+sreEXP2(sc)); - - /* try for a better estimate from EVD fit */ - if (hmm != NULL && (hmm->flags & PLAN7_STATS)) - { - pval2 = ExtremeValueP(sc, hmm->mu, hmm->lambda); - if (pval2 < pval) pval = pval2; - } - return pval; -} - -/* Function: LogSum() - * - * Purpose: Returns the log of the sum of two log probabilities. - * log(exp(p1)+exp(p2)) = p1 + log(1 + exp(p2-p1)) for p1 > p2 - * Note that this is in natural log space, not log_2. - */ -float -LogSum(float p1, float p2) -{ - if (p1 > p2) - return (p1-p2 > 50.) ? p1 + log(1. + exp(p2-p1)) : p1; - else - return (p2-p1 > 50.) ? p2 + log(1. + exp(p1-p2)) : p2; -} - - -/* Function: ILogsum() - * - * Purpose: Return the scaled integer log probability of - * the sum of two probabilities p1 and p2, where - * p1 and p2 are also given as scaled log probabilities. - * - * log(exp(p1)+exp(p2)) = p1 + log(1 + exp(p2-p1)) for p1 > p2 - * - * For speed, builds a lookup table the first time it's called. - * LOGSUM_TBL is set to 20000 by default, in config.h. - * - * Because of the one-time initialization, we have to - * be careful in a multithreaded implementation... hence - * the use of pthread_once(), which forces us to put - * the initialization routine and the lookup table outside - * ILogsum(). (Thanks to Henry Gabb at Intel for pointing - * out this problem.) - * - * Args: p1,p2 -- scaled integer log_2 probabilities to be summed - * in probability space. - * - * Return: scaled integer log_2 probability of the sum. 
- */ -static int ilogsum_lookup[LOGSUM_TBL]; -static void -init_ilogsum(void) -{ - int i; - for (i = 0; i < LOGSUM_TBL; i++) - ilogsum_lookup[i] = (int) (INTSCALE * 1.44269504 * - (log(1.+exp(0.69314718 * (float) -i/INTSCALE)))); -} -int -ILogsum(int p1, int p2) -{ - int diff; -#ifdef HMMER_THREADS - static pthread_once_t firsttime = PTHREAD_ONCE_INIT; - pthread_once(&firsttime, init_ilogsum); -#else - static int firsttime = 1; - if (firsttime) { init_ilogsum(); firsttime = 0; } -#endif - - diff = p1-p2; - if (diff >= LOGSUM_TBL) return p1; - else if (diff <= -LOGSUM_TBL) return p2; - else if (diff > 0) return p1 + ilogsum_lookup[diff]; - else return p2 + ilogsum_lookup[-diff]; -} - -/* Function: LogNorm() - * - * Purpose: Normalize a vector of log likelihoods, changing it - * to a probability vector. Be careful of overflowing exp(). - * Implementation adapted from Graeme Mitchison. - * - * Args: vec - vector destined to become log probabilities - * n - length of vec - */ -void -LogNorm(float *vec, int n) -{ - int x; - float max = -1.0e30; - float denom = 0.; - - for (x = 0; x < n; x++) - if (vec[x] > max) max = vec[x]; - for (x = 0; x < n; x++) - if (vec[x] > max - 50.) - denom += exp(vec[x] - max); - for (x = 0; x < n; x++) - if (vec[x] > max - 50.) - vec[x] = exp(vec[x] - max) / denom; - else - vec[x] = 0.0; -} - - -/* Function: Logp_cvec() - * - * Purpose: Calculates ln P(cvec|dirichlet), the log probability of a - * count vector given a Dirichlet distribution. Adapted - * from an implementation by Graeme Mitchison. 
- * - * Args: cvec - count vector - * n - length of cvec - * alpha - Dirichlet alpha terms - * - * Return: log P(cvec|dirichlet) - */ -float -Logp_cvec(float *cvec, int n, float *alpha) -{ - float lnp; /* log likelihood of P(cvec | Dirichlet) */ - float sum1, sum2, sum3; - int x; - - sum1 = sum2 = sum3 = lnp = 0.0; - for (x = 0; x < n; x++) - { - sum1 += cvec[x] + alpha[x]; - sum2 += alpha[x]; - sum3 += cvec[x]; - lnp += Gammln(alpha[x] + cvec[x]); - lnp -= Gammln(cvec[x] + 1.); - lnp -= Gammln(alpha[x]); - } - lnp -= Gammln(sum1); - lnp += Gammln(sum2); - lnp += Gammln(sum3 + 1.); - return lnp; -} - -/* Function: SampleDirichlet() - * - * Purpose: Given a Dirichlet distribution defined by - * a vector of n alpha terms, sample of probability - * distribution of dimension n. - * - * This code was derived from source provided - * by Betty Lazareva, from Gary Churchill's group. - * - * Args: alpha - vector of Dirichlet alphas components - * n - number of components - * ret_p - RETURN: sampled probability vector. - * - * Return: (void) - * ret_p, an n-dimensional array alloced by the caller, - * is filled. - */ -void -SampleDirichlet(float *alpha, int n, float *p) -{ - int x; - - for (x = 0; x < n; x++) - p[x] = SampleGamma(alpha[x]); - FNorm(p, n); -} - - -/* Function: SampleGamma() - * - * Purpose: Return a random deviate distributed as Gamma(alpha, 1.0). 
- * Uses two different accept/reject algorithms, one - * for 0= 1.0) - { - /*CONSTCOND*/ while (1) - { - lambda = sqrt(2.0*alpha -1.0); - U = sre_random(); - V = U/(1-U); - X = alpha * pow(V, 1/lambda); - W = .25*exp(-X+alpha)*pow(V,1.0+alpha/lambda)*pow(1.0+1.0/V, 2.0); - if (sre_random() <= W) - return X; - } - } - else if (alpha > 0.0) - { - /*CONSTCOND*/ while (1) - { - U = sre_random(); - V = U*(1+ alpha/exp(1.0)); - if (V > 1.0) - { - X = -log( (1-V+alpha/exp(1.0))/alpha); - if (sre_random() <= pow(X, alpha-1.0)) - return X; - } - else - { - X = pow(V,1.0/alpha); - if (sre_random() <= exp(-X)) - return X; - } - } - } - Die("Invalid argument alpha < 0.0 to SampleGamma()"); - /*NOTREACHED*/ - return 0.0; -} - -/* Function: SampleCountvector() - * - * Purpose: Given a probability vector p of dimensionality - * n, sample c counts and store them in cvec. - * cvec is n-dimensional and is alloced by the caller. - */ -void -SampleCountvector(float *p, int n, int c, float *cvec) -{ - int i; - - FSet(cvec, n, 0.0); - for (i = 0; i < c; i++) - cvec[FChoose(p,n)] += 1.0; -} - - - -/* Function: P_PvecGivenDirichlet() - * - * Purpose: Calculate the log probability of a probability - * vector given a single Dirichlet component, alpha. - * Follows Sjolander (1996) appendix, lemma 2. 
- * - * Return: log P(p | alpha) - */ -float -P_PvecGivenDirichlet(float *p, int n, float *alpha) -{ - float sum; /* for Gammln(|alpha|) in Z */ - float logp; /* RETURN: log P(p|alpha) */ - int x; - - sum = logp = 0.0; - for (x = 0; x < n; x++) - if (p[x] > 0.0) /* any param that is == 0.0 doesn't exist */ - { - logp += (alpha[x]-1.0) * log(p[x]); - logp -= Gammln(alpha[x]); - sum += alpha[x]; - } - logp += Gammln(sum); - return logp; -} - - diff --git a/forester/archive/RIO/others/hmmer/src/misc.c b/forester/archive/RIO/others/hmmer/src/misc.c deleted file mode 100644 index 9a7cf26..0000000 --- a/forester/archive/RIO/others/hmmer/src/misc.c +++ /dev/null @@ -1,140 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* misc.c - * SRE, Thu Jul 15 18:49:19 1993 - * - * Functions that I don't know quite where to put yet. - */ - -#include -#include -#include -#include -#include -#include - -#include "squid.h" -#include "config.h" -#include "structs.h" -#include "version.h" - -/* Function: Getword() - * - * Purpose: little function used by ReadPrior() and ReadHMM() to parse - * next valid field out of an open file, ignoring - * comments. '#' marks the beginning of a comment. 
- * - * Arg: fp - open file for reading - * type - sqdARG_INT, sqdARG_FLOAT, or sqdARG_STRING from squid.h - */ -char * -Getword(FILE *fp, int type) -{ - static char buffer[512]; - static char *sptr = NULL; - - if (sptr != NULL) sptr = strtok(NULL, " \t\n"); - - while (sptr == NULL) - { - if ((sptr = fgets(buffer, 512, fp)) == NULL) return NULL; - if ((sptr = strchr(buffer, '#')) != NULL) *sptr = '\0'; - sptr = strtok(buffer, " \t\n"); - } - - switch (type) { - case sqdARG_STRING: - if (strlen(sptr) == 0) { - Warn("Parse failed: expected string, got nothing"); - sptr = NULL; - } - break; - case sqdARG_INT: - if (!IsInt(sptr)) { - Warn("Parse failed: expected integer, got %s", sptr); - sptr = NULL; - } - break; - case sqdARG_FLOAT: - if (!IsReal(sptr)) { - Warn("Parse failed: expected real value, got %s", sptr); - sptr = NULL; - } - break; - } - - return sptr; -} - - -/* Function: Getline() - * - * Purpose: Get the next non-blank, non-comment line from an open file. - * A comment line has '#' as the first non-whitespace character. - * Returns NULL if no line is found. - * Syntax is the same as fgets(). - * - * Args: s - allocated storage for line - * n - number of characters allocated for s - * fp - open FILE * - * - * Return: Either s, or NULL if no new line is found. - */ -char * -Getline(char *s, int n, FILE *fp) -{ - char *first; - - do { - if (fgets(s, n, fp) == NULL) return NULL; - first = s; while (isspace((int) (*first))) first++; - } while (*first == '#' || *first == '\0'); - return s; -} - - -/* Function: SetAutocuts() - * Date: SRE, Thu Jun 8 08:19:46 2000 [TW721 over Ireland] - * - * Purpose: Set score thresholds using the GA, TC, or NC information - * in an HMM. - * - * Args: thresh - score threshold structure. autocut must be set - * properly (CUT_GA, CUT_NC, or CUT_TC). - * hmm - HMM containing appropriate score cutoff info - * - * Returns: 1 on success. 
- * 0 if HMM does not have the score cutoffs available -- caller - * will have to decide on a fallback plan. - * Has no effect (and returns success) if autocut is - * CUT_NONE. - */ -int -SetAutocuts(struct threshold_s *thresh, struct plan7_s *hmm) -{ - if (thresh->autocut == CUT_GA) { - if (! (hmm->flags & PLAN7_GA)) return 0; - thresh->globT = hmm->ga1; - thresh->domT = hmm->ga2; - thresh->globE = thresh->domE = FLT_MAX; - } else if (thresh->autocut == CUT_NC) { - if (! (hmm->flags & PLAN7_NC)) return 0; - thresh->globT = hmm->nc1; - thresh->domT = hmm->nc2; - thresh->globE = thresh->domE = FLT_MAX; - } else if (thresh->autocut == CUT_TC) { - if (! (hmm->flags & PLAN7_TC)) return 0; - thresh->globT = hmm->tc1; - thresh->domT = hmm->tc2; - thresh->globE = thresh->domE = FLT_MAX; - } - return 1; -} diff --git a/forester/archive/RIO/others/hmmer/src/modelmakers.c b/forester/archive/RIO/others/hmmer/src/modelmakers.c deleted file mode 100644 index 8e5eab8..0000000 --- a/forester/archive/RIO/others/hmmer/src/modelmakers.c +++ /dev/null @@ -1,940 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* modelmakers.c - * SRE, Fri Nov 15 10:00:04 1996 - * - * Construction of models from multiple alignments. Three versions: - * Handmodelmaker() -- use #=RF annotation to indicate match columns - * Fastmodelmaker() -- Krogh/Haussler heuristic - * Maxmodelmaker() -- MAP model construction algorithm (Eddy, - * unpublished) - * - * The meat of the model construction code is in matassign2hmm(). 
- * The three model construction strategies simply label which columns - * are supposed to be match states, and then hand this info to - * matassign2hmm(). - * - * Two wrinkles to watch for: - * 1) The alignment is assumed to contain sequence fragments. Look in - * fake_tracebacks() for how internal entry/exit points are handled. - * 2) Plan7 disallows DI and ID transitions, but an alignment may - * imply these. Look in trace_doctor() for how DI and ID transitions - * are removed. - */ - -#include -#include -#include -#include -#include -#include - -#include "structs.h" -#include "config.h" -#include "funcs.h" -#include "squid.h" -#include "msa.h" - -/* flags used for matassign[] arrays -- - * assignment of aligned columns to match/insert states - */ -#define ASSIGN_MATCH (1<<0) -#define FIRST_MATCH (1<<1) -#define LAST_MATCH (1<<2) -#define ASSIGN_INSERT (1<<3) -#define EXTERNAL_INSERT_N (1<<4) -#define EXTERNAL_INSERT_C (1<<5) - -static int build_cij(char **aseqs, int nseq, int *insopt, int i, int j, - float *wgt, float *cij); -static int estimate_model_length(MSA *msa); -static void matassign2hmm(MSA *msa, char **dsq, - int *matassign, struct plan7_s **ret_hmm, - struct p7trace_s ***ret_tr); -static void fake_tracebacks(char **aseq, int nseq, int alen, int *matassign, - struct p7trace_s ***ret_tr); -static void trace_doctor(struct p7trace_s *tr, int M, int *ret_ndi, - int *ret_nid); -static void annotate_model(struct plan7_s *hmm, int *matassign, MSA *msa); -static void print_matassign(int *matassign, int alen); - - - -/* Function: P7Handmodelmaker() - * - * Purpose: Manual model construction: - * Construct an HMM from an alignment, where the #=RF line - * of a HMMER alignment file is given to indicate - * the columns assigned to matches vs. inserts. - * - * NOTE: Handmodelmaker() will slightly revise the alignment - * if necessary, if the assignment of columns implies - * DI and ID transitions. 
- * - * Returns both the HMM in counts form (ready for applying - * Dirichlet priors as the next step), and fake tracebacks - * for each aligned sequence. - * - * Args: msa - multiple sequence alignment - * dsq - digitized unaligned aseq's - * ret_hmm - RETURN: counts-form HMM - * ret_tr - RETURN: array of tracebacks for aseq's - * - * Return: (void) - * ret_hmm and ret_tr alloc'ed here; FreeTrace(tr[i]), free(tr), - * FreeHMM(hmm). - */ -void -P7Handmodelmaker(MSA *msa, char **dsq, - struct plan7_s **ret_hmm, struct p7trace_s ***ret_tr) -{ - int *matassign; /* MAT state assignments if 1; 1..alen */ - int apos; /* counter for aligned columns */ - - /* Make sure we have all the info about the alignment that we need */ - if (msa->rf == NULL) - Die("Alignment must have RF annotation to hand-build an HMM"); - - /* Allocation */ - matassign = (int *) MallocOrDie (sizeof(int) * (msa->alen+1)); - - /* Determine match assignment from optional annotation - */ - matassign[0] = 0; - for (apos = 0; apos < msa->alen; apos++) - { - matassign[apos+1] = 0; - if (!isgap(msa->rf[apos])) - matassign[apos+1] |= ASSIGN_MATCH; - else - matassign[apos+1] |= ASSIGN_INSERT; - } - - /* Hand matassign off for remainder of model construction - */ - /* print_matassign(matassign, msa->alen); */ - matassign2hmm(msa, dsq, matassign, ret_hmm, ret_tr); - - free(matassign); - return; -} - - -/* Function: P7Fastmodelmaker() - * - * Purpose: Heuristic model construction: - * Construct an HMM from an alignment using the original - * Krogh/Haussler heuristic; any column with more - * symbols in it than a given fraction is assigned to - * match. - * - * NOTE: Fastmodelmaker() will slightly revise the - * alignment if the assignment of columns implies - * DI and ID transitions. - * - * Returns the HMM in counts form (ready for applying Dirichlet - * priors as the next step). Also returns fake traceback - * for each training sequence. 
- * - * Args: msa - multiple sequence alignment - * dsq - digitized unaligned aseq's - * maxgap - if more gaps than this, column becomes insert. - * ret_hmm - RETURN: counts-form HMM - * ret_tr - RETURN: array of tracebacks for aseq's - * - * Return: (void) - * ret_hmm and ret_tr alloc'ed here; FreeTrace(tr[i]), free(tr), - * FreeHMM(hmm). - */ -void -P7Fastmodelmaker(MSA *msa, char **dsq, float maxgap, - struct plan7_s **ret_hmm, struct p7trace_s ***ret_tr) -{ - int *matassign; /* MAT state assignments if 1; 1..alen */ - int idx; /* counter over sequences */ - int apos; /* counter for aligned columns */ - int ngap; /* number of gaps in a column */ - - /* Allocations: matassign is 1..alen array of bit flags - */ - matassign = (int *) MallocOrDie (sizeof(int) * (msa->alen+1)); - - /* Determine match assignment by counting symbols in columns - */ - matassign[0] = 0; - for (apos = 0; apos < msa->alen; apos++) { - matassign[apos+1] = 0; - - ngap = 0; - for (idx = 0; idx < msa->nseq; idx++) - if (isgap(msa->aseq[idx][apos])) - ngap++; - - if ((float) ngap / (float) msa->nseq > maxgap) - matassign[apos+1] |= ASSIGN_INSERT; - else - matassign[apos+1] |= ASSIGN_MATCH; - } - - /* Once we have matassign calculated, all modelmakers behave - * the same; matassign2hmm() does this stuff (traceback construction, - * trace counting) and sets up ret_hmm and ret_tr. - */ - matassign2hmm(msa, dsq, matassign, ret_hmm, ret_tr); - - free(matassign); - return; -} - - -/* Function: P7Maxmodelmaker() - * - * Purpose: The Unholy Beast of HMM model construction algorithms -- - * maximum a posteriori construction. A tour de force and - * probably overkill. MAP construction for Krogh - * HMM-profiles is fairly straightforward, but MAP construction of - * Plan 7 HMM-profiles is, er, intricate. - * - * Given a multiple alignment, construct an optimal (MAP) model - * architecture. Return a counts-based HMM. 
- * - * Args: msa - multiple sequence alignment - * dsq - digitized, unaligned seqs - * maxgap - above this, trailing columns are assigned to C - * prior - priors on parameters to use for model construction - * null - random sequence model emissions - * null_p1 - random sequence model p1 transition - * mpri - prior on architecture: probability of new match node - * ret_hmm - RETURN: new hmm (counts form) - * ret_tr - RETURN: array of tracebacks for aseq's - * - * Return: (void) - * ret_hmm and ret_tr (if !NULL) must be free'd by the caller. - */ -void -P7Maxmodelmaker(MSA *msa, char **dsq, float maxgap, - struct p7prior_s *prior, - float *null, float null_p1, float mpri, - struct plan7_s **ret_hmm, struct p7trace_s ***ret_tr) -{ - int idx; /* counter for seqs */ - int i, j; /* positions in alignment */ - int x; /* counter for syms or transitions */ - float **matc; /* count vectors: [1..alen][0..19] */ - float cij[8], tij[8]; /* count and score transit vectors */ - float matp[MAXABET]; /* match emission vector */ - float insp[MAXABET]; /* insert score vector */ - float insc[MAXABET]; /* insert count vector */ - float *sc; /* DP scores [0,1..alen,alen+1] */ - int *tbck; /* traceback ptrs for sc */ - int *matassign; /* match assignments [1..alen] */ - int *insopt; /* number of inserted chars [0..nseq-1] */ - int first, last; /* positions of first and last cols [1..alen] */ - float bm1, bm2; /* estimates for start,internal b->m t's */ - int est_M; /* estimate for the size of the model */ - float t_me; /* estimate for an internal M->E transition */ - float new, bestsc; /* new score, best score so far */ - int code; /* optimization: return code from build_cij() */ - int ngap; /* gap count in a column */ - float wgtsum; /* sum of weights; do not assume it is nseq */ - - /* Allocations - */ - matc = (float **) MallocOrDie (sizeof(float *) * (msa->alen+1)); - sc = (float *) MallocOrDie (sizeof(float) * (msa->alen+2)); - tbck = (int *) MallocOrDie (sizeof(int) * 
(msa->alen+2)); - matassign = (int *) MallocOrDie (sizeof(int) * (msa->alen+1)); - insopt = (int *) MallocOrDie (sizeof(int) * msa->nseq); - for (i = 0; i < msa->alen; i++) { - matc[i+1] = (float *) MallocOrDie (Alphabet_size * sizeof(float)); - FSet(matc[i+1], Alphabet_size, 0.); - } - - /* Precalculations - */ - for (i = 0; i < msa->alen; i++) - for (idx = 0; idx < msa->nseq; idx++) - if (!isgap(msa->aseq[idx][i])) - P7CountSymbol(matc[i+1], SymbolIndex(msa->aseq[idx][i]), msa->wgt[idx]); - mpri = sreLOG2(mpri); - - FCopy(insp, prior->i[0], Alphabet_size); - FNorm(insp, Alphabet_size); - wgtsum = FSum(msa->wgt, msa->nseq); - for (x = 0; x < Alphabet_size; x++) - insp[x] = sreLOG2(insp[x] / null[x]); - - /* Estimate the relevant special transitions. - */ - est_M = estimate_model_length(msa); - t_me = 0.5 / (float) (est_M-1); - bm1 = 0.5; - bm2 = 0.5 / (float) (est_M-1); - bm1 = sreLOG2(bm1 / null_p1); - bm2 = sreLOG2(bm2 / null_p1); - - /* Estimate the position of the last match-assigned column - * by counting gap frequencies. - */ - maxgap = 0.5; - for (last = msa->alen; last >= 1; last--) { - ngap = 0; - for (idx = 0; idx < msa->nseq; idx++) - if (isgap(msa->aseq[idx][last-1])) ngap++; - if ((float) ngap / (float) msa->nseq <= maxgap) - break; - } - - /* Initialization - */ - sc[last] = 0.; - tbck[last] = 0; - - /* Set ME gaps to '_' - */ - for (idx = 0; idx < msa->nseq; idx++) - for (i = last; i > 0 && isgap(msa->aseq[idx][i-1]); i--) - msa->aseq[idx][i-1] = '_'; - - /* Main recursion moves from right to left. 
- */ - for (i = last-1; i > 0; i--) { - /* Calculate match emission scores for i */ - FCopy(matp, matc[i], Alphabet_size); - P7PriorifyEmissionVector(matp, prior, prior->mnum, prior->mq, prior->m, NULL); - for (x = 0; x < Alphabet_size; x++) - matp[x] = sreLOG2(matp[x] / null[x]); - - /* Initialize insert counters to zero */ - FSet(insc, Alphabet_size, 0.); - for (idx = 0; idx < msa->nseq; idx++) insopt[idx] = 0; - - sc[i] = -FLT_MAX; - for (j = i+1; j <= last; j++) { - /* build transition matrix for column pair i,j */ - code = build_cij(msa->aseq, msa->nseq, insopt, i, j, msa->wgt, cij); - if (code == -1) break; /* no j to our right can work for us */ - if (code == 1) { - FCopy(tij, cij, 7); - P7PriorifyTransitionVector(tij, prior, prior->tq); - FNorm(tij, 3); - tij[TMM] = sreLOG2(tij[TMM] / null_p1); - tij[TMI] = sreLOG2(tij[TMI] / null_p1); - tij[TMD] = sreLOG2(tij[TMD]); - tij[TIM] = sreLOG2(tij[TIM] / null_p1); - tij[TII] = sreLOG2(tij[TII] / null_p1); - tij[TDM] = sreLOG2(tij[TDM] / null_p1); - tij[TDD] = sreLOG2(tij[TDD]); - /* calculate the score of using this j. */ - new = sc[j] + FDot(tij, cij, 7) + FDot(insp, insc, Alphabet_size); - - SQD_DPRINTF2(("%3d %3d new=%6.2f scj=%6.2f m=%6.2f i=%6.2f t=%6.2f\n", - i, j, new, sc[j], FDot(matp, matc[i], Alphabet_size), - FDot(insp, insc, Alphabet_size), FDot(tij, cij, 7))); - - /* keep it if it's better */ - if (new > sc[i]) { - sc[i] = new; - tbck[i] = j; - } - } - /* bump insc, insopt insert symbol counters */ - FAdd(insc, matc[j], Alphabet_size); - for (idx = 0; idx < msa->nseq; idx++) - if (!isgap(msa->aseq[idx][j-1])) insopt[idx]++; - } - /* add in constant contributions for col i */ - /* note ad hoc scaling of mpri by wgtsum (us. nseq)*/ - sc[i] += FDot(matp, matc[i], Alphabet_size) + mpri * wgtsum; - } /* end loop over start positions i */ - - /* Termination: place the begin state. - * log odds score for S->N->B is all zero except for NB transition, which - * is a constant. 
So we only have to evaluate BM transitions. - */ - bestsc = -FLT_MAX; - for (i = 1; i <= last; i++) { - new = sc[i]; - for (idx = 0; idx < msa->nseq; idx++) { - if (isgap(msa->aseq[idx][j-1])) - new += bm2; /* internal B->M transition */ - else - new += bm1; /* B->M1 transition */ - } - if (new > bestsc) { - bestsc = new; - first = i; - } - } - - /* Traceback - */ - matassign[0] = 0; - for (i = 1; i <= msa->alen; i++) matassign[i] = ASSIGN_INSERT; - for (i = first; i != 0; i = tbck[i]) { - matassign[i] &= ~ASSIGN_INSERT; - matassign[i] |= ASSIGN_MATCH; - } - - /* Hand matassign off for remainder of model construction - */ - /* print_matassign(matassign, ainfo->alen); */ - matassign2hmm(msa, dsq, matassign, ret_hmm, ret_tr); - - /* Clean up. - */ - for (i = 1; i <= msa->alen; i++) free(matc[i]); - free(matc); - free(sc); - free(tbck); - free(matassign); - free(insopt); -} - - -/* Function: build_cij() - * - * Purpose: Construct a counts vector for transitions between - * column i and column j in a multiple alignment. - * - * '_' gap characters indicate "external" gaps which - * are to be dealt with by B->M and M->E transitions. - * These characters must be placed by a preprocessor. - * - * insopt is an "insert optimization" -- an incrementor - * which keeps track of the number of insert symbols - * between i and j. - * - * Args: aseqs - multiple alignment. [0.nseq-1][0.alen-1] - * nseq - number of seqs in aseqs - * insopt - number of inserts per seq between i/j [0.nseq-1] - * i - i column [1.alen], off by one from aseqs - * j - j column [1.alen], off by one from aseqs - * wgt - per-seq weights [0.nseq-1] - * cij - transition count vectors [0..7] - * - * Return: -1 if an illegal transition was seen for this i/j assignment *and* - * we are guaranteed that any j to the right will also - * have illegal transitions. - * 0 if an illegal transition was seen, but a j further to the - * right may work. - * 1 if all transitions were legal. 
- */ -static int -build_cij(char **aseqs, int nseq, int *insopt, int i, int j, - float *wgt, float *cij) -{ - int idx; /* counter for seqs */ - - i--; /* make i,j relative to aseqs [0..alen-1] */ - j--; - FSet(cij, 8, 0.); /* zero cij */ - for (idx = 0; idx < nseq; idx++) { - if (insopt[idx] > 0) { - if (isgap(aseqs[idx][i])) return -1; /* D->I prohibited. */ - if (isgap(aseqs[idx][j])) return 0; /* I->D prohibited. */ - cij[TMI] += wgt[idx]; - cij[TII] += (insopt[idx]-1) * wgt[idx]; - cij[TIM] += wgt[idx]; - } else { - if (!isgap(aseqs[idx][i])) { - if (aseqs[idx][j] == '_') ; /* YO! what to do with trailer? */ - else if (isgap(aseqs[idx][j])) cij[TMD] += wgt[idx]; - else cij[TMM] += wgt[idx]; - } else { /* ignores B->E possibility */ - if (aseqs[idx][j] == '_') continue; - else if (isgap(aseqs[idx][j])) cij[TDD] += wgt[idx]; - else cij[TDM] += wgt[idx]; - } - } - } - return 1; -} - - -/* Function: estimate_model_length() - * - * Purpose: Return a decent guess about the length of the model, - * based on the lengths of the sequences. - * - * Algorithm is dumb: use weighted average length. - * - * Don't assume that weights sum to nseq! - */ -static int -estimate_model_length(MSA *msa) -{ - int idx; - float total = 0.; - float wgtsum = 0.; - - for (idx = 0; idx < msa->nseq; idx++) - { - total += msa->wgt[idx] * DealignedLength(msa->aseq[idx]); - wgtsum += msa->wgt[idx]; - } - - return (int) (total / wgtsum); -} - - -/* Function: matassign2hmm() - * - * Purpose: Given an assignment of alignment columns to match vs. - * insert, finish the final part of the model construction - * calculation that is constant between model construction - * algorithms. 
- * - * Args: msa - multiple sequence alignment - * dsq - digitized unaligned aseq's - * matassign - 1..alen bit flags for column assignments - * ret_hmm - RETURN: counts-form HMM - * ret_tr - RETURN: array of tracebacks for aseq's - * - * Return: (void) - * ret_hmm and ret_tr alloc'ed here for the calling - * modelmaker function. - */ -static void -matassign2hmm(MSA *msa, char **dsq, int *matassign, - struct plan7_s **ret_hmm, struct p7trace_s ***ret_tr) -{ - struct plan7_s *hmm; /* RETURN: new hmm */ - struct p7trace_s **tr; /* fake tracebacks for each seq */ - int M; /* length of new model in match states */ - int idx; /* counter over sequences */ - int apos; /* counter for aligned columns */ - - /* how many match states in the HMM? */ - M = 0; - for (apos = 1; apos <= msa->alen; apos++) { - if (matassign[apos] & ASSIGN_MATCH) - M++; - } - /* delimit N-terminal tail */ - for (apos=1; matassign[apos] & ASSIGN_INSERT && apos <= msa->alen; apos++) - matassign[apos] |= EXTERNAL_INSERT_N; - if (apos <= msa->alen) matassign[apos] |= FIRST_MATCH; - - /* delimit C-terminal tail */ - for (apos=msa->alen; matassign[apos] & ASSIGN_INSERT && apos > 0; apos--) - matassign[apos] |= EXTERNAL_INSERT_C; - if (apos > 0) matassign[apos] |= LAST_MATCH; - - /* print_matassign(matassign, msa->alen); */ - - /* make fake tracebacks for each seq */ - fake_tracebacks(msa->aseq, msa->nseq, msa->alen, matassign, &tr); - /* build model from tracebacks */ - hmm = AllocPlan7(M); - ZeroPlan7(hmm); - for (idx = 0; idx < msa->nseq; idx++) { - /* P7PrintTrace(stdout, tr[idx], NULL, NULL); */ - P7TraceCount(hmm, dsq[idx], msa->wgt[idx], tr[idx]); - } - /* annotate new model */ - annotate_model(hmm, matassign, msa); - - /* Set #=RF line of alignment to reflect our assignment - * of match, delete. matassign is valid from 1..alen and is off - * by one from msa->rf. 
- */ - if (msa->rf != NULL) free(msa->rf); - msa->rf = (char *) MallocOrDie (sizeof(char) * (msa->alen + 1)); - for (apos = 0; apos < msa->alen; apos++) - msa->rf[apos] = matassign[apos+1] & ASSIGN_MATCH ? 'x' : '.'; - msa->rf[msa->alen] = '\0'; - - /* Cleanup and return. */ - if (ret_tr != NULL) *ret_tr = tr; - else { for (idx = 0; idx < msa->nseq; idx++) P7FreeTrace(tr[idx]); free(tr); } - if (ret_hmm != NULL) *ret_hmm = hmm; else FreePlan7(hmm); - return; -} - - - -/* Function: fake_tracebacks() - * - * Purpose: From a consensus assignment of columns to MAT/INS, construct fake - * tracebacks for each individual sequence. - * - * Note: Fragment tolerant by default. Internal entries are - * B->M_x, instead of B->D1->D2->...->M_x; analogously - * for internal exits. - * - * Args: aseqs - alignment [0..nseq-1][0..alen-1] - * nseq - number of seqs in alignment - * alen - length of alignment in columns - * matassign - assignment of column; [1..alen] (off one from aseqs) - * ret_tr - RETURN: array of tracebacks - * - * Return: (void) - * ret_tr is alloc'ed here. Caller must free. - */ -static void -fake_tracebacks(char **aseq, int nseq, int alen, int *matassign, - struct p7trace_s ***ret_tr) -{ - struct p7trace_s **tr; - int idx; /* counter over sequences */ - int i; /* position in raw sequence (1..L) */ - int k; /* position in HMM */ - int apos; /* position in alignment columns */ - int tpos; /* position in traceback */ - - tr = (struct p7trace_s **) MallocOrDie (sizeof(struct p7trace_s *) * nseq); - - for (idx = 0; idx < nseq; idx++) - { - P7AllocTrace(alen+6, &tr[idx]); /* allow room for S,N,B,E,C,T */ - - /* all traces start with S state... 
*/ - tr[idx]->statetype[0] = STS; - tr[idx]->nodeidx[0] = 0; - tr[idx]->pos[0] = 0; - /* ...and transit to N state; N-term tail - is emitted on N->N transitions */ - tr[idx]->statetype[1] = STN; - tr[idx]->nodeidx[1] = 0; - tr[idx]->pos[1] = 0; - - i = 1; - k = 0; - tpos = 2; - for (apos = 0; apos < alen; apos++) - { - tr[idx]->statetype[tpos] = STBOGUS; /* bogus, deliberately, to debug */ - - if (matassign[apos+1] & FIRST_MATCH) - { /* BEGIN */ - tr[idx]->statetype[tpos] = STB; - tr[idx]->nodeidx[tpos] = 0; - tr[idx]->pos[tpos] = 0; - tpos++; - } - - if (matassign[apos+1] & ASSIGN_MATCH && ! isgap(aseq[idx][apos])) - { /* MATCH */ - k++; /* move to next model pos */ - tr[idx]->statetype[tpos] = STM; - tr[idx]->nodeidx[tpos] = k; - tr[idx]->pos[tpos] = i; - i++; - tpos++; - } - else if (matassign[apos+1] & ASSIGN_MATCH) - { /* DELETE */ - /* being careful about S/W transitions; no B->D transitions */ - k++; /* *always* move on model when ASSIGN_MATCH */ - if (tr[idx]->statetype[tpos-1] != STB) - { - tr[idx]->statetype[tpos] = STD; - tr[idx]->nodeidx[tpos] = k; - tr[idx]->pos[tpos] = 0; - tpos++; - } - } - else if (matassign[apos+1] & EXTERNAL_INSERT_N && - ! isgap(aseq[idx][apos])) - { /* N-TERMINAL TAIL */ - tr[idx]->statetype[tpos] = STN; - tr[idx]->nodeidx[tpos] = 0; - tr[idx]->pos[tpos] = i; - i++; - tpos++; - } - else if (matassign[apos+1] & EXTERNAL_INSERT_C && - ! isgap(aseq[idx][apos])) - { /* C-TERMINAL TAIL */ - tr[idx]->statetype[tpos] = STC; - tr[idx]->nodeidx[tpos] = 0; - tr[idx]->pos[tpos] = i; - i++; - tpos++; - } - else if (! 
isgap(aseq[idx][apos])) - { /* INSERT */ - tr[idx]->statetype[tpos] = STI; - tr[idx]->nodeidx[tpos] = k; - tr[idx]->pos[tpos] = i; - i++; - tpos++; - } - - if (matassign[apos+1] & LAST_MATCH) - { /* END */ - /* be careful about S/W transitions; may need to roll - * back over some D's because there's no D->E transition - */ - while (tr[idx]->statetype[tpos-1] == STD) - tpos--; - tr[idx]->statetype[tpos] = STE; - tr[idx]->nodeidx[tpos] = 0; - tr[idx]->pos[tpos] = 0; - tpos++; - /* and then transit E->C; - alignments that use J are undefined; - C-term tail is emitted on C->C transitions */ - tr[idx]->statetype[tpos] = STC; - tr[idx]->nodeidx[tpos] = 0; - tr[idx]->pos[tpos] = 0; - tpos++; - } - } - /* all traces end with T state */ - tr[idx]->statetype[tpos] = STT; - tr[idx]->nodeidx[tpos] = 0; - tr[idx]->pos[tpos] = 0; - tr[idx]->tlen = ++tpos; - /* deal with DI, ID transitions */ - /* k == M here */ - trace_doctor(tr[idx], k, NULL, NULL); - - } /* end for sequence # idx */ - - *ret_tr = tr; - return; -} - -/* Function: trace_doctor() - * - * Purpose: Plan 7 disallows D->I and I->D "chatter" transitions. - * However, these transitions may be implied by many - * alignments for hand- or heuristic- built HMMs. - * trace_doctor() collapses I->D or D->I into a - * single M position in the trace. - * Similarly, B->I and I->E transitions may be implied - * by an alignment. - * - * trace_doctor does not examine any scores when it does - * this. In ambiguous situations (D->I->D) the symbol - * will be pulled arbitrarily to the left, regardless - * of whether that's the best column to put it in or not. 
- * - * Args: tr - trace to doctor - * M - length of model that traces are for - * ret_ndi - number of DI transitions doctored - * ret_nid - number of ID transitions doctored - * - * Return: (void) - * tr is modified - */ -static void -trace_doctor(struct p7trace_s *tr, int mlen, int *ret_ndi, int *ret_nid) -{ - int opos; /* position in old trace */ - int npos; /* position in new trace (<= opos) */ - int ndi, nid; /* number of DI, ID transitions doctored */ - - /* overwrite the trace from left to right */ - ndi = nid = 0; - opos = npos = 0; - while (opos < tr->tlen) { - /* fix implied D->I transitions; D transforms to M, I pulled in */ - if (tr->statetype[opos] == STD && tr->statetype[opos+1] == STI) { - tr->statetype[npos] = STM; - tr->nodeidx[npos] = tr->nodeidx[opos]; /* D transforms to M */ - tr->pos[npos] = tr->pos[opos+1]; /* insert char moves back */ - opos += 2; - npos += 1; - ndi++; - } /* fix implied I->D transitions; D transforms to M, I is pushed in */ - else if (tr->statetype[opos]== STI && tr->statetype[opos+1]== STD) { - tr->statetype[npos] = STM; - tr->nodeidx[npos] = tr->nodeidx[opos+1];/* D transforms to M */ - tr->pos[npos] = tr->pos[opos]; /* insert char moves up */ - opos += 2; - npos += 1; - nid++; - } /* fix implied B->I transitions; pull I back to its M */ - else if (tr->statetype[opos]== STI && tr->statetype[opos-1]== STB) { - tr->statetype[npos] = STM; - tr->nodeidx[npos] = tr->nodeidx[opos]; /* offending I transforms to M */ - tr->pos[npos] = tr->pos[opos]; - opos++; - npos++; - } /* fix implied I->E transitions; push I to next M */ - else if (tr->statetype[opos]== STI && tr->statetype[opos+1]== STE) { - tr->statetype[npos] = STM; - tr->nodeidx[npos] = tr->nodeidx[opos]+1;/* offending I transforms to M */ - tr->pos[npos] = tr->pos[opos]; - opos++; - npos++; - } /* rare: N-N-B-E becomes N-B-M_1-E (swap B,N) */ - else if (tr->statetype[opos]==STB && tr->statetype[opos+1]==STE - && tr->statetype[opos-1]==STN && tr->pos[opos-1] > 0) { - 
tr->statetype[npos] = STM; - tr->nodeidx[npos] = 1; - tr->pos[npos] = tr->pos[opos-1]; - tr->statetype[npos-1] = STB; - tr->nodeidx[npos-1] = 0; - tr->pos[npos-1] = 0; - opos++; - npos++; - } /* rare: B-E-C-C-x becomes B-M_M-E-C-x (swap E,C) */ - else if (tr->statetype[opos]==STE && tr->statetype[opos-1]==STB - && tr->statetype[opos+1]==STC - && tr->statetype[opos+2]==STC) { - tr->statetype[npos] = STM; - tr->nodeidx[npos] = mlen; - tr->pos[npos] = tr->pos[opos+2]; - tr->statetype[npos+1] = STE; - tr->nodeidx[npos+1] = 0; - tr->pos[npos+1] = 0; - tr->statetype[npos+2] = STC; /* first C must be a nonemitter */ - tr->nodeidx[npos+2] = 0; - tr->pos[npos+2] = 0; - opos+=3; - npos+=3; - } /* everything else is just copied */ - else { - tr->statetype[npos] = tr->statetype[opos]; - tr->nodeidx[npos] = tr->nodeidx[opos]; - tr->pos[npos] = tr->pos[opos]; - opos++; - npos++; - } - } - tr->tlen = npos; - - if (ret_ndi != NULL) *ret_ndi = ndi; - if (ret_nid != NULL) *ret_nid = nid; - return; -} - - -/* Function: annotate_model() - * - * Purpose: Add rf, cs optional annotation to a new model. - * - * Args: hmm - new model - * matassign - which alignment columns are MAT; [1..alen] - * msa - alignment, including annotation to transfer - * - * Return: (void) - */ -static void -annotate_model(struct plan7_s *hmm, int *matassign, MSA *msa) -{ - int apos; /* position in matassign, 1.alen */ - int k; /* position in model, 1.M */ - char *pri; /* X-PRM, X-PRI, X-PRT annotation */ - - /* Transfer reference coord annotation from alignment, - * if available - */ - if (msa->rf != NULL) { - hmm->rf[0] = ' '; - for (apos = k = 1; apos <= msa->alen; apos++) - if (matassign[apos] & ASSIGN_MATCH) /* ainfo is off by one from HMM */ - hmm->rf[k++] = (msa->rf[apos-1] == ' ') ? '.' 
: msa->rf[apos-1]; - hmm->rf[k] = '\0'; - hmm->flags |= PLAN7_RF; - } - - /* Transfer consensus structure annotation from alignment, - * if available - */ - if (msa->ss_cons != NULL) { - hmm->cs[0] = ' '; - for (apos = k = 1; apos <= msa->alen; apos++) - if (matassign[apos] & ASSIGN_MATCH) - hmm->cs[k++] = (msa->ss_cons[apos-1] == ' ') ? '.' : msa->ss_cons[apos-1]; - hmm->cs[k] = '\0'; - hmm->flags |= PLAN7_CS; - } - - /* Transfer surface accessibility annotation from alignment, - * if available - */ - if (msa->sa_cons != NULL) { - hmm->ca[0] = ' '; - for (apos = k = 1; apos <= msa->alen; apos++) - if (matassign[apos] & ASSIGN_MATCH) - hmm->ca[k++] = (msa->sa_cons[apos-1] == ' ') ? '.' : msa->sa_cons[apos-1]; - hmm->ca[k] = '\0'; - hmm->flags |= PLAN7_CA; - } - - /* Store the alignment map - */ - for (apos = k = 1; apos <= msa->alen; apos++) - if (matassign[apos] & ASSIGN_MATCH) - hmm->map[k++] = apos; - hmm->flags |= PLAN7_MAP; - - /* Translate and transfer X-PRM annotation. - * 0-9,[a-zA-Z] are legal; translate as 0-9,10-35 into hmm->mpri. - * Any other char is translated as -1, and this will be interpreted - * as a flag that means "unknown", e.g. use the normal mixture Dirichlet - * procedure for this column. 
- */ - if ((pri = MSAGetGC(msa, "X-PRM")) != NULL) - { - hmm->mpri = MallocOrDie(sizeof(int) * (hmm->M+1)); - for (apos = k = 1; apos <= msa->alen; apos++) - if (matassign[apos] & ASSIGN_MATCH) - { - if (isdigit((int) pri[apos-1])) hmm->mpri[k] = pri[apos-1] - '0'; - else if (islower((int) pri[apos-1])) hmm->mpri[k] = pri[apos-1] - 'a' + 10; - else if (isupper((int) pri[apos-1])) hmm->mpri[k] = pri[apos-1] - 'A' + 10; - else hmm->mpri[k] = -1; - k++; - } - } - /* And again for X-PRI annotation on insert priors: - */ - if ((pri = MSAGetGC(msa, "X-PRI")) != NULL) - { - hmm->ipri = MallocOrDie(sizeof(int) * (hmm->M+1)); - for (apos = k = 1; apos <= msa->alen; apos++) - if (matassign[apos] & ASSIGN_MATCH) - { - if (isdigit((int) pri[apos-1])) hmm->ipri[k] = pri[apos-1] - '0'; - else if (islower((int) pri[apos-1])) hmm->ipri[k] = pri[apos-1] - 'a' + 10; - else if (isupper((int) pri[apos-1])) hmm->ipri[k] = pri[apos-1] - 'A' + 10; - else hmm->ipri[k] = -1; - k++; - } - } - /* And one last time for X-PRT annotation on transition priors: - */ - if ((pri = MSAGetGC(msa, "X-PRT")) != NULL) - { - hmm->tpri = MallocOrDie(sizeof(int) * (hmm->M+1)); - for (apos = k = 1; apos <= msa->alen; apos++) - if (matassign[apos] & ASSIGN_MATCH) - { - if (isdigit((int) pri[apos-1])) hmm->tpri[k] = pri[apos-1] - '0'; - else if (islower((int) pri[apos-1])) hmm->tpri[k] = pri[apos-1] - 'a' + 10; - else if (isupper((int) pri[apos-1])) hmm->tpri[k] = pri[apos-1] - 'A' + 10; - else hmm->tpri[k] = -1; - k++; - } - } - -} - -static void -print_matassign(int *matassign, int alen) -{ - int apos; - - for (apos = 0; apos <= alen; apos++) { - printf("%3d %c %c %c\n", - apos, - (matassign[apos] & ASSIGN_MATCH) ? 'x':' ', - (matassign[apos] & FIRST_MATCH || matassign[apos] & LAST_MATCH) ? '<' : ' ', - (matassign[apos] & EXTERNAL_INSERT_N || - matassign[apos] & EXTERNAL_INSERT_C) ? 
'|':' '); - } -} diff --git a/forester/archive/RIO/others/hmmer/src/plan7.c b/forester/archive/RIO/others/hmmer/src/plan7.c deleted file mode 100644 index 6f5eed1..0000000 --- a/forester/archive/RIO/others/hmmer/src/plan7.c +++ /dev/null @@ -1,1036 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - - -/* plan7.c - * SRE, Sat Nov 16 14:19:56 1996 - * - * Support for Plan 7 HMM data structure, plan7_s. - */ - -#include -#include -#include -#include - -#include "funcs.h" -#include "config.h" -#include "structs.h" -#include "squid.h" - -/* Functions: AllocPlan7(), AllocPlan7Shell(), AllocPlan7Body(), FreePlan7() - * - * Purpose: Allocate or free a Plan7 HMM structure. - * Can either allocate all at one (AllocPlan7()) or - * in two steps (AllocPlan7Shell(), AllocPlan7Body()). - * The two step method is used in hmmio.c where we start - * parsing the header of an HMM file but don't - * see the size of the model 'til partway thru the header. 
- */ -struct plan7_s * -AllocPlan7(int M) -{ - struct plan7_s *hmm; - - hmm = AllocPlan7Shell(); - AllocPlan7Body(hmm, M); - return hmm; -} -struct plan7_s * -AllocPlan7Shell(void) -{ - struct plan7_s *hmm; - - hmm = (struct plan7_s *) MallocOrDie (sizeof(struct plan7_s)); - hmm->M = 0; - - hmm->name = NULL; - hmm->acc = NULL; - hmm->desc = NULL; - hmm->rf = NULL; - hmm->cs = NULL; - hmm->ca = NULL; - hmm->comlog = NULL; - hmm->nseq = 0; - hmm->ctime = NULL; - hmm->map = NULL; - hmm->checksum = 0; - - hmm->tpri = NULL; - hmm->mpri = NULL; - hmm->ipri = NULL; - - hmm->ga1 = hmm->ga2 = 0.0; - hmm->tc1 = hmm->tc2 = 0.0; - hmm->nc1 = hmm->nc2 = 0.0; - - hmm->t = NULL; - hmm->tsc = NULL; - hmm->mat = NULL; - hmm->ins = NULL; - hmm->msc = NULL; - hmm->isc = NULL; - - hmm->begin = NULL; - hmm->bsc = NULL; - hmm->end = NULL; - hmm->esc = NULL; - /* DNA translation is not enabled by default */ - hmm->dnam = NULL; - hmm->dnai = NULL; - hmm->dna2 = -INFTY; - hmm->dna4 = -INFTY; - /* statistical parameters set to innocuous empty values */ - hmm->mu = 0.; - hmm->lambda = 0.; - - hmm->flags = 0; - return hmm; -} -void -AllocPlan7Body(struct plan7_s *hmm, int M) -{ - int k, x; - - hmm->M = M; - - hmm->rf = MallocOrDie ((M+2) * sizeof(char)); - hmm->cs = MallocOrDie ((M+2) * sizeof(char)); - hmm->ca = MallocOrDie ((M+2) * sizeof(char)); - hmm->map = MallocOrDie ((M+1) * sizeof(int)); - - hmm->t = MallocOrDie (M * sizeof(float *)); - hmm->tsc = MallocOrDie (M * sizeof(int *)); - hmm->mat = MallocOrDie ((M+1) * sizeof(float *)); - hmm->ins = MallocOrDie (M * sizeof(float *)); - hmm->msc = MallocOrDie (MAXCODE * sizeof(int *)); - hmm->isc = MallocOrDie (MAXCODE * sizeof(int *)); - hmm->t[0] = MallocOrDie ((7*M) * sizeof(float)); - hmm->tsc[0] = MallocOrDie ((7*M) * sizeof(int)); - hmm->mat[0] = MallocOrDie ((MAXABET*(M+1)) * sizeof(float)); - hmm->ins[0] = MallocOrDie ((MAXABET*M) * sizeof(float)); - hmm->msc[0] = MallocOrDie ((MAXCODE*(M+1)) * sizeof(int)); - hmm->isc[0] = 
MallocOrDie ((MAXCODE*M) * sizeof(int)); - - /* note allocation strategy for important 2D arrays -- trying - * to keep locality as much as possible, cache efficiency etc. - */ - for (k = 1; k <= M; k++) { - hmm->mat[k] = hmm->mat[0] + k * MAXABET; - if (k < M) { - hmm->ins[k] = hmm->ins[0] + k * MAXABET; - hmm->t[k] = hmm->t[0] + k * 7; - hmm->tsc[k] = hmm->tsc[0] + k * 7; - } - } - for (x = 1; x < MAXCODE; x++) { - hmm->msc[x] = hmm->msc[0] + x * (M+1); - hmm->isc[x] = hmm->isc[0] + x * M; - } - /* tsc[0] is used as a boundary condition sometimes [Viterbi()], - * so set to -inf always. - */ - for (x = 0; x < 7; x++) - hmm->tsc[0][x] = -INFTY; - - hmm->begin = MallocOrDie ((M+1) * sizeof(float)); - hmm->bsc = MallocOrDie ((M+1) * sizeof(int)); - hmm->end = MallocOrDie ((M+1) * sizeof(float)); - hmm->esc = MallocOrDie ((M+1) * sizeof(int)); - - return; -} - - -void -FreePlan7(struct plan7_s *hmm) -{ - if (hmm->name != NULL) free(hmm->name); - if (hmm->desc != NULL) free(hmm->desc); - if (hmm->rf != NULL) free(hmm->rf); - if (hmm->cs != NULL) free(hmm->cs); - if (hmm->ca != NULL) free(hmm->ca); - if (hmm->comlog != NULL) free(hmm->comlog); - if (hmm->ctime != NULL) free(hmm->ctime); - if (hmm->map != NULL) free(hmm->map); - if (hmm->tpri != NULL) free(hmm->tpri); - if (hmm->mpri != NULL) free(hmm->mpri); - if (hmm->ipri != NULL) free(hmm->ipri); - if (hmm->bsc != NULL) free(hmm->bsc); - if (hmm->begin != NULL) free(hmm->begin); - if (hmm->esc != NULL) free(hmm->esc); - if (hmm->end != NULL) free(hmm->end); - if (hmm->msc != NULL) free(hmm->msc[0]); - if (hmm->mat != NULL) free(hmm->mat[0]); - if (hmm->isc != NULL) free(hmm->isc[0]); - if (hmm->ins != NULL) free(hmm->ins[0]); - if (hmm->tsc != NULL) free(hmm->tsc[0]); - if (hmm->t != NULL) free(hmm->t[0]); - if (hmm->msc != NULL) free(hmm->msc); - if (hmm->mat != NULL) free(hmm->mat); - if (hmm->isc != NULL) free(hmm->isc); - if (hmm->ins != NULL) free(hmm->ins); - if (hmm->tsc != NULL) free(hmm->tsc); - if (hmm->t != 
NULL) free(hmm->t); - if (hmm->dnam != NULL) free(hmm->dnam); - if (hmm->dnai != NULL) free(hmm->dnai); - free(hmm); -} - -/* Function: ZeroPlan7() - * - * Purpose: Zeros the counts/probabilities fields in a model. - * Leaves null model untouched. - */ -void -ZeroPlan7(struct plan7_s *hmm) -{ - int k; - for (k = 1; k < hmm->M; k++) - { - FSet(hmm->t[k], 7, 0.); - FSet(hmm->mat[k], Alphabet_size, 0.); - FSet(hmm->ins[k], Alphabet_size, 0.); - } - FSet(hmm->mat[hmm->M], Alphabet_size, 0.); - hmm->tbd1 = 0.; - FSet(hmm->begin+1, hmm->M, 0.); - FSet(hmm->end+1, hmm->M, 0.); - for (k = 0; k < 4; k++) - FSet(hmm->xt[k], 2, 0.); - hmm->flags &= ~PLAN7_HASBITS; /* invalidates scores */ - hmm->flags &= ~PLAN7_HASPROB; /* invalidates probabilities */ -} - - -/* Function: Plan7SetName() - * - * Purpose: Change the name of a Plan7 HMM. Convenience function. - * - * Note: Trailing whitespace and \n's are chopped. - */ -void -Plan7SetName(struct plan7_s *hmm, char *name) -{ - if (hmm->name != NULL) free(hmm->name); - hmm->name = Strdup(name); - StringChop(hmm->name); -} -/* Function: Plan7SetAccession() - * - * Purpose: Change the accession number of a Plan7 HMM. Convenience function. - * - * Note: Trailing whitespace and \n's are chopped. - */ -void -Plan7SetAccession(struct plan7_s *hmm, char *acc) -{ - if (hmm->acc != NULL) free(hmm->acc); - hmm->acc = Strdup(acc); - StringChop(hmm->acc); - hmm->flags |= PLAN7_ACC; -} - -/* Function: Plan7SetDescription() - * - * Purpose: Change the description line of a Plan7 HMM. Convenience function. - * - * Note: Trailing whitespace and \n's are chopped. - */ -void -Plan7SetDescription(struct plan7_s *hmm, char *desc) -{ - if (hmm->desc != NULL) free(hmm->desc); - hmm->desc = Strdup(desc); - StringChop(hmm->desc); - hmm->flags |= PLAN7_DESC; -} - -/* Function: Plan7ComlogAppend() - * Date: SRE, Wed Oct 29 09:57:30 1997 [TWA 721 over Greenland] - * - * Purpose: Concatenate command line options and append to the - * command line log. 
- */ -void -Plan7ComlogAppend(struct plan7_s *hmm, int argc, char **argv) -{ - int len; - int i; - - /* figure out length of command line, w/ spaces and \n */ - len = argc; - for (i = 0; i < argc; i++) - len += strlen(argv[i]); - - /* allocate */ - if (hmm->comlog != NULL) - { - len += strlen(hmm->comlog); - hmm->comlog = ReallocOrDie(hmm->comlog, sizeof(char)* (len+1)); - } - else - { - hmm->comlog = MallocOrDie(sizeof(char)* (len+1)); - *(hmm->comlog) = '\0'; /* need this to make strcat work */ - } - - /* append */ - strcat(hmm->comlog, "\n"); - for (i = 0; i < argc; i++) - { - strcat(hmm->comlog, argv[i]); - if (i < argc-1) strcat(hmm->comlog, " "); - } -} - -/* Function: Plan7SetCtime() - * Date: SRE, Wed Oct 29 11:53:19 1997 [TWA 721 over the Atlantic] - * - * Purpose: Set the ctime field in a new HMM to the current time. - */ -void -Plan7SetCtime(struct plan7_s *hmm) -{ - time_t date = time(NULL); - if (hmm->ctime != NULL) free(hmm->ctime); - hmm->ctime = Strdup(ctime(&date)); - StringChop(hmm->ctime); -} - - -/* Function: Plan7SetNullModel() - * - * Purpose: Set the null model section of an HMM. - * Convenience function. - */ -void -Plan7SetNullModel(struct plan7_s *hmm, float null[MAXABET], float p1) -{ - int x; - for (x = 0; x < Alphabet_size; x++) - hmm->null[x] = null[x]; - hmm->p1 = p1; -} - - -/* Function: P7Logoddsify() - * - * Purpose: Take an HMM with valid probabilities, and - * fill in the integer log-odds score section of the model. - * - * Notes on log-odds scores: - * type of parameter probability score - * ----------------- ----------- ------ - * any emission p_x log_2 p_x/null_x - * N,J,C /assume/ p_x = null_x so /always/ score zero. - * transition to emitters t_x log_2 t_x/p1 - * (M,I; N,C; J) - * NN and CC loops are often equal to p1, so usu. score zero. - * C->T transition t_x log_2 t_x/p2 - * often zero, usu. C->T = p2. 
- * all other transitions t_x log_2 t_x - * (no null model counterpart, so null prob is 1) - * - * Notes on entry/exit scores, B->M and M->E: - * The probability form model includes delete states 1 and M. - * these states are removed from a search form model to - * prevent B->D...D->E->J->B mute cycles, which would complicate - * dynamic programming algorithms. The data-independent - * S/W B->M and M->E transitions are folded together with - * data-dependent B->D...D->M and M->D...D->E paths. - * - * This process is referred to in the code as "wing folding" - * or "wing retraction"... the analogy is to a swept-wing - * fighter in landing vs. high speed flight configuration. - * - * Note on Viterbi vs. forward flag: - * Wing retraction must take forward vs. Viterbi - * into account. If forward, sum two paths; if Viterbi, take - * max. I tried to slide this by as a sum, without - * the flag, but Alex detected it as a bug, because you can - * then find cases where the Viterbi score doesn't match - * the P7TraceScore(). - * - * Args: hmm - the hmm to calculate scores in. - * viterbi_mode - TRUE to fold wings in Viterbi configuration. - * - * Return: (void) - * hmm scores are filled in. 
- */ -void -P7Logoddsify(struct plan7_s *hmm, int viterbi_mode) -{ - int k; /* counter for model position */ - int x; /* counter for symbols */ - float accum; - float tbm, tme; - - if (hmm->flags & PLAN7_HASBITS) return; - - /* Symbol emission scores - */ - for (k = 1; k <= hmm->M; k++) - { - /* match/insert emissions in main model */ - for (x = 0; x < Alphabet_size; x++) - { - hmm->msc[x][k] = Prob2Score(hmm->mat[k][x], hmm->null[x]); - if (k < hmm->M) - hmm->isc[x][k] = Prob2Score(hmm->ins[k][x], hmm->null[x]); - } - /* degenerate match/insert emissions */ - for (x = Alphabet_size; x < Alphabet_iupac; x++) - { - hmm->msc[x][k] = DegenerateSymbolScore(hmm->mat[k], hmm->null, x); - if (k < hmm->M) - hmm->isc[x][k] = DegenerateSymbolScore(hmm->ins[k], hmm->null, x); - } - } - - /* State transitions. - * - * A note on "folding" of D_1 and D_M. - * These two delete states are folded out of search form models - * in order to prevent null cycles in the dynamic programming - * algorithms (see code below). However, we use their log transitions - * when we save the model! So the following log transition probs - * are used *only* in save files, *never* in search algorithms: - * log (tbd1), D1 -> M2, D1 -> D2 - * Mm-1 -> Dm, Dm-1 -> Dm - * - * In a search algorithm, these have to be interpreted as -INFTY - * because their contributions are folded into bsc[] and esc[] - * entry/exit scores. They can't be set to -INFTY here because - * we need them in save files. - */ - for (k = 1; k < hmm->M; k++) - { - hmm->tsc[k][TMM] = Prob2Score(hmm->t[k][TMM], hmm->p1); - hmm->tsc[k][TMI] = Prob2Score(hmm->t[k][TMI], hmm->p1); - hmm->tsc[k][TMD] = Prob2Score(hmm->t[k][TMD], 1.0); - hmm->tsc[k][TIM] = Prob2Score(hmm->t[k][TIM], hmm->p1); - hmm->tsc[k][TII] = Prob2Score(hmm->t[k][TII], hmm->p1); - hmm->tsc[k][TDM] = Prob2Score(hmm->t[k][TDM], hmm->p1); - hmm->tsc[k][TDD] = Prob2Score(hmm->t[k][TDD], 1.0); - } - - /* B->M entry transitions. Note how D_1 is folded out. 
- * M1 is just B->M1 - * M2 is sum (or max) of B->M2 and B->D1->M2 - * M_k is sum (or max) of B->M_k and B->D1...D_k-1->M_k - * These have to be done in log space, else you'll get - * underflow errors; and we also have to watch for log(0). - * A little sloppier than it probably has to be; historically, - * doing in this in log space was in response to a bug report. - */ - accum = hmm->tbd1 > 0.0 ? log(hmm->tbd1) : -9999.; - for (k = 1; k <= hmm->M; k++) - { - tbm = hmm->begin[k] > 0. ? log(hmm->begin[k]) : -9999.; /* B->M_k part */ - - /* B->D1...D_k-1->M_k part we get from accum*/ - if (k > 1 && accum > -9999.) - { - if (hmm->t[k-1][TDM] > 0.0) - { - if (viterbi_mode) tbm = MAX(tbm, accum + log(hmm->t[k-1][TDM])); - else tbm = LogSum(tbm, accum + log(hmm->t[k-1][TDM])); - } - - accum = (hmm->t[k-1][TDD] > 0.0) ? accum + log(hmm->t[k-1][TDD]) : -9999.; - } - /* Convert from log_e to scaled integer log_2 odds. */ - if (tbm > -9999.) - hmm->bsc[k] = (int) floor(0.5 + INTSCALE * 1.44269504 * (tbm - log(hmm->p1))); - else - hmm->bsc[k] = -INFTY; - } - - /* M->E exit transitions. Note how D_M is folded out. - * M_M is 1 by definition - * M_M-1 is sum of M_M-1->E and M_M-1->D_M->E, where D_M->E is 1 by definition - * M_k is sum of M_k->E and M_k->D_k+1...D_M->E - * Must be done in log space to avoid underflow errors. - * A little sloppier than it probably has to be; historically, - * doing in this in log space was in response to a bug report. - */ - hmm->esc[hmm->M] = 0; - accum = 0.; - for (k = hmm->M-1; k >= 1; k--) - { - tme = hmm->end[k] > 0. ? log(hmm->end[k]) : -9999.; - if (accum > -9999.) - { - if (hmm->t[k][TMD] > 0.0) - { - if (viterbi_mode) tme = MAX(tme, accum + log(hmm->t[k][TMD])); - else tme = LogSum(tme, accum + log(hmm->t[k][TMD])); - } - accum = (hmm->t[k][TDD] > 0.0) ? accum + log(hmm->t[k][TDD]) : -9999.; - } - /* convert from log_e to scaled integer log odds. */ - hmm->esc[k] = (tme > -9999.) ? 
(int) floor(0.5 + INTSCALE * 1.44269504 * tme) : -INFTY; - } - - /* special transitions */ - hmm->xsc[XTN][LOOP] = Prob2Score(hmm->xt[XTN][LOOP], hmm->p1); - hmm->xsc[XTN][MOVE] = Prob2Score(hmm->xt[XTN][MOVE], 1.0); - hmm->xsc[XTE][LOOP] = Prob2Score(hmm->xt[XTE][LOOP], 1.0); - hmm->xsc[XTE][MOVE] = Prob2Score(hmm->xt[XTE][MOVE], 1.0); - hmm->xsc[XTC][LOOP] = Prob2Score(hmm->xt[XTC][LOOP], hmm->p1); - hmm->xsc[XTC][MOVE] = Prob2Score(hmm->xt[XTC][MOVE], 1.-hmm->p1); - hmm->xsc[XTJ][LOOP] = Prob2Score(hmm->xt[XTJ][LOOP], hmm->p1); - hmm->xsc[XTJ][MOVE] = Prob2Score(hmm->xt[XTJ][MOVE], 1.0); - - hmm->flags |= PLAN7_HASBITS; /* raise the log-odds ready flag */ -} - - - -/* Function: Plan7Renormalize() - * - * Purpose: Take an HMM in counts form, and renormalize - * all of its probability vectors. Also enforces - * Plan7 restrictions on nonexistent transitions. - * - * Args: hmm - the model to renormalize. - * - * Return: (void) - * hmm is changed. - */ -void -Plan7Renormalize(struct plan7_s *hmm) -{ - int k; /* counter for model position */ - int st; /* counter for special states */ - float d; /* denominator */ - - /* match emissions */ - for (k = 1; k <= hmm->M; k++) - FNorm(hmm->mat[k], Alphabet_size); - /* insert emissions */ - for (k = 1; k < hmm->M; k++) - FNorm(hmm->ins[k], Alphabet_size); - /* begin transitions */ - d = FSum(hmm->begin+1, hmm->M) + hmm->tbd1; - FScale(hmm->begin+1, hmm->M, 1./d); - hmm->tbd1 /= d; - /* main model transitions */ - for (k = 1; k < hmm->M; k++) - { - d = FSum(hmm->t[k], 3) + hmm->end[k]; - FScale(hmm->t[k], 3, 1./d); - hmm->end[k] /= d; - - FNorm(hmm->t[k]+3, 2); /* insert */ - FNorm(hmm->t[k]+5, 2); /* delete */ - } - /* null model emissions */ - FNorm(hmm->null, Alphabet_size); - /* special transitions */ - for (st = 0; st < 4; st++) - FNorm(hmm->xt[st], 2); - /* enforce nonexistent transitions */ - /* (is this necessary?) 
*/ - hmm->t[0][TDM] = hmm->t[0][TDD] = 0.0; - - hmm->flags &= ~PLAN7_HASBITS; /* clear the log-odds ready flag */ - hmm->flags |= PLAN7_HASPROB; /* set the probabilities OK flag */ -} - - -/* Function: Plan7RenormalizeExits() - * Date: SRE, Fri Aug 14 11:22:19 1998 [St. Louis] - * - * Purpose: Renormalize just the match state transitions; - * for instance, after a Config() function has - * modified the exit distribution. - * - * Args: hmm - hmm to renormalize - * - * Returns: void - */ -void -Plan7RenormalizeExits(struct plan7_s *hmm) -{ - int k; - float d; - - for (k = 1; k < hmm->M; k++) - { - d = FSum(hmm->t[k], 3); - FScale(hmm->t[k], 3, 1./(d + d*hmm->end[k])); - } -} - - -/***************************************************************** - * Plan7 configuration functions - * The following few functions are the Plan7 equivalent of choosing - * different alignment styles (fully local, fully global, global/local, - * multihit, etc.) - * - * There is (at least) one constraint worth noting. - * If you want per-domain scores to sum up to per-sequence scores, - * then one of the following two sets of conditions must be met: - * - * 1) t(E->J) = 0 - * e.g. no multidomain hits - * - * 2) t(N->N) = t(C->C) = t(J->J) = hmm->p1 - * e.g. unmatching sequence scores zero, and - * N->B first-model score is equal to J->B another-model score. - * - * These constraints are obeyed in the default Config() functions below, - * but in the future (when HMM editing may be allowed) we'll have - * to remember this. Non-equality of the summed domain scores and - * the total sequence score is a really easy "red flag" for people to - * notice and report as a bug, even if it may make probabilistic - * sense not to meet either constraint for certain modeling problems. 
- ***************************************************************** - */ - -/* Function: Plan7NakedConfig() - * - * Purpose: Set the alignment-independent, algorithm-dependent parameters - * of a Plan7 model so that no special states (N,C,J) emit anything: - * one simple, full global pass through the model. - * - * Args: hmm - the plan7 model - * - * Return: (void) - * The HMM is modified; algorithm dependent parameters are set. - * Previous scores are invalidated if they existed. - */ -void -Plan7NakedConfig(struct plan7_s *hmm) -{ - hmm->xt[XTN][MOVE] = 1.; /* disallow N-terminal tail */ - hmm->xt[XTN][LOOP] = 0.; - hmm->xt[XTE][MOVE] = 1.; /* only 1 domain/sequence ("global" alignment) */ - hmm->xt[XTE][LOOP] = 0.; - hmm->xt[XTC][MOVE] = 1.; /* disallow C-terminal tail */ - hmm->xt[XTC][LOOP] = 0.; - hmm->xt[XTJ][MOVE] = 0.; /* J state unused */ - hmm->xt[XTJ][LOOP] = 1.; - FSet(hmm->begin+2, hmm->M-1, 0.); /* disallow internal entries. */ - hmm->begin[1] = 1. - hmm->tbd1; - FSet(hmm->end+1, hmm->M-1, 0.); /* disallow internal exits. */ - hmm->end[hmm->M] = 1.; - Plan7RenormalizeExits(hmm); - hmm->flags &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */ -} - -/* Function: Plan7GlobalConfig() - * - * Purpose: Set the alignment-independent, algorithm-dependent parameters - * of a Plan7 model to global (Needleman/Wunsch) configuration. - * - * Like a non-looping hmmls, since we actually allow flanking - * N and C terminal sequence. - * - * Args: hmm - the plan7 model - * - * Return: (void) - * The HMM is modified; algorithm dependent parameters are set. - * Previous scores are invalidated if they existed. - */ -void -Plan7GlobalConfig(struct plan7_s *hmm) -{ - hmm->xt[XTN][MOVE] = 1. - hmm->p1; /* allow N-terminal tail */ - hmm->xt[XTN][LOOP] = hmm->p1; - hmm->xt[XTE][MOVE] = 1.; /* only 1 domain/sequence ("global" alignment) */ - hmm->xt[XTE][LOOP] = 0.; - hmm->xt[XTC][MOVE] = 1. 
- hmm->p1; /* allow C-terminal tail */ - hmm->xt[XTC][LOOP] = hmm->p1; - hmm->xt[XTJ][MOVE] = 0.; /* J state unused */ - hmm->xt[XTJ][LOOP] = 1.; - FSet(hmm->begin+2, hmm->M-1, 0.); /* disallow internal entries. */ - hmm->begin[1] = 1. - hmm->tbd1; - FSet(hmm->end+1, hmm->M-1, 0.); /* disallow internal exits. */ - hmm->end[hmm->M] = 1.; - Plan7RenormalizeExits(hmm); - hmm->flags &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */ -} - -/* Function: Plan7LSConfig() - * - * Purpose: Set the alignment independent parameters of a Plan7 model - * to hmmls (global in HMM, local in sequence) configuration. - * - * Args: hmm - the plan7 model - * - * Return: (void); - * the HMM probabilities are modified. - */ -void -Plan7LSConfig(struct plan7_s *hmm) -{ - hmm->xt[XTN][MOVE] = 1.-hmm->p1; /* allow N-terminal tail */ - hmm->xt[XTN][LOOP] = hmm->p1; - hmm->xt[XTE][MOVE] = 0.5; /* expectation 2 domains/seq */ - hmm->xt[XTE][LOOP] = 0.5; - hmm->xt[XTC][MOVE] = 1.-hmm->p1; /* allow C-terminal tail */ - hmm->xt[XTC][LOOP] = hmm->p1; - hmm->xt[XTJ][MOVE] = 1.-hmm->p1; /* allow J junction state */ - hmm->xt[XTJ][LOOP] = hmm->p1; - FSet(hmm->begin+2, hmm->M-1, 0.); /* start at M1/D1 */ - hmm->begin[1] = 1. - hmm->tbd1; - FSet(hmm->end+1, hmm->M-1, 0.); /* end at M_m/D_m */ - hmm->end[hmm->M] = 1.; - Plan7RenormalizeExits(hmm); - hmm->flags &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */ -} - - -/* Function: Plan7SWConfig() - * - * Purpose: Set the alignment independent parameters of - * a Plan7 model to hmmsw (Smith/Waterman) configuration. - * - * Notes: entry/exit is asymmetric because of the left/right - * nature of the HMM/profile. Entry probability is distributed - * simply by assigning p_x = pentry / (M-1) to M-1 - * internal match states. However, the same approach doesn't - * lead to a flat distribution over exit points. Exit p's - * must be corrected for the probability of a previous exit - * from the model. 
Requiring a flat distribution over exit - * points leads to an easily solved piece of algebra, giving: - * p_1 = pexit / (M-1) - * p_x = p_1 / (1 - (x-1) p_1) - * - * Args: hmm - the Plan7 model w/ data-dep prob's valid - * pentry - probability of an internal entry somewhere; - * will be evenly distributed over M-1 match states - * pexit - probability of an internal exit somewhere; - * will be distributed over M-1 match states. - * - * Return: (void) - * HMM probabilities are modified. - */ -void -Plan7SWConfig(struct plan7_s *hmm, float pentry, float pexit) -{ - float basep; /* p1 for exits: the base p */ - int k; /* counter over states */ - - /* Configure special states. - */ - hmm->xt[XTN][MOVE] = 1-hmm->p1; /* allow N-terminal tail */ - hmm->xt[XTN][LOOP] = hmm->p1; - hmm->xt[XTE][MOVE] = 1.; /* disallow jump state */ - hmm->xt[XTE][LOOP] = 0.; - hmm->xt[XTC][MOVE] = 1-hmm->p1; /* allow C-terminal tail */ - hmm->xt[XTC][LOOP] = hmm->p1; - hmm->xt[XTJ][MOVE] = 1.; /* J is unused */ - hmm->xt[XTJ][LOOP] = 0.; - - /* Configure entry. - */ - hmm->begin[1] = (1. - pentry) * (1. - hmm->tbd1); - FSet(hmm->begin+2, hmm->M-1, (pentry * (1.- hmm->tbd1)) / (float)(hmm->M-1)); - - /* Configure exit. - */ - hmm->end[hmm->M] = 1.0; - basep = pexit / (float) (hmm->M-1); - for (k = 1; k < hmm->M; k++) - hmm->end[k] = basep / (1. - basep * (float) (k-1)); - Plan7RenormalizeExits(hmm); - hmm->flags &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */ -} - -/* Function: Plan7FSConfig() - * Date: SRE, Fri Jan 2 15:34:40 1998 [StL] - * - * Purpose: Set the alignment independent parameters of - * a Plan7 model to hmmfs (multihit Smith/Waterman) configuration. - * - * See comments on Plan7SWConfig() for explanation of - * how pentry and pexit are used. 
- * - * Args: hmm - the Plan7 model w/ data-dep prob's valid - * pentry - probability of an internal entry somewhere; - * will be evenly distributed over M-1 match states - * pexit - probability of an internal exit somewhere; - * will be distributed over M-1 match states. - * - * Return: (void) - * HMM probabilities are modified. - */ -void -Plan7FSConfig(struct plan7_s *hmm, float pentry, float pexit) -{ - float basep; /* p1 for exits: the base p */ - int k; /* counter over states */ - - /* Configure special states. - */ - hmm->xt[XTN][MOVE] = 1-hmm->p1; /* allow N-terminal tail */ - hmm->xt[XTN][LOOP] = hmm->p1; - hmm->xt[XTE][MOVE] = 0.5; /* allow loops / multihits */ - hmm->xt[XTE][LOOP] = 0.5; - hmm->xt[XTC][MOVE] = 1-hmm->p1; /* allow C-terminal tail */ - hmm->xt[XTC][LOOP] = hmm->p1; - hmm->xt[XTJ][MOVE] = 1.-hmm->p1; /* allow J junction between domains */ - hmm->xt[XTJ][LOOP] = hmm->p1; - - /* Configure entry. - */ - hmm->begin[1] = (1. - pentry) * (1. - hmm->tbd1); - FSet(hmm->begin+2, hmm->M-1, (pentry * (1.-hmm->tbd1)) / (float)(hmm->M-1)); - - /* Configure exit. - */ - hmm->end[hmm->M] = 1.0; - basep = pexit / (float) (hmm->M-1); - for (k = 1; k < hmm->M; k++) - hmm->end[k] = basep / (1. - basep * (float) (k-1)); - Plan7RenormalizeExits(hmm); - hmm->flags &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */ -} - - - - -/* Function: Plan7ESTConfig() - * - * Purpose: Configure a Plan7 model for EST Smith/Waterman - * analysis. - * - * OUTDATED; DO NOT USE WITHOUT RECHECKING - * - * Args: hmm - hmm to configure. - * aacode - 0..63 vector mapping genetic code to amino acids - * estmodel - 20x64 translation matrix, w/ codon bias and substitution error - * dna2 - probability of a -1 frameshift in a triplet - * dna4 - probability of a +1 frameshift in a triplet - */ -void -Plan7ESTConfig(struct plan7_s *hmm, int *aacode, float **estmodel, - float dna2, float dna4) -{ - int k; - int x; - float p; - float *tripnull; /* UNFINISHED!!! 
*/ - - /* configure specials */ - hmm->xt[XTN][MOVE] = 1./351.; - hmm->xt[XTN][LOOP] = 350./351.; - hmm->xt[XTE][MOVE] = 1.; - hmm->xt[XTE][LOOP] = 0.; - hmm->xt[XTC][MOVE] = 1./351.; - hmm->xt[XTC][LOOP] = 350./351.; - hmm->xt[XTJ][MOVE] = 1.; - hmm->xt[XTJ][LOOP] = 0.; - /* configure entry/exit */ - hmm->begin[1] = 0.5; - FSet(hmm->begin+2, hmm->M-1, 0.5 / ((float)hmm->M - 1.)); - hmm->end[hmm->M] = 1.; - FSet(hmm->end, hmm->M-1, 0.5 / ((float)hmm->M - 1.)); - - /* configure dna triplet/frameshift emissions */ - for (k = 1; k <= hmm->M; k++) - { - /* translate aa to triplet probabilities */ - for (x = 0; x < 64; x++) { - p = hmm->mat[k][aacode[x]] * estmodel[aacode[x]][x] * (1.-dna2-dna4); - hmm->dnam[x][k] = Prob2Score(p, tripnull[x]); - - p = hmm->ins[k][aacode[x]] * estmodel[aacode[x]][x] * (1.-dna2-dna4); - hmm->dnai[x][k] = Prob2Score(p, tripnull[x]); - } - hmm->dnam[64][k] = 0; /* ambiguous codons score 0 (danger?) */ - hmm->dna2 = Prob2Score(dna2, 1.); - hmm->dna4 = Prob2Score(dna4, 1.); - } -} - -/* Function: PrintPlan7Stats() - * - * Purpose: Given a newly constructed HMM and the tracebacks - * of the sequences it was trained on, print out all - * the interesting information at the end of hmmb - * and hmmt runs that convinces the user we actually - * did something. - * - * Args: fp - where to send the output (stdout, usually) - * hmm - the new HMM, probability form - * dsq - digitized training seqs - * nseq - number of dsq's - * tr - array of tracebacks for dsq - * - * Return: (void) - */ -void -PrintPlan7Stats(FILE *fp, struct plan7_s *hmm, char **dsq, int nseq, - struct p7trace_s **tr) -{ - int idx; /* counter for sequences */ - float score; /* an individual trace score */ - float total, best, worst; /* for the avg. and range of the scores */ - float sqsum, stddev; /* for the std. 
deviation of the scores */ - - P7Logoddsify(hmm, TRUE); /* make sure model scores are ready */ - - /* find individual trace scores */ - score = P7TraceScore(hmm, dsq[0], tr[0]); - total = best = worst = score; - sqsum = score * score; - for (idx = 1; idx < nseq; idx++) { - /* P7PrintTrace(stdout, tr[idx], hmm, dsq[idx]); */ - score = P7TraceScore(hmm, dsq[idx], tr[idx]); - total += score; - sqsum += score * score; - if (score > best) best = score; - if (score < worst) worst = score; - } - if (nseq > 1) { - stddev = (sqsum - (total * total / (float) nseq)) / ((float) nseq - 1.); - stddev = (stddev > 0) ? sqrt(stddev) : 0.0; - } else stddev = 0.0; - /* print out stuff. */ - fprintf(fp, "Average score: %10.2f bits\n", total / (float) nseq); - fprintf(fp, "Minimum score: %10.2f bits\n", worst); - fprintf(fp, "Maximum score: %10.2f bits\n", best); - fprintf(fp, "Std. deviation: %10.2f bits\n", stddev); -} - -/* Function: DegenerateSymbolScore() - * - * Purpose: Given a sequence character x and an hmm emission probability - * vector, calculate the log-odds (base 2) score of - * the symbol. - * - * Easy if x is in the emission alphabet, but not so easy - * is x is a degenerate symbol. The "correct" Bayesian - * philosophy is to calculate score(X) by summing over - * p(x) for all x in the degenerate symbol X to get P(X), - * doing the same sum over the prior to get F(X), and - * doing log_2 (P(X)/F(X)). This gives an X a zero score, - * for instance. - * - * Though this is correct in a formal Bayesian sense -- - * we have no information on the sequence, so we can't - * say if it's random or model, so it scores zero -- - * it sucks, big time, for scoring biological sequences. - * Sequences with lots of X's score near zero, while - * real sequences have average scores that are negative -- - * so the X-laden sequences appear to be lifted out - * of the noise of a full histogram of a database search. - * Correct or not, this is highly undesirable. 
- * - * So therefore we calculated the expected score of - * the degenerate symbol by summing over all x in X: - * e_x log_2 (p(x)/f(x)) - * where the expectation of x, e_x, is calculated from - * the random model. - * - * Empirically, this works; it also has a wooly hand-waving - * probabilistic justification that I'm happy enough about. - * - * Args: p - probabilities of normal symbols - * null - null emission model - * ambig - index of the degenerate character in Alphabet[] - * - * Return: the integer log odds score of x given the emission - * vector and the null model, scaled up by INTSCALE. - */ -int -DegenerateSymbolScore(float *p, float *null, int ambig) -{ - int x; - float numer = 0.; - float denom = 0.; - - for (x = 0; x < Alphabet_size; x++) { - if (Degenerate[ambig][x]) { - numer += null[x] * sreLOG2(p[x] / null[x]); - denom += null[x]; - } - } - return (int) (INTSCALE * numer / denom); -} - -/***************************************************************** - * - * Plan9/Plan7 interface - * - * Very important code during the evolutionary takeover by Plan7 -- - * convert between Krogh/Haussler and Plan7 models. - *****************************************************************/ - -/* Function: Plan9toPlan7() - * - * Purpose: Convert an old HMM into Plan7. Configures it in - * ls mode. - * - * Args: hmm - old ugly plan9 style HMM - * ret_plan7 - new wonderful Plan7 HMM - * - * Return: (void) - * Plan7 HMM is allocated here. Free w/ FreePlan7(). 
- */ -void -Plan9toPlan7(struct plan9_s *hmm, struct plan7_s **ret_plan7) -{ - struct plan7_s *plan7; - int k, x; - - plan7 = AllocPlan7(hmm->M); - - for (k = 1; k < hmm->M; k++) - { - plan7->t[k][TMM] = hmm->mat[k].t[MATCH]; - plan7->t[k][TMD] = hmm->mat[k].t[DELETE]; - plan7->t[k][TMI] = hmm->mat[k].t[INSERT]; - plan7->t[k][TDM] = hmm->del[k].t[MATCH]; - plan7->t[k][TDD] = hmm->del[k].t[DELETE]; - plan7->t[k][TIM] = hmm->ins[k].t[MATCH]; - plan7->t[k][TII] = hmm->ins[k].t[INSERT]; - } - - for (k = 1; k <= hmm->M; k++) - for (x = 0; x < Alphabet_size; x++) - plan7->mat[k][x] = hmm->mat[k].p[x]; - - for (k = 1; k < hmm->M; k++) - for (x = 0; x < Alphabet_size; x++) - plan7->ins[k][x] = hmm->ins[k].p[x]; - - plan7->tbd1 = hmm->mat[0].t[DELETE] / (hmm->mat[0].t[DELETE] + hmm->mat[0].t[MATCH]); - - /* We have to make up the null transition p1; use default */ - P7DefaultNullModel(plan7->null, &(plan7->p1)); - for (x = 0; x < Alphabet_size; x++) - plan7->null[x] = hmm->null[x]; - - if (hmm->name != NULL) - Plan7SetName(plan7, hmm->name); - if (hmm->flags & HMM_REF) { - strcpy(plan7->rf, hmm->ref); - plan7->flags |= PLAN7_RF; - } - if (hmm->flags & HMM_CS) { - strcpy(plan7->cs, hmm->cs); - plan7->flags |= PLAN7_CS; - } - - Plan7LSConfig(plan7); /* configure specials for ls-style alignment */ - Plan7Renormalize(plan7); /* mainly to correct for missing ID and DI */ - plan7->flags |= PLAN7_HASPROB; /* probabilities are valid */ - plan7->flags &= ~PLAN7_HASBITS; /* scores are not valid */ - *ret_plan7 = plan7; -} - - diff --git a/forester/archive/RIO/others/hmmer/src/plan9.c b/forester/archive/RIO/others/hmmer/src/plan9.c deleted file mode 100644 index f7bf7bb..0000000 --- a/forester/archive/RIO/others/hmmer/src/plan9.c +++ /dev/null @@ -1,141 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This 
source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* plan9.c - * SRE, Wed Apr 8 07:35:30 1998 - * - * alloc, free, and initialization of old Plan9 (HMMER 1.x) functions. - * Rescued from the wreckage of HMMER 1.9m code. - */ - -#include -#include -#include -#include -#include "squid.h" -#include "config.h" -#include "structs.h" -#include "funcs.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - - -struct plan9_s * -P9AllocHMM(int M) /* length of model to make */ -{ - struct plan9_s *hmm; /* RETURN: blank HMM */ - - hmm = (struct plan9_s *) MallocOrDie (sizeof(struct plan9_s)); - hmm->ins = (struct basic_state *) MallocOrDie (sizeof(struct basic_state) * (M+2)); - hmm->del = (struct basic_state *) MallocOrDie (sizeof(struct basic_state) * (M+2)); - hmm->mat = (struct basic_state *) MallocOrDie (sizeof(struct basic_state) * (M+2)); - hmm->ref = (char *) MallocOrDie ((M+2) * sizeof(char)); - hmm->cs = (char *) MallocOrDie ((M+2) * sizeof(char)); - hmm->xray = (float *) MallocOrDie ((M+2) * sizeof(float) * NINPUTS); - hmm->M = M; - hmm->name = Strdup("unnamed"); /* name is not optional. */ - - hmm->flags = 0; - P9ZeroHMM(hmm); - return hmm; -} -int -P9FreeHMM(struct plan9_s *hmm) -{ - if (hmm == NULL) return 0; - free(hmm->ref); - free(hmm->cs); - free(hmm->xray); - free(hmm->name); - if (hmm->mat != NULL) free (hmm->mat); - if (hmm->ins != NULL) free (hmm->ins); - if (hmm->del != NULL) free (hmm->del); - free(hmm); - return 1; -} - - -/* Function: P9ZeroHMM() - * - * Purpose: Zero emission and transition counts in an HMM. 
- */ -void -P9ZeroHMM(struct plan9_s *hmm) -{ - int k, ts, idx; - - for (k = 0; k <= hmm->M+1; k++) - { - for (ts = 0; ts < 3; ts++) - { - hmm->mat[k].t[ts] = 0.0; - hmm->ins[k].t[ts] = 0.0; - hmm->del[k].t[ts] = 0.0; - } - for (idx = 0; idx < Alphabet_size; idx++) - { - hmm->mat[k].p[idx] = 0.0; - hmm->ins[k].p[idx] = 0.0; - hmm->del[k].p[idx] = 0.0; - } - } -} - - - - - -/* Function: P9Renormalize() - * - * Normalize all P distributions so they sum to 1. - * P distributions that are all 0, or contain negative - * probabilities, are left untouched. - * - * Returns 1 on success, or 0 on failure. - */ -void -P9Renormalize(struct plan9_s *hmm) -{ - int k; /* counter for states */ - - for (k = 0; k <= hmm->M ; k++) - { - /* match state transition frequencies */ - FNorm(hmm->mat[k].t, 3); - FNorm(hmm->ins[k].t, 3); - if (k > 0) FNorm(hmm->del[k].t, 3); - - if (k > 0) FNorm(hmm->mat[k].p, Alphabet_size); - FNorm(hmm->ins[k].p, Alphabet_size); - } -} - -/* Function: P9DefaultNullModel() - * - * Purpose: Set up a default random sequence model, using - * global aafq[]'s for protein or 0.25 for nucleic - * acid. randomseq is alloc'ed in caller. Alphabet information - * must already be known. 
- */ -void -P9DefaultNullModel(float *null) -{ - int x; - if (Alphabet_type == hmmAMINO) - for (x = 0; x < Alphabet_size; x++) - null[x] = aafq[x]; - else if (Alphabet_type == hmmNUCLEIC) - for (x = 0; x < Alphabet_size; x++) - null[x] = 0.25; - else - Die("No support for non-protein, non-nucleic acid alphabets."); -} diff --git a/forester/archive/RIO/others/hmmer/src/postprob.c b/forester/archive/RIO/others/hmmer/src/postprob.c deleted file mode 100644 index e57b1fc..0000000 --- a/forester/archive/RIO/others/hmmer/src/postprob.c +++ /dev/null @@ -1,709 +0,0 @@ -/************************************************************ - * Copyright (C) 1998 Ian Holmes (ihh@sanger.ac.uk) - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* postprob.c - * Author: Ian Holmes (ihh@sanger.ac.uk, Jun 5 1998) - * Derived from core_algorithms.c (SRE, Nov 11 1996) - * Incorporated SRE, Sat Nov 6 09:07:12 1999 [Cold Spring Harbor] - * - * RCS $Id: postprob.c,v 1.1.1.1 2005/03/22 08:34:15 cmzmasek Exp $ - ***************************************************************** - * IHH's notes: - * - * Functions for working with posterior probabilities, - * including unfussed "backwards" and "optimal accuracy" - * implementations. 
- ***************************************************************** - * SRE's notes: - * - * Simple API example: - * struct p7trace_s *tr; - * struct dpmatrix_s *fwd; - * struct dpmatrix_s *bck; - * struct dpmatrix_s *posterior; - * char *postcode; - * - * (get a traceback from somewhere: P7Viterbi() or a modelmaker) - * (get an HMM from somewhere: read file or construct it) - * P7Forward (dsq, len, hmm, &fwd); - * P7Backward(dsq, len, hmm, &bck); - * posterior = bck; -- can alloc posterior, but also can re-use bck -- - * P7EmitterPosterior(len, hmm, fwd, bck, posterior); - * postcode = PostalCode(len, posterior, tr); - * - * MSAAppendGR(msa, "POST", seqidx, postcode); -- or a similar annotation call -- - * - * free(postcode); - * FreePlan7Matrix(fwd); - * FreePlan7Matrix(bck); - * - * P7OptimalAccuracy() - the Durbin/Holmes optimal accuracy - * alignment algorithm. Takes a sequence - * and an HMM, returns an alignment as - * a trace structure. - * - * P7Backward() - The Backward() algorithm, counterpart - * of P7Forward() in core_algorithms.c. - * - * P7EmitterPosterior()- The heart of postprob.c: given a Forward - * and a Backward matrix, calculate a new matrix - * that contains the posterior probabilities - * for each symbol i being emitted by - * state k (so, \sum_k p(k | x_i) = 1.0). - * - * P7FillOptimalAccuracy() - The core DP algorithm called by - * P7OptimalAccuracy(). - * - * P7OptimalAccuracyTrace() - the traceback algorithm called by - * P7FillOptimalAccuracy(). - * - * PostalCode() - Create a character string for annotating - * an alignment. - * - * No small memory variants of these algorithms are available - * right now. - */ - -#include "structs.h" -#include "config.h" -#include "funcs.h" -#include "squid.h" - - -/* Function: P7OptimalAccuracy() - * - * Purpose: The optimal accuracy dynamic programming algorithm. - * Identical to Viterbi() except that posterior residue - * label probabilities are used as scores. 
- * - * Args: dsq - sequence in digitized form - * L - length of dsq - * hmm - the model - * ret_tr - RETURN: traceback; pass NULL if it's not wanted - * - * Return: log ( sum_{residues} P(label|M,D) ), as a bit score - * (i.e. log of expected accuracy) - */ -float -P7OptimalAccuracy(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr) -{ - double sc; - struct dpmatrix_s *forward; - struct dpmatrix_s *backward; - - (void) P7Forward(dsq, L, hmm, &forward); - (void) P7Backward(dsq, L, hmm, &backward); - - P7EmitterPosterior(L, hmm, forward, backward, backward); /* Re-use backward matrix for posterior scores */ - - sc = P7FillOptimalAccuracy(L, hmm->M, backward, forward, ret_tr); /* Re-use forward matrix for optimal accuracy scores */ - - FreePlan7Matrix(forward); - FreePlan7Matrix(backward); - - return sc; -} - - - -/* Function: P7Backward() - * - * Purpose: The Backward dynamic programming algorithm. - * The scaling issue is dealt with by working in log space - * and calling ILogsum(); this is a slow but robust approach. - * - * Args: dsq - sequence in digitized form - * L - length of dsq - * hmm - the model - * ret_mx - RETURN: dp matrix; pass NULL if it's not wanted - * - * Return: log P(S|M)/P(S|R), as a bit score. - */ -float -P7Backward(char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s **ret_mx) -{ - struct dpmatrix_s *mx; - int **xmx; - int **mmx; - int **imx; - int **dmx; - int i,k; - int sc; - - /* Allocate a DP matrix with 0..L rows, 0..M-1 columns. - */ - mx = AllocPlan7Matrix(L+1, hmm->M, &xmx, &mmx, &imx, &dmx); - - /* Initialization of the L row. - * Note that xmx[i][stS] = xmx[i][stN] by definition for all i, - * so stS need not be calculated in backward DP matrices. 
- */ - xmx[L][XMC] = hmm->xsc[XTC][MOVE]; /* C<-T */ - xmx[L][XME] = xmx[L][XMC] + hmm->xsc[XTE][MOVE]; /* E<-C, no C-tail */ - xmx[L][XMJ] = xmx[L][XMB] = xmx[L][XMN] = -INFTY; /* need seq to get out from here */ - for (k = hmm->M; k >= 1; k--) { - mmx[L][k] = xmx[L][XME] + hmm->esc[k]; /* M<-E ... */ - mmx[L][k] += hmm->msc[(int) dsq[L]][k]; /* ... + emitted match symbol */ - imx[L][k] = dmx[L][k] = -INFTY; /* need seq to get out from here */ - } - - /* Recursion. Done as a pull. - * Note slightly wasteful boundary conditions: - * M_M precalculated, D_M set to -INFTY, - * D_1 wastefully calculated. - * Scores for transitions to D_M also have to be hacked to -INFTY, - * as Plan7Logoddsify does not do this for us (I think? - ihh). - */ - hmm->tsc[hmm->M-1][TDD] = hmm->tsc[hmm->M-1][TMD] = -INFTY; /* no D_M state -- HACK -- should be in Plan7Logoddsify */ - for (i = L-1; i >= 0; i--) - { - /* Do the special states first. - * remember, C, N and J emissions are zero score by definition - */ - xmx[i][XMC] = xmx[i+1][XMC] + hmm->xsc[XTC][LOOP]; - - xmx[i][XMB] = -INFTY; - /* The following section has been hacked to fit a bug in core_algorithms.c - * The "correct" code is: - * for (k = hmm->M; k >= 1; k--) - * xmx[i][XMB] = ILogsum(xmx[i][XMB], mmx[i+1][k] + hmm->bsc[k]; - * - * The following code gives the same results as core_algorithms.c: - */ - xmx[i][XMB] = ILogsum(xmx[i][XMB], mmx[i+1][hmm->M] + hmm->bsc[hmm->M-1]); - for (k = hmm->M-1; k >= 1; k--) - xmx[i][XMB] = ILogsum(xmx[i][XMB], mmx[i+1][k] + hmm->bsc[k]); - - xmx[i][XMJ] = ILogsum(xmx[i][XMB] + hmm->xsc[XTJ][MOVE], - xmx[i+1][XMJ] + hmm->xsc[XTJ][LOOP]); - - xmx[i][XME] = ILogsum(xmx[i][XMC] + hmm->xsc[XTE][MOVE], - xmx[i][XMJ] + hmm->xsc[XTE][LOOP]); - - xmx[i][XMN] = ILogsum(xmx[i][XMB] + hmm->xsc[XTN][MOVE], - xmx[i+1][XMN] + hmm->xsc[XTN][LOOP]); - - /* Now the main states. Note the boundary conditions at M. 
- */ - - if (i>0) { - mmx[i][hmm->M] = xmx[i][XME] + hmm->esc[hmm->M] + hmm->msc[(int) dsq[i]][hmm->M]; - dmx[i][hmm->M] = -INFTY; - for (k = hmm->M-1; k >= 1; k--) - { - mmx[i][k] = ILogsum(ILogsum(xmx[i][XME] + hmm->esc[k], - mmx[i+1][k+1] + hmm->tsc[k][TMM]), - ILogsum(imx[i+1][k] + hmm->tsc[k][TMI], - dmx[i][k+1] + hmm->tsc[k][TMD])); - mmx[i][k] += hmm->msc[(int) dsq[i]][k]; - - imx[i][k] = ILogsum(imx[i+1][k] + hmm->tsc[k][TII], - mmx[i+1][k+1] + hmm->tsc[k][TIM]); - imx[i][k] += hmm->isc[(int) dsq[i]][k]; - - dmx[i][k] = ILogsum(dmx[i][k+1] + hmm->tsc[k][TDD], - mmx[i+1][k+1] + hmm->tsc[k][TDM]); - - } - } - - } - - sc = xmx[0][XMN]; - - if (ret_mx != NULL) *ret_mx = mx; - else FreePlan7Matrix(mx); - - return Scorify(sc); /* the total Backward score. */ -} - - -/* Function: P7EmitterPosterior() - * - * Purpose: Combines Forward and Backward matrices into a posterior - * probability matrix. - * The entries in row i of this matrix are the logs of the - * posterior probabilities of each state emitting symbol i of - * the sequence, i.e. all entries for non-emitting states are -INFTY. - * The caller must allocate space for the matrix, although the - * backward matrix can be used instead (overwriting it will not - * compromise the algorithm). 
- * - * Args: L - length of sequence - * hmm - the model - * forward - pre-calculated forward matrix - * backward - pre-calculated backward matrix - * mx - pre-allocated dynamic programming matrix - * - * Return: void - */ -void -P7EmitterPosterior(int L, - struct plan7_s *hmm, - struct dpmatrix_s *forward, - struct dpmatrix_s *backward, - struct dpmatrix_s *mx) -{ - int i; - int k; - int sc; - - sc = backward->xmx[0][XMN]; - - for (i = L; i >= 1; i--) - { - mx->xmx[i][XMC] = forward->xmx[i-1][XMC] + hmm->xsc[XTC][LOOP] + backward->xmx[i][XMC] - sc; - - mx->xmx[i][XMJ] = forward->xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP] + backward->xmx[i][XMJ] - sc; - - mx->xmx[i][XMN] = forward->xmx[i-1][XMN] + hmm->xsc[XTN][LOOP] + backward->xmx[i][XMN] - sc; - - mx->xmx[i][XMB] = mx->xmx[i][XME] = -INFTY; - - for (k = 1; k < hmm->M; k++) { - mx->mmx[i][k] = backward->mmx[i][k]; - mx->mmx[i][k] += ILogsum(ILogsum(forward->mmx[i-1][k-1] + hmm->tsc[k-1][TMM], - forward->imx[i-1][k-1] + hmm->tsc[k-1][TIM]), - ILogsum(forward->xmx[i-1][XMB] + hmm->bsc[k], - forward->dmx[i-1][k-1] + hmm->tsc[k-1][TDM])); - mx->mmx[i][k] -= sc; - - mx->imx[i][k] = backward->imx[i][k]; - mx->imx[i][k] += ILogsum(forward->mmx[i-1][k] + hmm->tsc[k][TMI], - forward->imx[i-1][k] + hmm->tsc[k][TII]); - mx->imx[i][k] -= sc; - - mx->dmx[i][k] = -INFTY; - } - mx->mmx[i][hmm->M] = backward->mmx[i][hmm->M]; - mx->mmx[i][hmm->M] += ILogsum(ILogsum(forward->mmx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TMM], - forward->imx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TIM]), - ILogsum(forward->xmx[i-1][XMB] + hmm->bsc[hmm->M], - forward->dmx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TDM])); - mx->mmx[i][hmm->M] -= sc; - - mx->imx[i][hmm->M] = mx->dmx[i][hmm->M] = mx->dmx[i][0] = -INFTY; - - } -} - - -/* Function: P7FillOptimalAccuracy() - * - * Purpose: The core of the optimal accuracy dynamic programming algorithm. - * Identical to Viterbi() except that scores are given by a - * posterior matrix (that the caller must pre-calculate). 
- * Also, the caller must pre-allocate the optimal accuracy matrix - * (this allows the forward matrix to be re-used). - * P7OptimalAccuracy() does all this for you and cleans up. - * - * - * Args: L - length of sequence - * M - length of model - * posterior - pre-calculated emitter posterior matrix - * mx - pre-allocated dynamic programming matrix - * ret_tr - RETURN: traceback; pass NULL if it's not wanted - * - * Return: log ( sum_{residues} P(label|M,D) ), as a bit score - * (i.e. log of expected accuracy) - */ -float P7FillOptimalAccuracy(int L, - int M, - struct dpmatrix_s *posterior, - struct dpmatrix_s *mx, - struct p7trace_s **ret_tr) -{ - struct p7trace_s *tr; - int **xmx; - int **mmx; - int **imx; - int **dmx; - int i,k; - int sc; - - xmx = mx->xmx; - mmx = mx->mmx; - imx = mx->imx; - dmx = mx->dmx; - - /* Initialization of the zero row. - * Each cell in the optimal accuracy matrix holds the log of the expected - * of correctly assigned symbols up to that point. - * To begin with, everything is log(0) = -INFTY. - */ - xmx[0][XMN] = xmx[0][XMB] = xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; - for (k = 0; k <= M; k++) - mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; - - /* Recursion. Done as a pull. 
- * Note some slightly wasteful boundary conditions: - * D_M and I_M are wastefully calculated (they don't exist) - */ - for (i = 1; i <= L; i++) - { - mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY; - - for (k = 1; k <= M; k++) - { - /* match state */ - mmx[i][k] = -INFTY; - if ((sc = mmx[i-1][k-1]) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = imx[i-1][k-1]) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = dmx[i-1][k-1]) > mmx[i][k]) - mmx[i][k] = sc; - if ((sc = xmx[i-1][XMB]) > mmx[i][k]) - mmx[i][k] = sc; - mmx[i][k] = ILogsum(mmx[i][k], posterior->mmx[i][k]); - - /* delete state */ - dmx[i][k] = -INFTY; - if ((sc = mmx[i][k-1]) > dmx[i][k]) - dmx[i][k] = sc; - if ((sc = dmx[i][k-1]) > dmx[i][k]) - dmx[i][k] = sc; - - /* insert state */ - imx[i][k] = -INFTY; - if ((sc = mmx[i-1][k]) > imx[i][k]) - imx[i][k] = sc; - if ((sc = imx[i-1][k]) > imx[i][k]) - imx[i][k] = sc; - imx[i][k] = ILogsum(imx[i][k], posterior->imx[i][k]); - } - - /* Now the special states. Order is important here. - * remember, C and J emissions are zero score by definition, - */ - - /* N state */ - xmx[i][XMN] = -INFTY; - if ((sc = ILogsum(xmx[i-1][XMN], posterior->xmx[i][XMN])) > -INFTY) - xmx[i][XMN] = sc; - - /* E state */ - xmx[i][XME] = -INFTY; - for (k = 1; k <= M; k++) - if ((sc = mmx[i][k]) > xmx[i][XME]) - xmx[i][XME] = sc; - - /* J state */ - xmx[i][XMJ] = -INFTY; - if ((sc = ILogsum(xmx[i-1][XMJ], posterior->xmx[i][XMJ])) > -INFTY) - xmx[i][XMJ] = sc; - if ((sc = xmx[i][XME]) > xmx[i][XMJ]) /* no E->J emission */ - xmx[i][XMJ] = sc; - - /* B state */ - xmx[i][XMB] = -INFTY; - if ((sc = xmx[i][XMN]) > -INFTY) - xmx[i][XMB] = sc; - if ((sc = xmx[i][XMJ]) > xmx[i][XMB]) - xmx[i][XMB] = sc; - - /* C state */ - xmx[i][XMC] = -INFTY; - if ((sc = ILogsum(xmx[i-1][XMC], posterior->xmx[i][XMC])) > -INFTY) - xmx[i][XMC] = sc; - if ((sc = xmx[i][XME]) > xmx[i][XMC]) /* no E->C emission */ - xmx[i][XMC] = sc; - } - - /* T state (not stored) */ - sc = xmx[L][XMC]; - - if (ret_tr != NULL) { - 
P7OptimalAccuracyTrace(L, M, posterior, mx, &tr); - *ret_tr = tr; - } - - return Score2Prob(sc,1); /* the log of the expected accuracy. */ -} - - -/* Function: P7OptimalAccuracyTrace() - * - * Purpose: Traceback of an optimal accuracy matrix: i.e. retrieval - * of optimum alignment. - * - * Args: L - length of sequence - * M - length of HMM - * posterior - the posterior matrix - * mx - the matrix to trace back in, (L+1) x M - * ret_tr - RETURN: traceback. - * - * Return: (void) - * ret_tr is allocated here. Free using P7FreeTrace(). - */ -void -P7OptimalAccuracyTrace(int L, - int M, - struct dpmatrix_s *posterior, - struct dpmatrix_s *mx, - struct p7trace_s **ret_tr) -{ - struct p7trace_s *tr; - int curralloc; /* current allocated length of trace */ - int tpos; /* position in trace */ - int i; /* position in seq (1..L) */ - int k; /* position in model (1..M) */ - int **xmx, **mmx, **imx, **dmx; - int sc; /* temp var for pre-emission score */ - - /* Overallocate for the trace. - * S-N-B- ... - E-C-T : 6 states + L is minimum trace; - * add L more as buffer. - */ - curralloc = L * 2 + 6; - P7AllocTrace(curralloc, &tr); - - xmx = mx->xmx; - mmx = mx->mmx; - imx = mx->imx; - dmx = mx->dmx; - - /* Initialization of trace - * We do it back to front; ReverseTrace() is called later. 
- */ - tr->statetype[0] = STT; - tr->nodeidx[0] = 0; - tr->pos[0] = 0; - tr->statetype[1] = STC; - tr->nodeidx[1] = 0; - tr->pos[1] = 0; - tpos = 2; - i = L; /* current i (seq pos) we're trying to assign */ - - /* Traceback - */ - while (tr->statetype[tpos-1] != STS) { - switch (tr->statetype[tpos-1]) { - case STM: /* M connects from i-1,k-1, or B */ - sc = mmx[i+1][k+1]; - if (sc == ILogsum(mmx[i][k], posterior->mmx[i+1][k+1]) && i > 0 && k > 0) - { - tr->statetype[tpos] = STM; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = i--; - } - else if (sc == ILogsum(imx[i][k], posterior->mmx[i+1][k+1]) && i > 0 && k > 0) - { - tr->statetype[tpos] = STI; - tr->nodeidx[tpos] = k; - tr->pos[tpos] = i--; - } - else if (sc == ILogsum(dmx[i][k], posterior->mmx[i+1][k+1]) && i > 0 && k > 1) - { - tr->statetype[tpos] = STD; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = 0; - } - else if (sc == ILogsum(xmx[i][XMB], posterior->mmx[i+1][k+1])) - { - tr->statetype[tpos] = STB; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - } - else Die("traceback failed"); - break; - - case STD: /* D connects from M,D */ - if (dmx[i][k+1] == mmx[i][k] && i > 0 && k > 0) - { - tr->statetype[tpos] = STM; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = i--; - } - else if (dmx[i][k+1] == dmx[i][k] && k > 1) - { - tr->statetype[tpos] = STD; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = 0; - } - else Die("traceback failed"); - break; - - case STI: /* I connects from M,I */ - sc = imx[i+1][k]; - if (sc == ILogsum(mmx[i][k], posterior->imx[i+1][k]) && i > 0 && k > 0) - { - tr->statetype[tpos] = STM; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = i--; - } - else if (sc == ILogsum(imx[i][k], posterior->imx[i+1][k]) && i > 0 && k > 0) - { - tr->statetype[tpos] = STI; - tr->nodeidx[tpos] = k; - tr->pos[tpos] = i--; - } - else Die("traceback failed"); - break; - - case STN: /* N connects from S, N */ - if (i == 0 && xmx[i][XMN] == -INFTY) - { - tr->statetype[tpos] = STS; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - } - 
else if (i > 0 && xmx[i+1][XMN] == ILogsum(xmx[i][XMN], posterior->xmx[i+1][XMN]) && i > 0) - { - tr->statetype[tpos] = STN; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; /* note convention adherence: */ - tr->pos[tpos-1] = i--; /* first N doesn't emit */ - } - else Die("traceback failed"); - break; - - case STB: /* B connects from N, J */ - if (xmx[i][XMB] == xmx[i][XMN]) - { - tr->statetype[tpos] = STN; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - } - else if (xmx[i][XMB] == xmx[i][XMJ]) - { - tr->statetype[tpos] = STJ; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; - } - else Die("traceback failed"); - break; - - case STE: /* E connects from any M state. k set here */ - for (k = M; k >= 1; k--) - if (xmx[i][XME] == mmx[i][k] && i > 0) - { - tr->statetype[tpos] = STM; - tr->nodeidx[tpos] = k--; - tr->pos[tpos] = i--; - break; - } - if (k <= 0) Die("traceback failed"); - break; - - case STC: /* C comes from C, E */ - if (xmx[i][XMC] == ILogsum(xmx[i-1][XMC], posterior->xmx[i][XMC]) && i > 0) - { - tr->statetype[tpos] = STC; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; /* note convention adherence: */ - tr->pos[tpos-1] = i--; /* first C doesn't emit */ - } - else if (xmx[i][XMC] == xmx[i][XME]) - { - tr->statetype[tpos] = STE; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; /* E is a nonemitter */ - } - else Die("Traceback failed."); - break; - - case STJ: /* J connects from E, J */ - if (xmx[i][XMJ] == ILogsum(xmx[i-1][XMJ], posterior->xmx[i][XMJ]) && i > 0) - { - tr->statetype[tpos] = STJ; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; /* note convention adherence: */ - tr->pos[tpos-1] = i--; /* first J doesn't emit */ - } - else if (xmx[i][XMJ] == xmx[i][XME]) - { - tr->statetype[tpos] = STE; - tr->nodeidx[tpos] = 0; - tr->pos[tpos] = 0; /* E is a nonemitter */ - } - else Die("Traceback failed."); - break; - - default: - Die("traceback failed"); - - } /* end switch over statetype[tpos-1] */ - - tpos++; - if (tpos == curralloc) - { /* grow trace if necessary */ - 
curralloc += L; - P7ReallocTrace(tr, curralloc); - } - - } /* end traceback, at S state; tpos == tlen now */ - tr->tlen = tpos; - P7ReverseTrace(tr); - *ret_tr = tr; - -} - - -/* Function: PostalCode() - * Date: SRE, Sun Nov 7 15:31:35 1999 [Cold Spring Harbor] - * - * Purpose: Given a traceback and one of Ian's posterior - * probability matrices, calculate a string that - * represents the confidence values on each - * residue in the sequence. - * - * The code string is 0..L-1 (L = len of target seq), - * so it's in the coordinate system of the sequence string; - * off by one from dsq; and convertible to the coordinate - * system of aseq using MakeAlignedString(). - * - * Values are 0-9,* - * for example, 9 means with >=90% posterior probabiility, - * residue i is aligned to the state k that it - * is assigned to in the given trace. - * - * Args: L - length of seq - * mx - posterior prob matrix: see P7EmitterPosterior() - * tr - a traceback to get a Postal code string for. - * - * Returns: char * array of codes, 0..L-1 - * Caller is responsible for free'ing it. - */ -static char -score2postcode(int sc) -{ - char i; - i = (char) (Score2Prob(sc, 1.) * 10.); - return ((i > 9) ? 
'*' : '0'+i); -} -char * -PostalCode(int L, struct dpmatrix_s *mx, struct p7trace_s *tr) -{ - int tpos; - int i; - int k; - char *postcode; - - postcode = MallocOrDie((L+1) * sizeof(char)); - for (tpos = 0; tpos < tr->tlen; tpos++) - { - i = tr->pos[tpos]; - k = tr->nodeidx[tpos]; - if (i == 0) continue; - - switch (tr->statetype[tpos]) { - case STM: postcode[i-1] = score2postcode(mx->mmx[i][k]); break; - case STI: postcode[i-1] = score2postcode(mx->imx[i][k]); break; - case STN: postcode[i-1] = score2postcode(mx->xmx[i][XMN]); break; - case STC: postcode[i-1] = score2postcode(mx->xmx[i][XMC]); break; - case STJ: postcode[i-1] = score2postcode(mx->xmx[i][XMJ]); break; - } - } - postcode[L] = '\0'; - - return postcode; -} diff --git a/forester/archive/RIO/others/hmmer/src/postprob.h b/forester/archive/RIO/others/hmmer/src/postprob.h deleted file mode 100644 index b09c036..0000000 --- a/forester/archive/RIO/others/hmmer/src/postprob.h +++ /dev/null @@ -1,55 +0,0 @@ -/************************************************************ - * Copyright (C) 1998 Ian Holmes (ihh@sanger.ac.uk) - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* postprob.h - * Author: Ian Holmes (ihh@sanger.ac.uk, Jun 5 1998) - * Derived from core_algorithms.c (SRE, Nov 11 1996) - * Incorporated SRE, Sat Nov 6 09:07:02 1999 - * - * Functions for working with posterior probabilities, - * including unfussed "backwards" and "optimal accuracy" - * implementations. - */ - -#ifndef POSTPROB_INCLUDED -#define POSTPROB_INCLUDED - -#include "structs.h" -#include "config.h" -#include "funcs.h" -#include "squid.h" - -/* Extra algorithms to work with posterior probabilities. 
- */ - -extern float P7OptimalAccuracy(char *dsq, int L, struct plan7_s *hmm, - struct p7trace_s **ret_tr); - -extern float P7Backward(char *dsq, int L, struct plan7_s *hmm, - struct dpmatrix_s **ret_mx); - -extern void P7EmitterPosterior(int L, struct plan7_s *hmm, - struct dpmatrix_s *forward, - struct dpmatrix_s *backward, - struct dpmatrix_s *mx); - -extern float P7FillOptimalAccuracy(int L, int M, - struct dpmatrix_s *posterior, - struct dpmatrix_s *mx, - struct p7trace_s **ret_tr); - -extern void P7OptimalAccuracyTrace(int L, int M, - struct dpmatrix_s *posterior, - struct dpmatrix_s *mx, - struct p7trace_s **ret_tr); - -#endif - diff --git a/forester/archive/RIO/others/hmmer/src/prior.c b/forester/archive/RIO/others/hmmer/src/prior.c deleted file mode 100644 index b2475ac..0000000 --- a/forester/archive/RIO/others/hmmer/src/prior.c +++ /dev/null @@ -1,725 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -/* prior.c - * SRE, Mon Nov 18 15:44:08 1996 - * - * Support for Dirichlet prior data structure, p7prior_s. - */ - -#include "config.h" -#include "structs.h" -#include "funcs.h" -#include "squid.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -static struct p7prior_s *default_amino_prior(void); -static struct p7prior_s *default_nucleic_prior(void); - -/* Function: P7AllocPrior(), P7FreePrior() - * - * Purpose: Allocation and free'ing of a prior structure. - * Very simple, but might get more complex someday. 
- */ -struct p7prior_s * -P7AllocPrior(void) -{ return (struct p7prior_s *) MallocOrDie (sizeof(struct p7prior_s)); } -void -P7FreePrior(struct p7prior_s *pri) -{ free(pri); } - - -/* Function: P7LaplacePrior() - * - * Purpose: Create a Laplace plus-one prior. (single component Dirichlets). - * Global alphabet info is assumed to have been set already. - * - * Args: (void) - * - * Return: prior. Allocated here; call FreePrior() to free it. - */ -struct p7prior_s * -P7LaplacePrior(void) -{ - struct p7prior_s *pri; - - pri = P7AllocPrior(); - pri->strategy = PRI_DCHLET; - - pri->tnum = 1; - pri->tq[0] = 1.; - FSet(pri->t[0], 8, 1.); - - pri->mnum = 1; - pri->mq[0] = 1.; - FSet(pri->m[0], Alphabet_size, 1.); - - pri->inum = 1; - pri->iq[0] = 1.; - FSet(pri->i[0], Alphabet_size, 1.); - - return pri; -} - -/* Function: P7DefaultPrior() - * - * Purpose: Set up a somewhat more realistic single component - * Dirichlet prior than Laplace. - */ -struct p7prior_s * -P7DefaultPrior(void) -{ - switch (Alphabet_type) { - case hmmAMINO: return default_amino_prior(); - case hmmNUCLEIC: return default_nucleic_prior(); - case hmmNOTSETYET: Die("Can't set prior; alphabet type not set yet"); - } - /*NOTREACHED*/ - return NULL; -} - -/* Function: P7ReadPrior() - * - * Purpose: Input a prior from disk file. 
- */ -struct p7prior_s * -P7ReadPrior(char *prifile) -{ - FILE *fp; - struct p7prior_s *pri; - char *sptr; - int q, x; - - if ((fp = fopen(prifile, "r")) == NULL) - Die("Failed to open HMMER prior file %s\n", prifile); - pri = P7AllocPrior(); - - /* First entry is the strategy: - * Only standard Dirichlet prior (simple or mixture) is supported in Plan7 so far - */ - sptr = Getword(fp, sqdARG_STRING); - s2upper(sptr); - if (strcmp(sptr, "DIRICHLET") == 0) pri->strategy = PRI_DCHLET; - else Die("No such prior strategy %s; failed to parse file %s", sptr, prifile); - - /* Second entry is the alphabet type: - * Amino or Nucleic - */ - sptr = Getword(fp, sqdARG_STRING); - s2upper(sptr); - if (strcmp(sptr, "AMINO") == 0) - { - if (Alphabet_type != hmmAMINO) - Die("HMM and/or sequences are DNA/RNA; can't use protein prior %s", prifile); - } - else if (strcmp(sptr, "NUCLEIC") == 0) - { - if (Alphabet_type != hmmNUCLEIC) - Die("HMM and/or sequences are protein; can't use DNA/RNA prior %s", prifile); - } - else - Die("Alphabet \"%s\" in prior file %s isn't valid.", sptr, prifile); - - /* State transition priors: - * # of mixtures. - * then for each mixture: - * prior P(q) - * Dirichlet terms for Tmm, Tmi, Tmd, Tim, Tii, Tid, Tdm, Tdi, Tdd - */ - pri->tnum = atoi(Getword(fp, sqdARG_INT)); - if (pri->tnum < 0) - Die("%d is bad; need at least one state transition mixture component", pri->tnum); - if (pri->tnum > MAXDCHLET) - Die("%d is bad, too many transition components (MAXDCHLET = %d)\n", MAXDCHLET); - for (q = 0; q < pri->tnum; q++) - { - pri->tq[q] = (float) atof(Getword(fp, sqdARG_FLOAT)); - for (x = 0; x < 7; x++) - pri->t[q][x] = (float) atof(Getword(fp, sqdARG_FLOAT)); - } - - /* Match emission priors: - * # of mixtures. 
- * then for each mixture: - * prior P(q) - * Dirichlet terms for Alphabet_size symbols in Alphabet - */ - pri->mnum = atoi(Getword(fp, sqdARG_INT)); - if (pri->mnum < 0) - Die("%d is bad; need at least one match emission mixture component", pri->mnum); - if (pri->mnum > MAXDCHLET) - Die("%d is bad; too many match components (MAXDCHLET = %d)\n", pri->mnum, MAXDCHLET); - - for (q = 0; q < pri->mnum; q++) - { - pri->mq[q] = (float) atof(Getword(fp, sqdARG_FLOAT)); - for (x = 0; x < Alphabet_size; x++) - pri->m[q][x] = (float) atof(Getword(fp, sqdARG_FLOAT)); - } - - /* Insert emission priors: - * # of mixtures. - * then for each mixture component: - * prior P(q) - * Dirichlet terms for Alphabet_size symbols in Alphabet - */ - pri->inum = atoi(Getword(fp, sqdARG_INT)); - if (pri->inum < 0) - Die("%d is bad; need at least one insert emission mixture component", pri->inum); - if (pri->inum > MAXDCHLET) - Die("%d is bad; too many insert components (MAXDCHLET = %d)\n", pri->inum, MAXDCHLET); - for (q = 0; q < pri->inum; q++) - { - pri->iq[q] = (float) atof(Getword(fp, sqdARG_FLOAT)); - for (x = 0; x < Alphabet_size; x++) - pri->i[q][x] = (float) atof(Getword(fp, sqdARG_FLOAT)); - } - - fclose(fp); - return pri; -} - - -/* Function: PAMPrior() - * - * Purpose: Produces an ad hoc "Dirichlet mixture" prior for - * match emissions, using a PAM matrix. - * - * Side effect notice: PAMPrior() replaces the match - * emission section of an existing Dirichlet prior, - * which is /expected/ to be a simple one-component - * kind of prior. The insert emissions /must/ be a - * one-component prior (because of details in how - * PriorifyEmissionVector() is done). However, - * the transitions /could/ be a mixture Dirichlet prior - * without causing problems. In other words, the - * -p and -P options of hmmb can coexist, but there - * may be conflicts. 
PAMPrior() checks for these, - * so there's no serious problem, except that the - * error message from PAMPrior() might be confusing to - * a user. - */ -void -PAMPrior(char *pamfile, struct p7prior_s *pri, float wt) -{ - FILE *fp; - char *blastpamfile; /* BLAST looks in aa/ subdirectory of BLASTMAT */ - int **pam; - float scale; - int xi, xj; - int idx1, idx2; - - if (Alphabet_type != hmmAMINO) - Die("PAM prior is only valid for protein sequences"); - if (pri->strategy != PRI_DCHLET) - Die("PAM prior may only be applied over an existing Dirichlet prior"); - if (pri->inum != 1) - Die("PAM prior requires that the insert emissions be a single Dirichlet"); - if (MAXDCHLET < 20) - Die("Whoa, code is misconfigured; MAXDCHLET must be >= 20 for PAM prior"); - - blastpamfile = FileConcat("aa", pamfile); - - if ((fp = fopen(pamfile, "r")) == NULL && - (fp = EnvFileOpen(pamfile, "BLASTMAT", NULL)) == NULL && - (fp = EnvFileOpen(blastpamfile, "BLASTMAT", NULL)) == NULL) - Die("Failed to open PAM scoring matrix file %s", pamfile); - if (! ParsePAMFile(fp, &pam, &scale)) - Die("Failed to parse PAM scoring matrix file %s", pamfile); - fclose(fp); - free(blastpamfile); - - pri->strategy = PRI_PAM; - pri->mnum = 20; - - /* Convert PAM entries back to conditional prob's P(xj | xi), - * which we'll use as "pseudocounts" weighted by wt. - */ - for (xi = 0; xi < Alphabet_size; xi++) - for (xj = 0; xj < Alphabet_size; xj++) - { - idx1 = Alphabet[xi] - 'A'; - idx2 = Alphabet[xj] - 'A'; - pri->m[xi][xj] = aafq[xj] * exp((float) pam[idx1][idx2] * scale); - } - - /* Normalize so that rows add up to wt. - * i.e. Sum(xj) mat[xi][xj] = wt for every row xi - */ - for (xi = 0; xi < Alphabet_size; xi++) - { - pri->mq[xi] = 1. 
/ Alphabet_size; - FNorm(pri->m[xi], Alphabet_size); - FScale(pri->m[xi], Alphabet_size, wt); - } - - Free2DArray((void **)pam,27); -} - - -/* Function: P7DefaultNullModel() - * - * Purpose: Set up a default random sequence model, using - * global aafq[]'s for protein or 1/Alphabet_size for anything - * else. randomseq is alloc'ed in caller. Alphabet information - * must already be known. - */ -void -P7DefaultNullModel(float *null, float *ret_p1) -{ - int x; - if (Alphabet_type == hmmAMINO) { - for (x = 0; x < Alphabet_size; x++) - null[x] = aafq[x]; - *ret_p1 = 350./351.; /* rationale: approx avg protein length. */ - } else { - for (x = 0; x < Alphabet_size; x++) - null[x] = 1.0 / (float) Alphabet_size; - *ret_p1 = 1000./1001.; /* rationale: approx inter-Alu distance. */ - } -} - -void -P7ReadNullModel(char *rndfile, float *null, float *ret_p1) -{ - FILE *fp; - char *s; - int x; - int type = 0; - - if ((fp = fopen(rndfile, "r")) == NULL) - Die("Failed to open null model file %s\n", rndfile); - if ((s = Getword(fp, sqdARG_STRING)) == NULL) goto FAILURE; - s2upper(s); - if (strcmp(s, "NUCLEIC") == 0) type = hmmNUCLEIC; - else if (strcmp(s, "AMINO") == 0) type = hmmAMINO; - else goto FAILURE; - /* check/set alphabet type */ - if (Alphabet_type == 0) - SetAlphabet(type); - else if (Alphabet_type != type) - Die("Alphabet type conflict; null model in %s is inappropriate\n", rndfile); - /* parse the file */ - for (x = 0; x < Alphabet_size; x++) { - if ((s = Getword(fp, sqdARG_FLOAT)) == NULL) goto FAILURE; - null[x] = atof(s); - } - if ((s = Getword(fp, sqdARG_FLOAT)) == NULL) goto FAILURE; - *ret_p1 = atof(s); - - fclose(fp); - return; - -FAILURE: - fclose(fp); - Die("%s is not in HMMER null model file format", rndfile); -} - - -/* Function: P7PriorifyHMM() - * - * Purpose: Add pseudocounts to an HMM using Dirichlet priors, - * and renormalize the HMM. 
- * - * Args: hmm -- the HMM to add counts to (counts form) - * pri -- the Dirichlet prior to use - * - * Return: (void) - * HMM returns in probability form. - */ -void -P7PriorifyHMM(struct plan7_s *hmm, struct p7prior_s *pri) -{ - int k; /* counter for model position */ - float d; /* a denominator */ - float tq[MAXDCHLET]; /* prior distribution over mixtures */ - float mq[MAXDCHLET]; /* prior distribution over mixtures */ - float iq[MAXDCHLET]; /* prior distribution over mixtures */ - - /* Model-dependent transitions are handled simply; Laplace. - */ - FSet(hmm->begin+2, hmm->M-1, 0.); /* wipe internal BM entries */ - FSet(hmm->end+1, hmm->M-1, 0.); /* wipe internal ME exits */ - d = hmm->tbd1 + hmm->begin[1] + 2.; - hmm->tbd1 = (hmm->tbd1 + 1.)/ d; - hmm->begin[1] = (hmm->begin[1] + 1.)/ d; - hmm->end[hmm->M] = 1.0; - - /* Main model transitions and emissions - */ - for (k = 1; k < hmm->M; k++) - { - /* The following code chunk is experimental. - * Collaboration with Michael Asman, Erik Sonnhammer, CGR Stockholm. - * Only activated if X-PR* annotation has been used, in which - * priors are overridden and a single Dirichlet component is - * specified for each column (using structural annotation). - * If X-PR* annotation is not used, which is usually the case, - * the following code has no effect (observe how the real prior - * distributions are copied into tq, mq, iq). 
- */ - if (hmm->tpri != NULL && hmm->tpri[k] >= 0) - { - if (hmm->tpri[k] >= pri->tnum) Die("X-PRT annotation out of range"); - FSet(tq, pri->tnum, 0.0); - tq[hmm->tpri[k]] = 1.0; - } - else - FCopy(tq, pri->tq, pri->tnum); - if (hmm->mpri != NULL && hmm->mpri[k] >= 0) - { - if (hmm->mpri[k] >= pri->mnum) Die("X-PRM annotation out of range"); - FSet(mq, pri->mnum, 0.0); - mq[hmm->mpri[k]] = 1.0; - } - else - FCopy(mq, pri->mq, pri->mnum); - if (hmm->ipri != NULL && hmm->ipri[k] >= 0) - { - if (hmm->ipri[k] >= pri->inum) Die("X-PRI annotation out of range"); - FSet(iq, pri->inum, 0.0); - iq[hmm->ipri[k]] = 1.0; - } - else - FCopy(iq, pri->iq, pri->inum); - - /* This is the main line of the code: - */ - P7PriorifyTransitionVector(hmm->t[k], pri, tq); - P7PriorifyEmissionVector(hmm->mat[k], pri, pri->mnum, mq, pri->m, NULL); - P7PriorifyEmissionVector(hmm->ins[k], pri, pri->inum, iq, pri->i, NULL); - } - - /* We repeat the above steps just for the final match state, M. - */ - if (hmm->mpri != NULL && hmm->mpri[hmm->M] >= 0) - { - if (hmm->mpri[hmm->M] >= pri->mnum) Die("X-PRM annotation out of range"); - FSet(mq, pri->mnum, 0.0); - mq[hmm->mpri[hmm->M]] = 1.0; - } - else - FCopy(mq, pri->mq, pri->mnum); - - P7PriorifyEmissionVector(hmm->mat[hmm->M], pri, pri->mnum, mq, pri->m, NULL); - - /* Now we're done. Convert the counts-based HMM to probabilities. - */ - Plan7Renormalize(hmm); -} - - -/* Function: P7PriorifyEmissionVector() - * - * Purpose: Add prior pseudocounts to an observed - * emission count vector and renormalize. - * - * Can return the posterior mixture probabilities - * P(q | counts) if ret_mix[MAXDCHLET] is passed. - * Else, pass NULL. 
- * - * Args: vec - the 4 or 20-long vector of counts to modify - * pri - prior data structure - * num - pri->mnum or pri->inum; # of mixtures - * eq - pri->mq or pri->iq; prior mixture probabilities - * e - pri->i or pri->m; Dirichlet components - * ret_mix - filled with posterior mixture probabilities, or NULL - * - * Return: (void) - * The counts in vec are changed and normalized to probabilities. - */ -void -P7PriorifyEmissionVector(float *vec, struct p7prior_s *pri, - int num, float eq[MAXDCHLET], float e[MAXDCHLET][MAXABET], - float *ret_mix) -{ - int x; /* counter over vec */ - int q; /* counter over mixtures */ - float mix[MAXDCHLET]; /* posterior distribution over mixtures */ - float totc; /* total counts */ - float tota; /* total alpha terms */ - float xi; /* X_i term, Sjolander eq. 41 */ - - /* Calculate mix[], which is the posterior probability - * P(q | n) of mixture component q given the count vector n - * - * (side effect note: note that an insert vector in a PAM prior - * is passed with num = 1, bypassing pam prior code; this means - * that inserts cannot be mixture Dirichlets...) - * [SRE, 12/24/00: the above comment is cryptic! what the hell does that - * mean, inserts can't be mixtures? doesn't seem to be true. it - * may mean that in a PAM prior, you can't have a mixture for inserts, - * but I don't even understand that. The insert vectors aren't passed - * with num=1!!] - */ - mix[0] = 1.0; - if (pri->strategy == PRI_DCHLET && num > 1) - { - for (q = 0; q < num; q++) - { - mix[q] = eq[q] > 0.0 ? 
log(eq[q]) : -999.; - mix[q] += Logp_cvec(vec, Alphabet_size, e[q]); - } - LogNorm(mix, num); /* now mix[q] is P(component_q | n) */ - } - else if (pri->strategy == PRI_PAM && num > 1) - { /* pam prior uses aa frequencies as `P(q|n)' */ - for (q = 0; q < Alphabet_size; q++) - mix[q] = vec[q]; - FNorm(mix, Alphabet_size); - } - - /* Convert the counts to probabilities, following Sjolander (1996) - */ - totc = FSum(vec, Alphabet_size); - for (x = 0; x < Alphabet_size; x++) { - xi = 0.0; - for (q = 0; q < num; q++) { - tota = FSum(e[q], Alphabet_size); - xi += mix[q] * (vec[x] + e[q][x]) / (totc + tota); - } - vec[x] = xi; - } - FNorm(vec, Alphabet_size); - - if (ret_mix != NULL) - for (q = 0; q < num; q++) - ret_mix[q] = mix[q]; -} - - - -/* Function: P7PriorifyTransitionVector() - * - * Purpose: Add prior pseudocounts to transition vector, - * which contains three different probability vectors - * for m, d, and i. - * - * Args: t - state transitions, counts: 3 for M, 2 for I, 2 for D. - * prior - Dirichlet prior information - * tq - prior distribution over Dirichlet components. - * (overrides prior->iq[]; used for alternative - * methods of conditioning prior on structural data) - * - * Return: (void) - * t is changed, and renormalized -- comes back as - * probability vectors. - */ -void -P7PriorifyTransitionVector(float *t, struct p7prior_s *prior, - float tq[MAXDCHLET]) -{ - int ts; - int q; - float mix[MAXDCHLET]; - float totm, totd, toti; /* total counts in three transition vecs */ - float xi; /* Sjolander's X_i term */ - - mix[0] = 1.0; /* default is simple one component */ - if ((prior->strategy == PRI_DCHLET || prior->strategy == PRI_PAM) && prior->mnum > 1) - { - for (q = 0; q < prior->tnum; q++) - { - mix[q] = tq[q] > 0.0 ? 
log(tq[q]) : -999.; - mix[q] += Logp_cvec(t, 3, prior->t[q]); /* 3 match */ - mix[q] += Logp_cvec(t+3, 2, prior->t[q]+3); /* 2 insert */ - mix[q] += Logp_cvec(t+5, 2, prior->t[q]+5); /* 2 delete */ - } - LogNorm(mix, prior->tnum); /* mix[q] is now P(q | counts) */ - } - /* precalc some denominators */ - totm = FSum(t,3); - toti = t[TIM] + t[TII]; - totd = t[TDM] + t[TDD]; - - for (ts = 0; ts < 7; ts++) - { - xi = 0.0; - for (q = 0; q < prior->tnum; q++) - { - switch (ts) { - case TMM: case TMI: case TMD: - xi += mix[q] * (t[ts] + prior->t[q][ts]) / - (totm + FSum(prior->t[q], 3)); - break; - case TIM: case TII: - xi += mix[q] * (t[ts] + prior->t[q][ts]) / - (toti + prior->t[q][TIM] + prior->t[q][TII]); - break; - case TDM: case TDD: - xi += mix[q] * (t[ts] + prior->t[q][ts]) / - (totd + prior->t[q][TDM] + prior->t[q][TDD]); - break; - } - } - t[ts] = xi; - } - FNorm(t, 3); /* match */ - FNorm(t+3, 2); /* insert */ - FNorm(t+5, 2); /* delete */ -} - - -/* Function: default_amino_prior() - * - * Purpose: Set the default protein prior. 
- */ -static struct p7prior_s * -default_amino_prior(void) -{ - struct p7prior_s *pri; - int q, x; - /* default match mixture coefficients */ - static float defmq[9] = { - 0.178091, 0.056591, 0.0960191, 0.0781233, 0.0834977, - 0.0904123, 0.114468, 0.0682132, 0.234585 }; - - /* default match mixture Dirichlet components */ - static float defm[9][20] = { - { 0.270671, 0.039848, 0.017576, 0.016415, 0.014268, - 0.131916, 0.012391, 0.022599, 0.020358, 0.030727, - 0.015315, 0.048298, 0.053803, 0.020662, 0.023612, - 0.216147, 0.147226, 0.065438, 0.003758, 0.009621 }, - { 0.021465, 0.010300, 0.011741, 0.010883, 0.385651, - 0.016416, 0.076196, 0.035329, 0.013921, 0.093517, - 0.022034, 0.028593, 0.013086, 0.023011, 0.018866, - 0.029156, 0.018153, 0.036100, 0.071770, 0.419641 }, - { 0.561459, 0.045448, 0.438366, 0.764167, 0.087364, - 0.259114, 0.214940, 0.145928, 0.762204, 0.247320, - 0.118662, 0.441564, 0.174822, 0.530840, 0.465529, - 0.583402, 0.445586, 0.227050, 0.029510, 0.121090 }, - { 0.070143, 0.011140, 0.019479, 0.094657, 0.013162, - 0.048038, 0.077000, 0.032939, 0.576639, 0.072293, - 0.028240, 0.080372, 0.037661, 0.185037, 0.506783, - 0.073732, 0.071587, 0.042532, 0.011254, 0.028723 }, - { 0.041103, 0.014794, 0.005610, 0.010216, 0.153602, - 0.007797, 0.007175, 0.299635, 0.010849, 0.999446, - 0.210189, 0.006127, 0.013021, 0.019798, 0.014509, - 0.012049, 0.035799, 0.180085, 0.012744, 0.026466 }, - { 0.115607, 0.037381, 0.012414, 0.018179, 0.051778, - 0.017255, 0.004911, 0.796882, 0.017074, 0.285858, - 0.075811, 0.014548, 0.015092, 0.011382, 0.012696, - 0.027535, 0.088333, 0.944340, 0.004373, 0.016741 }, - { 0.093461, 0.004737, 0.387252, 0.347841, 0.010822, - 0.105877, 0.049776, 0.014963, 0.094276, 0.027761, - 0.010040, 0.187869, 0.050018, 0.110039, 0.038668, - 0.119471, 0.065802, 0.025430, 0.003215, 0.018742 }, - { 0.452171, 0.114613, 0.062460, 0.115702, 0.284246, - 0.140204, 0.100358, 0.550230, 0.143995, 0.700649, - 0.276580, 0.118569, 0.097470, 0.126673, 0.143634, - 
0.278983, 0.358482, 0.661750, 0.061533, 0.199373 }, - { 0.005193, 0.004039, 0.006722, 0.006121, 0.003468, - 0.016931, 0.003647, 0.002184, 0.005019, 0.005990, - 0.001473, 0.004158, 0.009055, 0.003630, 0.006583, - 0.003172, 0.003690, 0.002967, 0.002772, 0.002686 }, - }; - - pri = P7AllocPrior(); - pri->strategy = PRI_DCHLET; - - /* Transition priors are subjective, but borrowed from GJM's estimations - * on Pfam - */ - pri->tnum = 1; - pri->tq[0] = 1.0; - pri->t[0][TMM] = 0.7939; - pri->t[0][TMI] = 0.0278; - pri->t[0][TMD] = 0.0135; - pri->t[0][TIM] = 0.1551; - pri->t[0][TII] = 0.1331; - pri->t[0][TDM] = 0.9002; - pri->t[0][TDD] = 0.5630; - - /* Match emission priors are a mixture Dirichlet, - * from Kimmen Sjolander (Blocks9) - */ - pri->mnum = 9; - for (q = 0; q < pri->mnum; q++) - { - pri->mq[q] = defmq[q]; - for (x = 0; x < 20; x++) - pri->m[q][x] = defm[q][x]; - } - - /* These insert emission priors are subjective. Observed frequencies - * were obtained from PFAM 1.0, 10 Nov 96; - * see ~/projects/plan7/InsertStatistics. - * Inserts are slightly biased towards polar residues and away from - * hydrophobic residues. - */ - pri->inum = 1; - pri->iq[0] = 1.; - pri->i[0][0] = 681.; /* A */ - pri->i[0][1] = 120.; /* C */ - pri->i[0][2] = 623.; /* D */ - pri->i[0][3] = 651.; /* E */ - pri->i[0][4] = 313.; /* F */ - pri->i[0][5] = 902.; /* G */ - pri->i[0][6] = 241.; /* H */ - pri->i[0][7] = 371.; /* I */ - pri->i[0][8] = 687.; /* K */ - pri->i[0][9] = 676.; /* L */ - pri->i[0][10] = 143.; /* M */ - pri->i[0][11] = 548.; /* N */ - pri->i[0][12] = 647.; /* P */ - pri->i[0][13] = 415.; /* Q */ - pri->i[0][14] = 551.; /* R */ - pri->i[0][15] = 926.; /* S */ - pri->i[0][16] = 623.; /* T */ - pri->i[0][17] = 505.; /* V */ - pri->i[0][18] = 102.; /* W */ - pri->i[0][19] = 269.; /* Y */ - - return pri; -} - - -/* Function: default_nucleic_prior() - * - * Purpose: Set the default DNA prior. 
(for now, almost a Laplace) - */ -static struct p7prior_s * -default_nucleic_prior(void) -{ - struct p7prior_s *pri; - - pri = P7AllocPrior(); - pri->strategy = PRI_DCHLET; - - /* The use of the Pfam-trained amino acid transition priors - * here is TOTALLY bogus. But it works better than a straight - * Laplace, esp. for Maxmodelmaker(). For example, a Laplace - * prior builds M=1 models for a single sequence GAATTC (at - * one time an open "bug"). - */ - pri->tnum = 1; - pri->tq[0] = 1.; - pri->t[0][TMM] = 0.7939; - pri->t[0][TMI] = 0.0278; - pri->t[0][TMD] = 0.0135; - pri->t[0][TIM] = 0.1551; - pri->t[0][TII] = 0.1331; - pri->t[0][TDM] = 0.9002; - pri->t[0][TDD] = 0.5630; - - pri->mnum = 1; - pri->mq[0] = 1.; - FSet(pri->m[0], Alphabet_size, 1.); - - pri->inum = 1; - pri->iq[0] = 1.; - FSet(pri->i[0], Alphabet_size, 1.); - - return pri; -} - diff --git a/forester/archive/RIO/others/hmmer/src/pvm.c b/forester/archive/RIO/others/hmmer/src/pvm.c deleted file mode 100644 index fd5b4bb..0000000 --- a/forester/archive/RIO/others/hmmer/src/pvm.c +++ /dev/null @@ -1,453 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* pvm.c - * SRE, Wed Aug 5 15:40:09 1998 [St. Louis] - * - * PVM code shared amongst pvm masters and slaves. - * - * CVS $Id: pvm.c,v 1.1.1.1 2005/03/22 08:34:00 cmzmasek Exp $ - */ -#ifdef HMMER_PVM - -#include -#include -#include -#include -#include - -#include "version.h" -#include "structs.h" -#include "funcs.h" -#include "squid.h" -#include "sqfuncs.h" - -/* Function: PVMSpawnSlaves() - * Date: SRE, Wed Aug 19 14:01:39 1998 [St. Louis] - * - * Purpose: Spawn the slaves. 
- * We use the "speed" field for each host to - * determine how many tasks should be started - * on it. 1000 indicates a single processor; - * 2000 indicates a dual processor; etc. - * Since hmmpfam-pvm load balances automatically, - * the relative speed of the processor(s) is - * irrelevant. - * - * Args: slave - name of slave process to spawn ("hmmpfam-slave") - * ret_tid - RETURN: malloc'ed list of slave tid's. - * ret_nslaves - RETURN: total number of slaves. - * - * Returns: (void). - * caller must free() ret_tid. - */ -void -PVMSpawnSlaves(char *slave, int **ret_tid, int *ret_nslaves) -{ - struct pvmhostinfo *hostp; - int nodes; /* total number of nodes in the VM */ - int nslaves; /* RETURN: total number of slaves */ - int ntasks; /* number of tasks to start on this node */ - int code; /* a code returned from a PVM call */ - int *tid; /* array of slave task tids */ - int *dtid; /* array of host PVMD tids; for pvm_notify() */ - int i; - - SQD_DPRINTF1(("requesting PVM configuration...\n")); - if (pvm_config(&nodes, NULL, &hostp) != 0) Die("PVM not responding"); - dtid = MallocOrDie(sizeof(int) * nodes); - nslaves = 0; - for (i = 0; i < nodes; i++) - { - dtid[i] = hostp[i].hi_tid; - ntasks = hostp[i].hi_speed / 1000; - if (ntasks == 0) continue; - - if (nslaves == 0) tid = MallocOrDie(sizeof(int) * ntasks); - else tid = ReallocOrDie(tid, sizeof(int) * (ntasks+nslaves)); - code = pvm_spawn(slave, NULL, PvmTaskHost, hostp[i].hi_name, ntasks, tid + nslaves); - if (code < ntasks) { /* Careful error diagnostics. Important! 
*/ - pvm_exit(); - switch (*(tid+nslaves)) { - case PvmBadParam: - Die("pvm_spawn claims PvmBadParam - code error?"); - case PvmNoHost: - Die("pvm_spawn: host %d (%s): not in virtual machine", - i, hostp[i].hi_name); - case PvmNoFile: - Die("pvm_spawn: host %d (%s): %s not in path", - i+1, hostp[i].hi_name, slave); - case PvmNoMem: - Die("pvm_spawn claims that host %s has insufficient memory", - hostp[i].hi_name); - case PvmSysErr: - Die("pvm_spawn: host %d (%s): pvmd not responding", - i+1, hostp[i].hi_name); - case PvmOutOfRes: - Die("pvm_spawn claims it is out of resources."); - default: - Die("Spawned too few slaves on node %s; expected %d got %d\n", hostp[i].hi_name, ntasks, code); - } - } - nslaves += ntasks; - SQD_DPRINTF1(("Spawned %d slaves on host %s...\n", ntasks, hostp[i].hi_name)); - } - if (nslaves == 0) { pvm_exit(); Die("No slaves were spawned"); } - - /* Arrange to be notified in case of trouble - */ - if (pvm_notify(PvmTaskExit, HMMPVM_TASK_TROUBLE, nslaves, tid) != 0) - { pvm_exit(); Die("pvm_notify() unexpectedly failed"); } - if (pvm_notify(PvmHostDelete, HMMPVM_HOST_TROUBLE, nodes, dtid) != 0) - { pvm_exit(); Die("pvm_notify() unexpectedly failed"); } - - *ret_tid = tid; - *ret_nslaves = nslaves; - free(dtid); - return; -} - -/* Function: PVMConfirmSlaves() - * Date: SRE, Mon Oct 26 17:31:42 1998 [St. Louis] - * - * Purpose: Make sure all the slaves initialized properly; - * after the master spawns and initializes them, - * they're supposed to send back a code. Valid - * codes are in structs.h and include: - * HMMPVM_OK everything's fine - * HMMPVM_NO_HMMFILE file not found (hmmpfam) - * HMMPVM_NO_INDEX no SSI file found (hmmpfam) - * HMMPVM_BAD_INIT miscellaneous error - * They also send back the RELEASE code, which - * must match the master. This was added as an - * integrity check for bug#1. - * - * Args: slave_tid array of nslaves TIDs - * nslaves number of slaves - * - * Returns: (void) - * If everything isn't OK, we Die() here. 
- */ -void -PVMConfirmSlaves(int *slave_tid, int nslaves) -{ - struct pvmhostinfo *hostp; - int nodes; - int i; - struct timeval tmout; - int code; /* code returned by slave */ - int bufid; - char *slaverelease; - - tmout.tv_sec = 5; /* wait 5 sec before giving up on a slave. */ - tmout.tv_usec = 0; - - SQD_DPRINTF1(("requesting PVM configuration...\n")); - if (pvm_config(&nodes, NULL, &hostp) != 0) Die("PVM not responding"); - SQD_DPRINTF1(("Slaves, count off!\n")); - for (i = 0; i < nslaves; i++) - { - /* Do a timeout receive. If we don't hear back pronto - * from our slaves, we've got a problem. - */ - if ((bufid = pvm_trecv(-1, HMMPVM_RESULTS, &tmout)) <= 0) - { - SQD_DPRINTF1(("Slave %d (%s) gives bufid %d.\n", i, hostp[i].hi_name, bufid)); - PVMKillSlaves(slave_tid, nslaves); - pvm_exit(); - Die("One or more slaves started but died before initializing."); - } - - SQD_DPRINTF1(("Slave %d (%s): present, sir!\n", i, hostp[i].hi_name)); - pvm_upkint(&code, 1, 1); - slaverelease = PVMUnpackString(); - - if (code != HMMPVM_OK) - { - PVMKillSlaves(slave_tid, nslaves); - pvm_exit(); - switch (code) { - case HMMPVM_NO_HMMFILE: - Die("One or more PVM slaves couldn't open hmm file. Check installation."); - case HMMPVM_NO_INDEX: - Die("One or more PVM slaves couldn't open SSI index for hmm file. Check installation."); - case HMMPVM_BAD_INIT: - Die("One or more PVM slaves reports a failure to initialize."); - default: - Die("Unknown error code. A slave is confused."); - } - } - - if (strcmp(slaverelease, RELEASE) != 0) - { - PVMKillSlaves(slave_tid, nslaves); - pvm_exit(); - Die("Slave %d reports that it's running release %s, which doesn't match the master (%s)", i, slaverelease, RELEASE); - } - } -} - - - -/* Function: PVMCheckSlaves() - * Date: SRE, Fri Aug 14 09:04:25 1998 [St. Louis] - * - * Purpose: Make sure all the slaves are alive. If they - * aren't, kill the rest, and die. 
- * - * Args: slave_tid - array of slave TIDs - * nslaves - number of slaves - * - * Returns: void - */ -void -PVMCheckSlaves(int *slave_tid, int nslaves) -{ - int trouble; /* non-zero if a trouble message is waiting */ - - trouble = pvm_nrecv(-1, HMMPVM_TASK_TROUBLE); - if (trouble > 0) - { - PVMKillSlaves(slave_tid, nslaves); - pvm_exit(); Die("One or more slave tasks exited prematurely. Shutting down."); - } - trouble = pvm_nrecv(-1, HMMPVM_HOST_TROUBLE); - if (trouble > 0) - { - PVMKillSlaves(slave_tid, nslaves); - pvm_exit(); Die("One or more hosts left the PVM unexpectedly. Shutting down."); - } -} - -/* Function: PVMKillSlaves() - * Date: SRE, Thu Aug 13 16:27:40 1998 [St. Louis] - * - * Purpose: shut down the slaves, after a fatal error. - * - * Args: slave_tid - array of slave tids - * nslaves - number of slaves - * - * Returns: void - */ -void -PVMKillSlaves(int *slave_tid, int nslaves) -{ - int i; - - for (i = 0; i < nslaves; i++) - if (pvm_kill(slave_tid[i]) != 0) - Warn("a slave refuses to die"); - return; -} - - -/* Function: PVMPackString() - * Date: SRE, Tue Aug 18 14:08:05 1998 [St. Louis] - * - * Purpose: pack a variable length string for sending over PVM, - * sending its length first so the receiver can - * malloc appropriately. - * - * Args: s - the string to send - * - * Returns: 1 on success. 0 on failure. - */ -int -PVMPackString(char *s) -{ - int len; - - len = (s == NULL) ? -1 : strlen(s); - if (pvm_pkint(&len, 1, 1) != 0) return 0; - if (len >= 0) - if (pvm_pkstr(s) != 0) return 0; - return 1; -} - -/* Function: PVMUnpackString() - * Date: SRE, Tue Aug 18 14:11:04 1998 [St. Louis] - * - * Purpose: unpack a string. - * - * Args: (void) - * - * Returns: ptr to string. 
- */ -char * -PVMUnpackString(void) -{ - int len; - char *s; - - if (pvm_upkint(&len, 1, 1) != 0) return NULL; - if (len == -1) return NULL; - - s = MallocOrDie(sizeof(char) * (len+1)); - if (pvm_upkstr(s) != 0) return NULL; - return s; -} - - -/* Function: PVMPackTrace() - * Date: SRE, Wed Aug 5 15:41:36 1998 [St. Louis] - * - * Purpose: Pack a trace structure for a PVM send. - * The caller is responsible for calling pvm_initsend() before, - * and pvm_send() after packing. - * - * Args: tr - the trace structure to pack. - * - * Returns: 1 on success, 0 on failure. - */ -int -PVMPackTrace(struct p7trace_s *tr) -{ - if (pvm_pkint(&(tr->tlen), 1, 1) < 0) return 0; - if (pvm_pkbyte(tr->statetype, tr->tlen, 1) < 0) return 0; - if (pvm_pkint(tr->nodeidx, tr->tlen, 1) < 0) return 0; - if (pvm_pkint(tr->pos, tr->tlen, 1) < 0) return 0; - return 1; -} - -/* Function: PVMUnpackTrace() - * Date: SRE, Wed Aug 5 15:51:03 1998 [St. Louis] - * - * Purpose: Unpack a trace structure from a PVM send. - * Caller is responsible for calling for a pvm_recv() - * before calling this. - * - * Args: none. - * - * Returns: ptr to alloc'ed trace, or NULL on failure. - * caller free's returned trace with P7FreeTrace(). - */ -struct p7trace_s * -PVMUnpackTrace(void) -{ - struct p7trace_s *tr; - int tlen; - - pvm_upkint(&tlen, 1, 1); - P7AllocTrace(tlen, &tr); - if (pvm_upkbyte(tr->statetype, tlen, 1) < 0) { P7FreeTrace(tr); return NULL;} - if (pvm_upkint(tr->nodeidx, tlen, 1) < 0) { P7FreeTrace(tr); return NULL;} - if (pvm_upkint(tr->pos, tlen, 1) < 0) { P7FreeTrace(tr); return NULL;} - tr->tlen = tlen; - return tr; -} - - -/* Function: PVMPackHMM() - * Date: SRE, Tue Aug 18 11:47:44 1998 [St. Louis] - * - * Purpose: Pack an HMM for sending over PVM. - * - * Args: hmm - the HMM to send. 
- * - * Returns: 1 on success, 0 on failure - */ -int -PVMPackHMM(struct plan7_s *hmm) -{ - int k; - int sendflags; /* HMM flags to send */ - - sendflags = hmm->flags; - sendflags &= ~PLAN7_HASBITS; /* no log odds scores sent */ - sendflags &= ~PLAN7_HASDNA; /* no DNA scores sent */ - - if (pvm_pkint(&(hmm->M), 1, 1) != 0) return 0; - if (pvm_pkint(&sendflags, 1, 1) != 0) return 0; - if (! PVMPackString(hmm->name)) return 0; - if (hmm->flags & PLAN7_DESC) { if (!PVMPackString(hmm->desc)) return 0; } - if (hmm->flags & PLAN7_RF) { if (!PVMPackString(hmm->rf)) return 0; } - if (hmm->flags & PLAN7_CS) { if (!PVMPackString(hmm->cs)) return 0; } - if (! PVMPackString(hmm->comlog)) return 0; - if (pvm_pkint(&(hmm->nseq), 1, 1) != 0) return 0; - if (!PVMPackString(hmm->ctime)) return 0; - if (hmm->flags & PLAN7_MAP) { if (pvm_pkint(hmm->map, hmm->M+1, 1) != 0) return 0; } - if (pvm_pkint(&(hmm->checksum), 1, 1) != 0) return 0; - - for (k = 1; k < hmm->M; k++) - if (pvm_pkfloat(hmm->t[k], 7, 1) != 0) return 0; - for (k = 1; k <= hmm->M; k++) - if (pvm_pkfloat(hmm->mat[k], Alphabet_size, 1) != 0) return 0; - for (k = 1; k < hmm->M; k++) - if (pvm_pkfloat(hmm->ins[k], Alphabet_size, 1) != 0) return 0; - if (pvm_pkfloat(&(hmm->tbd1), 1, 1) != 0) return 0; - for (k = 0; k < 4; k++) - if (pvm_pkfloat(hmm->xt[k], 2, 1) != 0) return 0; - if (pvm_pkfloat(hmm->begin, hmm->M+1, 1) != 0) return 0; - if (pvm_pkfloat(hmm->end, hmm->M+1, 1) != 0) return 0; - if (pvm_pkfloat(hmm->null, Alphabet_size, 1) != 0) return 0; - if (pvm_pkfloat(&(hmm->p1), 1, 1) != 0) return 0; - if (hmm->flags & PLAN7_STATS) - { - if (pvm_pkfloat(&(hmm->mu), 1, 1) != 0) return 0; - if (pvm_pkfloat(&(hmm->lambda), 1, 1) != 0) return 0; - } - return 1; -} - - -/* Function: PVMUnpackHMM() - * Date: SRE, Tue Aug 18 13:56:13 1998 [St. Louis] - * - * Purpose: Unpack an HMM from PVM. 
- * - * Args: (void) - * - * Returns: ptr to HMM, or NULL - */ -struct plan7_s * -PVMUnpackHMM(void) -{ - struct plan7_s *hmm; - int k; - int M; - - if (pvm_upkint(&(M), 1, 1) != 0) return NULL; - hmm = AllocPlan7(M); - - if (pvm_upkint(&(hmm->flags), 1, 1) != 0) return NULL; - if ((hmm->name = PVMUnpackString()) == NULL) return NULL; - if (hmm->flags & PLAN7_DESC) { if ((hmm->desc = PVMUnpackString()) == NULL) return NULL; } - if (hmm->flags & PLAN7_RF) { if ((hmm->rf = PVMUnpackString()) == NULL) return NULL; } - if (hmm->flags & PLAN7_CS) { if ((hmm->cs = PVMUnpackString()) == NULL) return NULL; } - - if ((hmm->comlog = PVMUnpackString()) == NULL) return NULL; - if (pvm_upkint(&(hmm->nseq), 1, 1) != 0) return NULL; - if ((hmm->ctime = PVMUnpackString()) == NULL) return NULL; - if (hmm->flags & PLAN7_MAP) { if (pvm_upkint(hmm->map, hmm->M+1, 1) != 0) return NULL; } - if (pvm_upkint(&(hmm->checksum), 1, 1) != 0) return NULL; - - for (k = 1; k < hmm->M; k++) - if (pvm_upkfloat(hmm->t[k], 7, 1) != 0) return NULL; - for (k = 1; k <= hmm->M; k++) - if (pvm_upkfloat(hmm->mat[k], Alphabet_size, 1) != 0) return NULL; - for (k = 1; k < hmm->M; k++) - if (pvm_upkfloat(hmm->ins[k], Alphabet_size, 1) != 0) return NULL; - if (pvm_upkfloat(&(hmm->tbd1), 1, 1) != 0) return NULL; - for (k = 0; k < 4; k++) - if (pvm_upkfloat(hmm->xt[k], 2, 1) != 0) return NULL; - if (pvm_upkfloat(hmm->begin, hmm->M+1, 1) != 0) return NULL; - if (pvm_upkfloat(hmm->end, hmm->M+1, 1) != 0) return NULL; - if (pvm_upkfloat(hmm->null, Alphabet_size, 1) != 0) return NULL; - if (pvm_upkfloat(&(hmm->p1), 1, 1) != 0) return NULL; - if (hmm->flags & PLAN7_STATS) - { - if (pvm_upkfloat(&(hmm->mu), 1, 1) != 0) return NULL; - if (pvm_upkfloat(&(hmm->lambda), 1, 1) != 0) return NULL; - } - return hmm; -} - - -#endif /* HMMER_PVM */ diff --git a/forester/archive/RIO/others/hmmer/src/states.c b/forester/archive/RIO/others/hmmer/src/states.c deleted file mode 100644 index 0fb7e50..0000000 --- 
a/forester/archive/RIO/others/hmmer/src/states.c +++ /dev/null @@ -1,444 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile-HMMs - * Copyright (C) 1992-1997 Sean R. Eddy - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and - * GNULICENSE for details. - * - ************************************************************/ - -/* states.c - * - * alloc, free, and initialization of state structures - */ - -#include -#include -#include -#include -#include "squid.h" -#include "config.h" -#include "structs.h" -#include "funcs.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - - -struct hmm_struc * -AllocHMM(int M) /* length of model to make */ -{ - struct hmm_struc *hmm; /* RETURN: blank HMM */ - - hmm = (struct hmm_struc *) MallocOrDie (sizeof(struct hmm_struc)); - hmm->ins = (struct basic_state *) MallocOrDie (sizeof(struct basic_state) * (M+2)); - hmm->del = (struct basic_state *) MallocOrDie (sizeof(struct basic_state) * (M+2)); - hmm->mat = (struct basic_state *) MallocOrDie (sizeof(struct basic_state) * (M+2)); - hmm->ref = (char *) MallocOrDie ((M+2) * sizeof(char)); - hmm->cs = (char *) MallocOrDie ((M+2) * sizeof(char)); - hmm->xray = (float *) MallocOrDie ((M+2) * sizeof(float) * NINPUTS); - hmm->M = M; - hmm->name = Strdup("unnamed"); /* name is not optional. */ - - hmm->flags = 0; - ZeroHMM(hmm); - return hmm; -} - -/* Function: ZeroHMM() - * - * Purpose: Zero emission and transition counts in an HMM. 
- */ -void -ZeroHMM(struct hmm_struc *hmm) -{ - int k, ts, idx; - - for (k = 0; k <= hmm->M+1; k++) - { - for (ts = 0; ts < 3; ts++) - { - hmm->mat[k].t[ts] = 0.0; - hmm->ins[k].t[ts] = 0.0; - hmm->del[k].t[ts] = 0.0; - } - for (idx = 0; idx < Alphabet_size; idx++) - { - hmm->mat[k].p[idx] = 0.0; - hmm->ins[k].p[idx] = 0.0; - hmm->del[k].p[idx] = 0.0; - } - } -} - - -/* Function: LogifyHMM() - * - * Purpose: Convert a probability-form HMM to log probabilities. - * Best to do this on a modifiable copy of an HMM. - */ -void -LogifyHMM(struct hmm_struc *hmm) -{ - int k, ts, idx; - - for (k = 0; k <= hmm->M+1; k++) - { - for (ts = 0; ts < 3; ts++) - { - hmm->mat[k].t[ts] = sreLOG2(hmm->mat[k].t[ts]); - hmm->ins[k].t[ts] = sreLOG2(hmm->ins[k].t[ts]); - hmm->del[k].t[ts] = sreLOG2(hmm->del[k].t[ts]); - } - for (idx = 0; idx < Alphabet_size; idx++) - { - hmm->mat[k].p[idx] = sreLOG2(hmm->mat[k].p[idx]); - hmm->ins[k].p[idx] = sreLOG2(hmm->ins[k].p[idx]); - } - } -} - -/* Function: LogoddsifyHMM() - * - * Convert a probability form HMM to log odds scores. - * Best to do this on a modifiable copy of an HMM. - */ -void -LogoddsifyHMM(struct hmm_struc *hmm) -{ - int k, ts, x; - - for (k = 0; k <= hmm->M+1; k++) - { - for (ts = 0; ts < 3; ts++) - { - hmm->mat[k].t[ts] = sreLOG2(hmm->mat[k].t[ts]); - hmm->ins[k].t[ts] = sreLOG2(hmm->ins[k].t[ts]); - hmm->del[k].t[ts] = sreLOG2(hmm->del[k].t[ts]); - } - for (x = 0; x < Alphabet_size; x++) - { - hmm->mat[k].p[x] = sreLOG2(hmm->mat[k].p[x]) - sreLOG2(hmm->null[x]); - hmm->ins[k].p[x] = sreLOG2(hmm->ins[k].p[x]) - sreLOG2(hmm->null[x]); - } - } -} - - -/* Function: WriteFlatPriorHMM() - * - * Purpose: Fill an HMM with expected probabilities according - * to a given prior. Used to construct "flat" initial - * models for hmmt. 
- */ -int -WriteFlatPriorHMM(struct hmm_struc *hmm, struct prior_s *prior) -{ - int k; /* counter across model */ - int q; /* counter over mixtures */ - int x; /* counter over symbols or transitions */ - float malpha; /* alpha for mixture */ - float ialpha; /* alpha for insert mixture */ - float dalpha; /* alpha for delete mixture */ - - for (k = 0; k <= hmm->M; k++) - { - /* xray info for structure prior */ - if (prior->strategy == PRI_STRUCT) - { - hmm->xray[k*NINPUTS + XRAY_bias] = 1.0; - hmm->xray[k*NINPUTS + XRAY_E] = 0.0; - hmm->xray[k*NINPUTS + XRAY_H] = 0.0; - hmm->xray[k*NINPUTS + XRAY_SA] = 0.0; - } - /* match symbol emissions */ - for (x = 0; x < Alphabet_size; x++) - hmm->mat[k].p[x] = 0.0; - if (k > 0) - for (q = 0; q < prior->mnum; q++) - { - if (prior->strategy == PRI_STRUCT) - prior->mq[q] = 1.0 / prior->mnum; - malpha = 0.0; - for (x = 0; x < Alphabet_size; x++) - malpha += prior->mat[q][x]; - for (x = 0; x < Alphabet_size; x++) - hmm->mat[k].p[x] += prior->mq[q] * prior->mat[q][x] / malpha; - } - /* insert emissions */ - for (x = 0; x < Alphabet_size; x++) - hmm->ins[k].p[x] = 0.0; - for (q = 0; q < prior->inum; q++) - { - if (prior->strategy == PRI_STRUCT) - prior->iq[q] = 1.0 / prior->inum; - ialpha = 0.0; - for (x = 0; x < Alphabet_size; x++) - ialpha += prior->ins[q][x]; - for (x = 0; x < Alphabet_size; x++) - hmm->ins[k].p[x] += prior->iq[q] * prior->ins[q][x] / ialpha; - } - - /* state transitions */ - for (x = 0; x < 3; x++) - hmm->mat[k].t[x] = hmm->ins[k].t[x] = hmm->del[k].t[x] = 0.0; - for (q = 0; q < prior->tnum; q++) - { - if (prior->strategy == PRI_STRUCT) - prior->tq[q] = 1.0 / prior->tnum; - malpha = ialpha = dalpha = 0.0; - for (x = 0; x < 3; x++) - { - malpha += prior->tm[q][x]; - ialpha += prior->ti[q][x]; - dalpha += prior->td[q][x]; - } - for (x = 0; x < 3; x++) - { - hmm->mat[k].t[x] += prior->tq[q] * prior->tm[q][x] / malpha; - hmm->ins[k].t[x] += prior->tq[q] * prior->ti[q][x] / ialpha; - if (k > 0) hmm->del[k].t[x] += 
prior->tq[q] * prior->td[q][x] / dalpha; - } - } - } - /* the final state never transits to d+1 */ - hmm->mat[hmm->M].t[DELETE] = 0.0; - hmm->ins[hmm->M].t[DELETE] = 0.0; - hmm->del[hmm->M].t[DELETE] = 0.0; - Renormalize(hmm); - return 1; -} - - -/* Function: HMMDup() - * - * Purpose: Create a duplicate copy of an HMM. - * - * Return: Pointer to the duplicate. - * Caller is responsible for free'ing the duplicate. - */ -struct hmm_struc * -HMMDup(struct hmm_struc *hmm) -{ - struct hmm_struc *newhmm; - - if ((newhmm = AllocHMM(hmm->M)) == NULL) - Die("AllocHMM() failed"); - HMMCopy(newhmm, hmm); - return newhmm; -} - - -/* Function: HMMCopy() - * - * Purpose: Make a copy of hmm2 in hmm1. - * - * Return: (void) - * Caller promises that hmm1 and hmm2 have identical architectures. - */ -void -HMMCopy(struct hmm_struc *hmm1, struct hmm_struc *hmm2) -{ - int k, x, ts; - - hmm1->flags = hmm2->flags; - if (hmm1->name != NULL) free(hmm1->name); - hmm1->name = Strdup(hmm2->name); - - if (hmm2->flags & HMM_REF) strcpy(hmm1->ref, hmm2->ref); - if (hmm2->flags & HMM_CS) strcpy(hmm1->cs, hmm2->cs); - if (hmm2->flags & HMM_XRAY) - memcpy(hmm1->xray, hmm2->xray, NINPUTS * (hmm2->M+2) * sizeof(float)); - memcpy(hmm1->null, hmm2->null, sizeof(float) * Alphabet_size); - - for (k = 0; k <= hmm2->M+1; k++) - { - /* copy transition T's */ - for (ts = 0; ts < 3; ts++) - { - hmm1->mat[k].t[ts] = hmm2->mat[k].t[ts]; - hmm1->ins[k].t[ts] = hmm2->ins[k].t[ts]; - hmm1->del[k].t[ts] = hmm2->del[k].t[ts]; - } - /* copy symbol P tables */ - for (x = 0; x < Alphabet_size; x++) - { - hmm1->mat[k].p[x] = hmm2->mat[k].p[x]; - hmm1->ins[k].p[x] = hmm2->ins[k].p[x]; - } - } - return; -} - - -int -FreeHMM(struct hmm_struc *hmm) -{ - if (hmm == NULL) return 0; - free(hmm->ref); - free(hmm->cs); - free(hmm->xray); - free(hmm->name); - if (hmm->mat != NULL) free (hmm->mat); - if (hmm->ins != NULL) free (hmm->ins); - if (hmm->del != NULL) free (hmm->del); - free(hmm); - return 1; -} - - -struct shmm_s * 
-AllocSearchHMM(int M) -{ - struct shmm_s *shmm; - int x; - - if ((shmm = (struct shmm_s *) malloc (sizeof(struct shmm_s))) == NULL) - Die("malloc failed"); - for (x = 0; x < 26; x++) - if ((shmm->m_emit[x] = (int *) calloc (M+1, sizeof(int))) == NULL || - (shmm->i_emit[x] = (int *) calloc (M+1, sizeof(int))) == NULL) - Die("malloc failed"); - if ((shmm->t = (int *) malloc (sizeof(int) * (9*(M+1)))) == NULL || - (shmm->ref = (char *) malloc (sizeof(char) * (M+2))) == NULL || - (shmm->cs = (char *) malloc (sizeof(char) * (M+2))) == NULL) - Die("malloc failed"); - shmm->flags = 0; - shmm->name = Strdup("nameless"); - shmm->M = M; - return shmm; -} - -void -FreeSearchHMM(struct shmm_s *shmm) -{ - int x; - - for (x = 0; x < 26; x++) - { - free(shmm->m_emit[x]); - free(shmm->i_emit[x]); - } - free(shmm->t); - free(shmm->ref); - free(shmm->cs); - free(shmm->name); - free(shmm); -} - - -/* Function: CountSymbol() - * - * Purpose: Given an observed symbol, and a number of counts to - * distribute (typically just 1.0), bump the appropriate counter(s). - * - * This is completely trivial only so long as the symbols - * always come from the expected alphabet; since we also - * have to deal with degenerate symbols for both nucleic - * acid and protein languages, we make a function to deal - * with this. - * - * Args: sym - observed symbol, e.g. `A' or `X' - * wt - number of counts to distribute (e.g. 1.0) - * counters - array of 4 or 20 counters to increment - * - * Return: Returns 1 on success and bumps the necessary counters. - * Returns 0 on failure and bumps each counter evenly, as - * if it saw a completely ambiguous symbol; this lets - * the caller silently accept garbage symbols, if it cares to. - */ -int -CountSymbol(char sym, float wt, float *counters) -{ - char *sptr; /* pointer into symbol in hmm->alphabet */ - int status; /* RETURN: status; did we recognize the symbol? 
*/ - char symidx; /* index of symbol in Alphabet_iupac */ - - if ((sptr = strchr(Alphabet,sym)) != NULL) - { - symidx = (char) (sptr - Alphabet); - status = 1; - } - else - { - symidx = (char) (Alphabet_iupac - 1); - Warn("unrecognized character %c in CountSymbol()\n", sym); - status = 0; - } - P7CountSymbol(counters, symidx, wt); - return status; -} - - -/* Function: HMMDistance() - * - * Purpose: Test two models for how different they are, using - * a simple squared difference measure on all homologous - * parameters. They must have the same architecture: - * i.e. check that newhmm->M == oldhmm->M before calling. - * - * Args: newhmm - new HMM, probability form - * oldhmm - old HMM, probability form - * - * Return: distance. - */ -float -HMMDistance(struct hmm_struc *newhmm, struct hmm_struc *oldhmm) -{ - int k,x, ts; - float distance = 0.0; - - for (k = 0; k <= newhmm->M; k++) - { - /* state transition distances */ - if (k > 0) - { - for (ts = 0; ts < 3; ts++) - distance += SQR( 100. * (newhmm->del[k].t[ts] - oldhmm->del[k].t[ts])); - } - for (ts = 0; ts < 3; ts++) - distance += SQR( 100. * (newhmm->mat[k].t[ts] - oldhmm->mat[k].t[ts])); - for (ts = 0; ts < 3; ts++) - distance += SQR( 100. * (newhmm->ins[k].t[ts] - oldhmm->ins[k].t[ts])); - - /* symbol emission distances */ - if (k > 0) - for (x = 0; x < Alphabet_size; x++) - distance += SQR( 100. * (newhmm->mat[k].p[x] - oldhmm->mat[k].p[x])); - for (x = 0; x < Alphabet_size; x++) - distance += SQR( 100. * (newhmm->ins[k].p[x] - oldhmm->ins[k].p[x])); - } - distance = sqrt(distance) / newhmm->M; - return distance; -} - - - - -/* Function: Renormalize() - * - * Normalize all P distributions so they sum to 1. - * P distributions that are all 0, or contain negative - * probabilities, are left untouched. - * - * Returns 1 on success, or 0 on failure. 
- */ -void -Renormalize(struct hmm_struc *hmm) -{ - int k; /* counter for states */ - - for (k = 0; k <= hmm->M ; k++) - { - /* match state transition frequencies */ - FNorm(hmm->mat[k].t, 3); - FNorm(hmm->ins[k].t, 3); - if (k > 0) FNorm(hmm->del[k].t, 3); - - if (k > 0) FNorm(hmm->mat[k].p, Alphabet_size); - FNorm(hmm->ins[k].p, Alphabet_size); - } -} - diff --git a/forester/archive/RIO/others/hmmer/src/structs.h b/forester/archive/RIO/others/hmmer/src/structs.h deleted file mode 100644 index 105ebec..0000000 --- a/forester/archive/RIO/others/hmmer/src/structs.h +++ /dev/null @@ -1,564 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* structs.h - * - * Data structures used in HMMER. - * Also, a few miscellaneous macros and global variable declarations. - * - * RCS $Id: structs.h,v 1.1.1.1 2005/03/22 08:34:01 cmzmasek Exp $ - */ - -#ifndef STRUCTSH_INCLUDED -#define STRUCTSH_INCLUDED - -#include "squid.h" -#include "config.h" -#include "ssi.h" - -/* Miscellaneous math macros used in the package - */ -#define sreLOG2(x) ((x) > 0 ? log(x) * 1.44269504 : -9999.) -#define sreEXP2(x) (exp((x) * 0.69314718 )) -#define SQR(x) ((x) * (x)) - -/* an idiom for determining a symbol's position in the array - * by pointer arithmetic. - * does no error checking, so caller must already be damned sure x is - * valid in the alphabet! - */ -#define SYMIDX(x) (strchr(Alphabet, (x)) - Alphabet) - -/* The symbol alphabet. - * Must deal with IUPAC degeneracies. Nondegenerate symbols - * come first in Alphabet[], followed by degenerate symbols. 
- * Nucleic alphabet also must deal with other common symbols - * like U (in RNA) and X (often misused for N). - * Example: - * Nucleic: "ACGTUNRYMKSWHBVDX" size=4 iupac=17 - * Amino: "ACDEFGHIKLMNPQRSTVWYBZX" size=20 iupac=23 - * - * Parts of the code assume that the last symbol is a - * symbol for an unknown residue, i.e. 'X'. - * - * MAXCODE and MAXABET constants are defined in config.h - */ -extern char Alphabet[MAXCODE]; /* "ACDEFGHIKLMNPQRSTVWYBZX" for example */ -extern int Alphabet_type; /* hmmNUCLEIC or hmmAMINO */ -extern int Alphabet_size; /* uniq alphabet size: 4 or 20 */ -extern int Alphabet_iupac; /* total size of alphabet + IUPAC degen. */ -extern char Degenerate[MAXCODE][MAXABET]; -extern int DegenCount[MAXCODE]; -#define hmmNOTSETYET 0 -#define hmmNUCLEIC 2 /* compatibility with squid's kRNA */ -#define hmmAMINO 3 /* compatibility with squid's kAmino */ - -/********************************************************************** - * - * Plan7 - * Implementation of the new Plan7 HMM architecture. - * Fully probabilistic even for hmmsw, hmmls, and hmmfs; - * No insert->delete or delete->insert transitions; - * Improved structure layout. - * - * The strategy is to infiltrate plan7 code into HMMER in - * an evolutionary rather than revolutionary manner. - * - **********************************************************************/ - -/* Plan 7 construction strategies. - */ -enum p7_construction { - P7_MAP_CONSTRUCTION, /* maximum a posteriori architecture */ - P7_HAND_CONSTRUCTION, /* hand specified architecture */ - P7_FAST_CONSTRUCTION /* fast ad hoc architecture */ -}; - -/* Plan 7 parameter optimization strategies - */ -enum p7_param { - P7_MAP_PARAM, /* standard maximum a posteriori */ - P7_MD_PARAM, /* maximum discrimination */ - P7_MRE_PARAM, /* maximum relative entropy */ - P7_WMAP_PARAM /* ad hoc weighted MAP */ -}; - -/* Structure: plan7_s - * - * Declaration of a Plan 7 profile-HMM. - */ -struct plan7_s { - /* Annotation on the model. 
A name is mandatory. - * Other fields are optional; whether they are present is - * flagged in the stateflags bit array. - * - * desc is only valid if PLAN7_DESC is set in flags. - * acc is only valid if PLAN7_ACC is set in flags. - * rf is only valid if PLAN7_RF is set in flags. - * cs is only valid if PLAN7_CS is set in flags. - * ca is only valid if PLAN7_CA is set in flags. - * map is only valid if PLAN7_MAP is set in flags. - */ - char *name; /* name of the model +*/ - char *acc; /* accession number of model (Pfam) +*/ - char *desc; /* brief description of model +*/ - char *rf; /* reference line from alignment 0..M +*/ - char *cs; /* consensus structure line 0..M +*/ - char *ca; /* consensus accessibility line 0..M */ - char *comlog; /* command line(s) that built model +*/ - int nseq; /* number of training sequences +*/ - char *ctime; /* creation date +*/ - int *map; /* map of alignment cols onto model 1..M+*/ - int checksum; /* checksum of training sequences +*/ - - /* The following are annotations added to support work by Michael Asman, - * CGR Stockholm. They are not stored in model files; they are only - * used in model construction. - * - * #=GC X-PRM (PRT,PRI) annotation is picked up by hmmbuild and interpreted - * as specifying which mixture Dirichlet component to use. If these flags - * are non-NULL, the normal mixture Dirichlet code is bypassed, and a - * single specific Dirichlet is used at each position. - */ - int *tpri; /* which transition mixture prior to use */ - int *mpri; /* which match mixture prior to use */ - int *ipri; /* which insert mixture prior to use */ - - /* Pfam-specific score cutoffs. - * - * ga1, ga2 are valid if PLAN7_GA is set in flags. - * tc1, tc2 are valid if PLAN7_TC is set in flags. - * nc1, nc2 are valid if PLAN7_NC is set in flags. 
- */ - float ga1, ga2; /* per-seq/per-domain gathering thresholds (bits) +*/ - float tc1, tc2; /* per-seq/per-domain trusted cutoff (bits) +*/ - float nc1, nc2; /* per-seq/per-domain noise cutoff (bits) +*/ - - /* The main model in probability form: data-dependent probabilities. - * This is the core Krogh/Haussler model. - * Transition probabilities are usually accessed as a - * two-D array: hmm->t[k][TMM], for instance. They are allocated - * such that they can also be stepped through in 1D by pointer - * manipulations, for efficiency in DP algorithms. - */ - int M; /* length of the model (# nodes) +*/ - float **t; /* transition prob's. t[1..M-1][0..6] +*/ - float **mat; /* match emissions. mat[1..M][0..19] +*/ - float **ins; /* insert emissions. ins[1..M-1][0..19] +*/ - float tbd1; /* B->D1 prob (data dependent) +*/ - - /* The unique states of Plan 7 in probability form. - * These are the algorithm-dependent, data-independent probabilities. - * Some parts of the code may briefly use a trick of copying tbd1 - * into begin[0]; this makes it easy to call FChoose() or FNorm() - * on the resulting vector. However, in general begin[0] is not - * a valid number. - */ - float xt[4][2]; /* N,E,C,J extra states: 2 transitions +*/ - float *begin; /* 1..M B->M state transitions +*/ - float *end; /* 1..M M->E state transitions (!= a dist!) +*/ - - /* The null model probabilities. - */ - float null[MAXABET]; /* "random sequence" emission prob's +*/ - float p1; /* null model loop probability +*/ - - /* The model in log-odds score form. - * These are created from the probabilities by LogoddsifyHMM(). - * By definition, null[] emission scores are all zero. - * Note that emission distributions are over 26 upper-case letters, - * not just the unambiguous protein or DNA alphabet: we - * precalculate the scores for all IUPAC degenerate symbols we - * may see. Non-IUPAC symbols simply have a -INFTY score. - * Note the reversed indexing on msc and isc -- for efficiency reasons. 
- * - * Only valid if PLAN7_HASBITS is set. - */ - int **tsc; /* transition scores [1.M-1][0.6] -*/ - int **msc; /* match emission scores [0.MAXCODE-1][1.M] -*/ - int **isc; /* ins emission scores [0.MAXCODE-1][1.M-1] -*/ - int xsc[4][2]; /* N,E,C,J transitions -*/ - int *bsc; /* begin transitions [1.M] -*/ - int *esc; /* end transitions [1.M] -*/ - - /* DNA translation scoring parameters - * For aligning protein Plan7 models to DNA sequence. - * Lookup value for a codon is calculated by pos1 * 16 + pos2 * 4 + pos3, - * where 'pos1' is the digitized value of the first nucleotide position; - * if any of the positions are ambiguous codes, lookup value 64 is used - * (which will generally have a score of zero) - * - * Only valid if PLAN7_HASDNA is set. - */ - int **dnam; /* triplet match scores [0.64][1.M] -*/ - int **dnai; /* triplet insert scores [0.64][1.M] -*/ - int dna2; /* -1 frameshift, doublet emission, M or I -*/ - int dna4; /* +1 frameshift, doublet emission, M or I -*/ - - /* P-value and E-value statistical parameters - * Only valid if PLAN7_STATS is set. - */ - float mu; /* EVD mu +*/ - float lambda; /* EVD lambda +*/ - - int flags; /* bit flags indicating state of HMM, valid data +*/ -}; - -/* Flags for plan7->flags. - * Note: Some models have scores but no probabilities (for instance, - * after reading from an HMM save file). Other models have - * probabilities but no scores (for instance, during training - * or building). Since it costs time to convert either way, - * I use PLAN7_HASBITS and PLAN7_HASPROB flags to defer conversion - * until absolutely necessary. This means I have to be careful - * about keeping these flags set properly when I fiddle a model. 
- */ -#define PLAN7_HASBITS (1<<0) /* raised if model has log-odds scores */ -#define PLAN7_DESC (1<<1) /* raised if description exists */ -#define PLAN7_RF (1<<2) /* raised if #RF annotation available */ -#define PLAN7_CS (1<<3) /* raised if #CS annotation available */ -#define PLAN7_XRAY (1<<4) /* raised if structural data available */ -#define PLAN7_HASPROB (1<<5) /* raised if model has probabilities */ -#define PLAN7_HASDNA (1<<6) /* raised if protein HMM->DNA seq params set*/ -#define PLAN7_STATS (1<<7) /* raised if EVD parameters are available */ -#define PLAN7_MAP (1<<8) /* raised if alignment map is available */ -#define PLAN7_ACC (1<<9) /* raised if accession number is available */ -#define PLAN7_GA (1<<10) /* raised if gathering thresholds available */ -#define PLAN7_TC (1<<11) /* raised if trusted cutoffs available */ -#define PLAN7_NC (1<<12) /* raised if noise cutoffs available */ -#define PLAN7_CA (1<<13) /* raised if surface accessibility avail. */ - -/* Indices for special state types, I: used for dynamic programming xmx[][] - * mnemonic: eXtra Matrix for B state = XMB - */ -#define XMB 0 -#define XME 1 -#define XMC 2 -#define XMJ 3 -#define XMN 4 - -/* Indices for special state types, II: used for hmm->xt[] indexing - * mnemonic: eXtra Transition for N state = XTN - */ -#define XTN 0 -#define XTE 1 -#define XTC 2 -#define XTJ 3 - -/* Indices for Plan7 main model state transitions. - * Used for indexing hmm->t[k][] - * mnemonic: Transition from Match to Match = TMM - */ -#define TMM 0 -#define TMI 1 -#define TMD 2 -#define TIM 3 -#define TII 4 -#define TDM 5 -#define TDD 6 - -/* Indices for extra state transitions - * Used for indexing hmm->xt[][]. - */ -#define MOVE 0 /* trNB, trEC, trCT, trJB */ -#define LOOP 1 /* trNN, trEJ, trCC, trJJ */ - -/* Declaration of Plan7 dynamic programming matrix structure. 
- */ -struct dpmatrix_s { - int **xmx; /* special scores [0.1..N][BECJN] */ - int **mmx; /* match scores [0.1..N][0.1..M] */ - int **imx; /* insert scores [0.1..N][0.1..M-1.M] */ - int **dmx; /* delete scores [0.1..N][0.1..M-1.M] */ -}; - -/* Declaration of Plan7 shadow matrix structure. - * In general, allowed values are STM, STI, etc. - * However, E state has M possible sources, from 1..M match states; - * hence the esrc array. - */ -struct dpshadow_s { - char **xtb; /* special state traces [0.1..N][BECJN] */ - char **mtb; /* match state traces [0.1..N][0.1..M] */ - char **itb; /* insert state traces [0.1..N][0.1..M-1.M] */ - char **dtb; /* delete state traces [0.1..N][0.1..M-1.M] */ - int *esrc; /* E trace is special; must store a M state number 1..M */ -}; - -/* Structure: HMMFILE - * - * Purpose: An open HMM file or HMM library. See hmmio.c. - */ -struct hmmfile_s { - FILE *f; /* pointer to file opened for reading */ - SSIFILE *ssi; /* pointer to open SSI index, or NULL */ - int (*parser)(struct hmmfile_s *, struct plan7_s **); /* parsing function */ - int is_binary; /* TRUE if format is a binary one */ - int byteswap; /* TRUE if binary and byteswapped */ - - /* Ewan (GeneWise) needs the input API to know the offset of each - * HMM on the disk, as it's being read. This might be enough - * support for him. hmmindex also uses this. Ewan, see - * HMMFilePositionByIndex() for an example of how to use this - * opaque offset type in the SSI API - the call you need - * is SSISetFilePosition(). 
- */ - int is_seekable; /* TRUE if we use offsets in this HMM file */ - int mode; /* type of offset */ - SSIOFFSET offset; /* Disk offset for beginning of the current HMM */ -}; -typedef struct hmmfile_s HMMFILE; - - -/* Plan 7 model state types - * used in traceback structure - */ -#define STBOGUS 0 -#define STM 1 -#define STD 2 -#define STI 3 -#define STS 4 -#define STN 5 -#define STB 6 -#define STE 7 -#define STC 8 -#define STT 9 -#define STJ 10 - -/* Structure: p7trace_s - * - * Traceback structure for alignments of model to sequence. - * Each array in a trace_s is 0..tlen-1. - * Element 0 is always to STATE_S. Element tlen-1 is always to STATE_T. - */ -struct p7trace_s { - int tlen; /* length of traceback */ - char *statetype; /* state type used for alignment */ - int *nodeidx; /* index of aligned node, 1..M (if M,D,I), or 0 */ - int *pos; /* position in dsq, 1..L, or 0 if none */ -}; - -/* Structure: p7prior_s - * - * Dirichlet priors on HMM parameters. - */ -struct p7prior_s { - int strategy; /* PRI_DCHLET, etc. */ - - int tnum; /* number of transition Dirichlet mixtures */ - float tq[MAXDCHLET]; /* probabilities of tnum components */ - float t[MAXDCHLET][7]; /* transition terms per mix component */ - - int mnum; /* number of mat emission Dirichlet mixtures */ - float mq[MAXDCHLET]; /* probabilities of mnum components */ - float m[MAXDCHLET][MAXABET]; /* match emission terms per mix component */ - - int inum; /* number of insert emission Dirichlet mixes */ - float iq[MAXDCHLET]; /* probabilities of inum components */ - float i[MAXDCHLET][MAXABET]; /* insert emission terms */ -}; -#define PRI_DCHLET 0 /* simple or mixture Dirichlets */ -#define PRI_PAM 1 /* PAM prior hack */ - - -/********************************************************************** - * Other structures, not having to do with HMMs. - **********************************************************************/ - -/* Structure: histogram_s - * - * Keep a score histogram. 
- * - * The main implementation issue here is that the range of - * scores is unknown, and will go negative. histogram is - * a 0..max-min array that represents the range min..max. - * A given score is indexed in histogram array as score-min. - * The AddToHistogram() function deals with dynamically - * resizing the histogram array when necessary. - */ -struct histogram_s { - int *histogram; /* counts of hits */ - int min; /* elem 0 of histogram == min */ - int max; /* last elem of histogram == max */ - int highscore; /* highest active elem has this score */ - int lowscore; /* lowest active elem has this score */ - int lumpsize; /* when resizing, overalloc by this */ - int total; /* total # of hits counted */ - - float *expect; /* expected counts of hits */ - int fit_type; /* flag indicating distribution type */ - float param[3]; /* parameters used for fits */ - float chisq; /* chi-squared val for goodness of fit*/ - float chip; /* P value for chisquared */ -}; -#define HISTFIT_NONE 0 /* no fit done yet */ -#define HISTFIT_EVD 1 /* fit type = extreme value dist */ -#define HISTFIT_GAUSSIAN 2 /* fit type = Gaussian */ -#define EVD_MU 0 /* EVD fit parameter mu */ -#define EVD_LAMBDA 1 /* EVD fit parameter lambda */ -#define EVD_WONKA 2 /* EVD fit fudge factor */ -#define GAUSS_MEAN 0 /* Gaussian parameter mean */ -#define GAUSS_SD 1 /* Gaussian parameter std. dev. */ - -/* Structure: fancyali_s - * - * Alignment of a hit to an HMM, for printing. - */ -struct fancyali_s { - char *rfline; /* reference coord info */ - char *csline; /* consensus structure info */ - char *model; /* aligned query consensus sequence */ - char *mline; /* "identities", conservation +'s, etc. 
*/ - char *aseq; /* aligned target sequence */ - int len; /* length of strings */ - char *query; /* name of query HMM */ - char *target; /* name of target sequence */ - int sqfrom; /* start position on sequence (1..L) */ - int sqto; /* end position on sequence (1..L) */ -}; - -/* Structure: hit_s - * - * Info about a high-scoring database hit. - * We keep this info in memory, so we can output a - * sorted list of high hits at the end. - * - * sqfrom and sqto are the coordinates that will be shown - * in the results, not coords in arrays... therefore, reverse - * complements have sqfrom > sqto - */ -struct hit_s { - double sortkey; /* number to sort by; big is better */ - float score; /* score of the hit */ - double pvalue; /* P-value of the hit */ - float mothersc; /* score of whole sequence */ - double motherp; /* P-value of whole sequence */ - char *name; /* name of the target */ - char *acc; /* accession of the target */ - char *desc; /* description of the target */ - int sqfrom; /* start position in seq (1..N) */ - int sqto; /* end position in seq (1..N) */ - int sqlen; /* length of sequence (N) */ - int hmmfrom; /* start position in HMM (1..M) */ - int hmmto; /* end position in HMM (1..M) */ - int hmmlen; /* length of HMM (M) */ - int domidx; /* index of this domain */ - int ndom; /* total # of domains in this seq */ - struct fancyali_s *ali; /* ptr to optional alignment info */ -}; - - -/* Structure: tophit_s - * - * Array of high scoring hits, suitable for efficient sorting - * when we prepare to output results. "hit" list is NULL and - * unavailable until after we do a sort. - */ -struct tophit_s { - struct hit_s **hit; /* array of ptrs to top scoring hits */ - struct hit_s *unsrt; /* unsorted array */ - int alloc; /* current allocation size */ - int num; /* number of hits in list now */ - int lump; /* allocation lumpsize */ -}; - -/* struct threshold_s - * Contains score/evalue threshold settings. 
- * - * made first for hmmpfam: - * Since we're going to loop over all HMMs in a Pfam (or pfam-like) - * database in main_loop_{serial,pvm}, and we're going to - * allow autocutoffs using Pfam GA, NC, TC lines, we will need - * to reset those cutoffs with each HMM in turn. Therefore the - * main loops need to know whether they're supposed to be - * doing autocutoff. This amount of info was unwieldy enough - * to pass through the argument list that I put it - * in a structure. - */ -struct threshold_s { - float globT; /* T parameter: keep only hits > globT bits */ - double globE; /* E parameter: keep hits < globE E-value */ - float domT; /* T parameter for individual domains */ - double domE; /* E parameter for individual domains */ - /* autosetting of cutoffs using Pfam annot: */ - enum { CUT_NONE, CUT_GA, CUT_NC, CUT_TC } autocut; - int Z; /* nseq to base E value calculation on */ -}; - -/********************************************************** - * PVM parallelization - **********************************************************/ -#ifdef HMMER_PVM - -/* Message tags - */ -#define HMMPVM_INIT 0 /* an initialization packet to all slaves */ -#define HMMPVM_WORK 1 /* a work packet sent to a slave */ -#define HMMPVM_RESULTS 2 /* a results packet sent back to master */ -#define HMMPVM_TASK_TROUBLE 3 /* a notification of bad things in a slave task */ -#define HMMPVM_HOST_TROUBLE 4 /* a notification of bad things in a PVM host */ - -/* error codes - */ -#define HMMPVM_OK 0 -#define HMMPVM_NO_HMMFILE 1 -#define HMMPVM_NO_INDEX 2 -#define HMMPVM_BAD_INIT 3 /* failed to initialize a slave somehow */ - -#endif - - -/********************************************************** - * Plan 9: obsolete HMMER1.x code. We still need these structures - * for reading old HMM files (e.g. 
backwards compatibility) - **********************************************************/ - -/* We define a "basic" state, which covers the basic match, insert, and - * delete states from the Haussler paper. Numbers are stored as - * pre-calculated negative logs. - */ -struct basic_state { - float t[3]; /* state transitions to +1 M, +0 I, +1 D */ - float p[MAXABET]; /* symbol emission probabilities */ -}; - -/* A complete hidden Markov model - */ -struct plan9_s { - int M; /* length of the model */ - struct basic_state *ins; /* insert states 0..M+1 */ - struct basic_state *mat; /* match 0..M+1; 0 = BEGIN, M+1 = END */ - struct basic_state *del; /* delete 0..M+1 */ - - float null[MAXABET]; /* the *suggested* null model */ - - /* Optional annotation on the HMM, taken from alignment - */ - char *name; /* a name for the HMM */ - char *ref; /* reference coords and annotation */ - char *cs; /* consensus structure annotation */ - float *xray; /* Structural annotation: xray[0..M+1][NINPUTS], indexed manually */ - - int flags; /* flags for what optional info is in HMM */ -}; - -/* Flags for optional info in an HMM structure - */ -#define HMM_REF (1<<0) -#define HMM_CS (1<<1) -#define HMM_XRAY (1<<2) - -#define MATCH 0 -#define INSERT 1 -#define DELETE 2 -#define BEGIN MATCH -#define END MATCH - -#endif /* STRUCTSH_INCLUDED */ diff --git a/forester/archive/RIO/others/hmmer/src/threads.c b/forester/archive/RIO/others/hmmer/src/threads.c deleted file mode 100644 index d2eb450..0000000 --- a/forester/archive/RIO/others/hmmer/src/threads.c +++ /dev/null @@ -1,90 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- ************************************************************/ - -/* threads.c - * SRE, Fri Jul 10 10:05:44 1998 - * - * Pthreads code shared by hmmsearch, hmmcalibrate, and hmmpfam - * to coarse-grain parallelize on platforms capable of POSIX - * threads. Most of the threads code, however, is in the respective - * main's, i.e. hmmsearch.c, hmmpfam.c, hmmcalibrate.c - * - * RCS $Id: threads.c,v 1.1.1.1 2005/03/22 08:34:02 cmzmasek Exp $ - */ - -#ifdef HMMER_THREADS /* conditional inclusion of the entire file */ - -#include -#include -#include -#include - -#include "structs.h" -#include "funcs.h" -#include "squid.h" -#include "sqfuncs.h" - - -/* Function: ThreadNumber() - * Date: SRE, Sat Jul 11 11:03:50 1998 [St. Louis] - * - * Purpose: Recommend how many threads to use. - * - * - if we can determine the number of processors - * on the machine by SQD_NPROC, use that. This - * should succeed for SGI IRIX, Digital UNIX, and - * Sun Solaris platforms. - * - if not, assume two processors. We're probably - * on a FreeBSD or Linux box, and odds are that its - * a dualprocessor. - * - if HMMER_NCPU is defined in config.h, use that - * number instead; allows Linux or FreeBSD machines - * to compile code for a quadprocessor, for instance. - * That define can be overridden at compile - * time by a -DHMMER_NCPU=x, where x is the - * number of threads.. - * - if HMMER_NCPU is defined in the environment, - * use that number, overriding all others. - * - * Typically, we'll set the default number of - * threads with ThreadNumber() but allow it - * to be overridden at the command line with --cpu. - * - * Summarizing priority: - * --ncpu option - * environment variable, setenv HMMER_NCPU x - * compile-time, MDEFS=HMMER_NCPU=x - * compile-time, config.h definition of HMMER_NCPU - * SQD_NPROC, or 2 if SQD_NPROC doesn't work. 
- * - * Args: void - * - * Returns: >= 1, recommended number of threads - */ -int -ThreadNumber(void) -{ - int num; - char *env; - - num = SQD_NPROC; /* SGI, Sun, Digital: get # of available CPUs */ - if (num == -1) num = 2; /* Linux, FreeBSD: assume dualprocessor */ -#ifdef HMMER_NCPU - num = HMMER_NCPU; /* allow config.h to override; usually we don't */ -#endif - /* allow environment variable to override */ - if ((env = getenv("HMMER_NCPU")) != NULL) - num = atoi(env); - if (num <= 0) num = 1; /* silent sanity check */ - SQD_DPRINTF1(("ThreadNumber(): setting number of threads to %d\n", num)); - return num; -} - -#endif /*HMMER_THREADS*/ diff --git a/forester/archive/RIO/others/hmmer/src/tophits.c b/forester/archive/RIO/others/hmmer/src/tophits.c deleted file mode 100644 index bbd1b05..0000000 --- a/forester/archive/RIO/others/hmmer/src/tophits.c +++ /dev/null @@ -1,376 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* tophits.c - * - * Routines for storing, sorting, displaying high scoring hits - * and alignments. - * - ***************************************************************************** - * - * main API: - * - * AllocTophits() - allocation - * FreeTophits() - free'ing - * RegisterHit() - put information about a hit in the list - * GetRankedHit() - recovers information about a hit - * FullSortTophits() - sorts the top H hits. 
- * - ***************************************************************************** - * Brief example of use: - * - * struct tophit_s *yourhits; // list of hits - * struct fancyali_s *ali; // (optional structure) alignment of a hit - * - * yourhits = AllocTophits(200); - * (for every hit in a search) { - * if (do_alignments) - * ali = Trace2FancyAli(); // You provide a function/structure here - * if (score > threshold) - * RegisterHit(yourhits, ...) - * } - * - * FullSortTophits(yourhits); // Sort hits by evalue - * for (i = 0; i < 100; i++) // Recover hits out in ranked order - * { - * GetRankedHit(yourhits, i, ...); - * // Presumably you'd print here... - * } - * FreeTophits(yourhits); - *************************************************************************** - * - * Estimated storage per hit: - * coords: 16 bytes - * scores: 8 bytes - * name/acc/desc: 192 bytes - * alignment: 1000 bytes total = ~1200 bytes with alignment; - * = ~200 bytes without - * Designed for: 10^5 hits (20 MB) or 10^4 alignments (10 MB) - */ - -#include -#include -#include -#include "structs.h" -#include "funcs.h" - -/* Function: AllocTophits() - * - * Purpose: Allocate a struct tophit_s, for maintaining - * a list of top-scoring hits in a database search. - * - * Args: lumpsize - allocation lumpsize - * - * Return: An allocated struct hit_s. Caller must free. 
- */ -struct tophit_s * -AllocTophits(int lumpsize) -{ - struct tophit_s *hitlist; - - hitlist = MallocOrDie (sizeof(struct tophit_s)); - hitlist->hit = NULL; - hitlist->unsrt = MallocOrDie (lumpsize * sizeof(struct hit_s)); - hitlist->alloc = lumpsize; - hitlist->num = 0; - hitlist->lump = lumpsize; - return hitlist; -} -void -GrowTophits(struct tophit_s *h) -{ - h->unsrt = ReallocOrDie(h->unsrt,(h->alloc + h->lump) * sizeof(struct hit_s)); - h->alloc += h->lump; -} -void -FreeTophits(struct tophit_s *h) -{ - int pos; - for (pos = 0; pos < h->num; pos++) - { - if (h->unsrt[pos].ali != NULL) FreeFancyAli(h->unsrt[pos].ali); - if (h->unsrt[pos].name != NULL) free(h->unsrt[pos].name); - if (h->unsrt[pos].acc != NULL) free(h->unsrt[pos].acc); - if (h->unsrt[pos].desc != NULL) free(h->unsrt[pos].desc); - } - free(h->unsrt); - if (h->hit != NULL) free(h->hit); - free(h); -} - -struct fancyali_s * -AllocFancyAli(void) -{ - struct fancyali_s *ali; - - ali = MallocOrDie (sizeof(struct fancyali_s)); - ali->rfline = ali->csline = ali->model = ali->mline = ali->aseq = NULL; - ali->query = ali->target = NULL; - ali->sqfrom = ali->sqto = 0; - return ali; -} -void -FreeFancyAli(struct fancyali_s *ali) -{ - if (ali != NULL) { - if (ali->rfline != NULL) free(ali->rfline); - if (ali->csline != NULL) free(ali->csline); - if (ali->model != NULL) free(ali->model); - if (ali->mline != NULL) free(ali->mline); - if (ali->aseq != NULL) free(ali->aseq); - if (ali->query != NULL) free(ali->query); - if (ali->target != NULL) free(ali->target); - free(ali); - } -} - -/* Function: RegisterHit() - * - * Purpose: Add a new hit to a list of top hits. - * - * "ali", if provided, is a pointer to allocated memory - * for an alignment output structure. - * Management is turned over to the top hits structure. - * Caller should not free them; they will be free'd by - * the FreeTophits() call. - * - * In contrast, "name", "acc", and "desc" are copied, so caller - * is still responsible for these. 
- * - * Number of args is unwieldy. - * - * Args: h - active top hit list - * key - value to sort by: bigger is better - * pvalue - P-value of this hit - * score - score of this hit - * motherp - P-value of parent whole sequence - * mothersc - score of parent whole sequence - * name - name of target - * acc - accession of target (may be NULL) - * desc - description of target (may be NULL) - * sqfrom - 1..L pos in target seq of start - * sqto - 1..L pos; sqfrom > sqto if rev comp - * sqlen - length of sequence, L - * hmmfrom - 0..M+1 pos in HMM of start - * hmmto - 0..M+1 pos in HMM of end - * hmmlen - length of HMM, M - * domidx - number of this domain - * ndom - total # of domains in sequence - * ali - optional printable alignment info - * - * Return: (void) - * hitlist is modified and possibly reallocated internally. - */ -void -RegisterHit(struct tophit_s *h, double key, - double pvalue, float score, double motherp, float mothersc, - char *name, char *acc, char *desc, - int sqfrom, int sqto, int sqlen, - int hmmfrom, int hmmto, int hmmlen, - int domidx, int ndom, - struct fancyali_s *ali) -{ - /* Check to see if list is full and we must realloc. 
- */ - if (h->num == h->alloc) GrowTophits(h); - - h->unsrt[h->num].name = Strdup(name); - h->unsrt[h->num].acc = Strdup(acc); - h->unsrt[h->num].desc = Strdup(desc); - h->unsrt[h->num].sortkey = key; - h->unsrt[h->num].pvalue = pvalue; - h->unsrt[h->num].score = score; - h->unsrt[h->num].motherp = motherp; - h->unsrt[h->num].mothersc= mothersc; - h->unsrt[h->num].sqfrom = sqfrom; - h->unsrt[h->num].sqto = sqto; - h->unsrt[h->num].sqlen = sqlen; - h->unsrt[h->num].hmmfrom = hmmfrom; - h->unsrt[h->num].hmmto = hmmto; - h->unsrt[h->num].hmmlen = hmmlen; - h->unsrt[h->num].domidx = domidx; - h->unsrt[h->num].ndom = ndom; - h->unsrt[h->num].ali = ali; - h->num++; - return; -} - -/* Function: GetRankedHit() - * Date: SRE, Tue Oct 28 10:06:48 1997 [Newton Institute, Cambridge UK] - * - * Purpose: Recover the data from the i'th ranked hit. - * Any of the data ptrs may be passed as NULL for fields - * you don't want. hitlist must have been sorted first. - * - * name, acc, desc, and ali are returned as pointers, not copies; - * don't free them! 
- */ -void -GetRankedHit(struct tophit_s *h, int rank, - double *r_pvalue, float *r_score, - double *r_motherp, float *r_mothersc, - char **r_name, char **r_acc, char **r_desc, - int *r_sqfrom, int *r_sqto, int *r_sqlen, - int *r_hmmfrom, int *r_hmmto, int *r_hmmlen, - int *r_domidx, int *r_ndom, - struct fancyali_s **r_ali) -{ - if (r_pvalue != NULL) *r_pvalue = h->hit[rank]->pvalue; - if (r_score != NULL) *r_score = h->hit[rank]->score; - if (r_motherp != NULL) *r_motherp = h->hit[rank]->motherp; - if (r_mothersc!= NULL) *r_mothersc= h->hit[rank]->mothersc; - if (r_name != NULL) *r_name = h->hit[rank]->name; - if (r_acc != NULL) *r_acc = h->hit[rank]->acc; - if (r_desc != NULL) *r_desc = h->hit[rank]->desc; - if (r_sqfrom != NULL) *r_sqfrom = h->hit[rank]->sqfrom; - if (r_sqto != NULL) *r_sqto = h->hit[rank]->sqto; - if (r_sqlen != NULL) *r_sqlen = h->hit[rank]->sqlen; - if (r_hmmfrom != NULL) *r_hmmfrom = h->hit[rank]->hmmfrom; - if (r_hmmto != NULL) *r_hmmto = h->hit[rank]->hmmto; - if (r_hmmlen != NULL) *r_hmmlen = h->hit[rank]->hmmlen; - if (r_domidx != NULL) *r_domidx = h->hit[rank]->domidx; - if (r_ndom != NULL) *r_ndom = h->hit[rank]->ndom; - if (r_ali != NULL) *r_ali = h->hit[rank]->ali; -} - -/* Function: TophitsMaxName() - * - * Purpose: Returns the maximum name length in a top hits list; - * doesn't need to be sorted yet. - */ -int -TophitsMaxName(struct tophit_s *h) -{ - int i; - int len, maxlen; - - maxlen = 0; - for (i = 0; i < h->num; i++) - { - len = strlen(h->unsrt[i].name); - if (len > maxlen) maxlen = len; - } - return maxlen; -} - -/* Function: FullSortTophits() - * - * Purpose: Completely sort the top hits list. Calls - * qsort() to do the sorting, and uses - * hit_comparison() to do the comparison. - * - * Args: h - top hits structure - */ -int -hit_comparison(const void *vh1, const void *vh2) -{ - /* don't ask. don't change. and, Don't Panic. 
*/ - struct hit_s *h1 = *((struct hit_s **) vh1); - struct hit_s *h2 = *((struct hit_s **) vh2); - - if (h1->sortkey < h2->sortkey) return 1; - else if (h1->sortkey > h2->sortkey) return -1; - else if (h1->sortkey == h2->sortkey) return 0; - /*NOTREACHED*/ - return 0; -} -void -FullSortTophits(struct tophit_s *h) -{ - int i; - - /* If we don't have /any/ hits, then don't - * bother. - */ - if (h->num == 0) return; - - /* Assign the ptrs in h->hit. - */ - h->hit = MallocOrDie(h->num * sizeof(struct hit_s *)); - for (i = 0; i < h->num; i++) - h->hit[i] = &(h->unsrt[i]); - - /* Sort the pointers. Don't bother if we've only got one. - */ - if (h->num > 1) - qsort(h->hit, h->num, sizeof(struct hit_s *), hit_comparison); -} - - - -/* Function: TophitsReport() - * Date: Thu Dec 18 13:19:18 1997 - * - * Purpose: Generate a printout summarizing how much - * memory is used by a tophits structure, - * how many hits are stored, and how much - * waste there is from not knowing nseqs. - * - * Args: h - the sorted tophits list - * E - the cutoff in Evalue - * nseq - the final number of seqs used for Eval - * - * Return: (void) - * Prints information on stdout - */ -void -TophitsReport(struct tophit_s *h, double E, int nseq) -{ - int i; - int memused; - int x; - int n; - - /* Count up how much memory is used - * in the whole list. 
- */ - memused = sizeof(struct hit_s) * h->alloc + sizeof(struct tophit_s); - for (i = 0; i < h->num; i++) - { - if (h->unsrt[i].name != NULL) - memused += strlen(h->unsrt[i].name) + 1; - if (h->unsrt[i].acc != NULL) - memused += strlen(h->unsrt[i].acc) + 1; - if (h->unsrt[i].desc != NULL) - memused += strlen(h->unsrt[i].desc) + 1; - if (h->unsrt[i].ali != NULL) - { - memused += sizeof(struct fancyali_s); - x = 0; - if (h->unsrt[i].ali->rfline != NULL) x++; - if (h->unsrt[i].ali->csline != NULL) x++; - if (h->unsrt[i].ali->model != NULL) x++; - if (h->unsrt[i].ali->mline != NULL) x++; - if (h->unsrt[i].ali->aseq != NULL) x++; - memused += x * (h->unsrt[i].ali->len + 1); - - if (h->unsrt[i].ali->query != NULL) - memused += strlen(h->unsrt[i].ali->query) + 1; - if (h->unsrt[i].ali->target != NULL) - memused += strlen(h->unsrt[i].ali->target) + 1; - } - } - - /* Count how many hits actually satisfy the E cutoff. - */ - n = 0; - for (i = 0; i < h->num; i++) - { - if (h->hit[i]->pvalue * (double) nseq >= E) break; - n++; - } - - /* Format and print a summary - */ - printf("tophits_s report:\n"); - printf(" Total hits: %d\n", h->num); - printf(" Satisfying E cutoff: %d\n", n); - printf(" Total memory: %dK\n", memused / 1000); -} diff --git a/forester/archive/RIO/others/hmmer/src/trace.c b/forester/archive/RIO/others/hmmer/src/trace.c deleted file mode 100644 index 424d3ba..0000000 --- a/forester/archive/RIO/others/hmmer/src/trace.c +++ /dev/null @@ -1,1203 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. 
- ************************************************************/ - -/* trace.c - * SRE, Sat Nov 16 12:34:57 1996 - * RCS $Id: trace.c,v 1.1.1.1 2005/03/22 08:34:07 cmzmasek Exp $ - * - * Support for Plan 7 traceback data structure, p7trace_s. - */ - -#include -#include -#include - -#include "structs.h" -#include "config.h" -#include "squid.h" -#include "funcs.h" -#include "version.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -static void rightjustify(char *s, int n); - -/* Function: P7AllocTrace(), P7ReallocTrace(), P7FreeTrace() - * - * Purpose: allocation and freeing of traceback structures - */ -void -P7AllocTrace(int tlen, struct p7trace_s **ret_tr) -{ - struct p7trace_s *tr; - - tr = MallocOrDie (sizeof(struct p7trace_s)); - tr->statetype = MallocOrDie (sizeof(char) * tlen); - tr->nodeidx = MallocOrDie (sizeof(int) * tlen); - tr->pos = MallocOrDie (sizeof(int) * tlen); - *ret_tr = tr; -} -void -P7ReallocTrace(struct p7trace_s *tr, int tlen) -{ - tr->statetype = ReallocOrDie (tr->statetype, tlen * sizeof(char)); - tr->nodeidx = ReallocOrDie (tr->nodeidx, tlen * sizeof(int)); - tr->pos = ReallocOrDie (tr->pos, tlen * sizeof(int)); -} -void -P7FreeTrace(struct p7trace_s *tr) -{ - free(tr->pos); - free(tr->nodeidx); - free(tr->statetype); - free(tr); -} - -/* Function: TraceSet() - * Date: SRE, Sun Mar 8 12:39:00 1998 [St. Louis] - * - * Purpose: Convenience function; set values at position tpos - * in a trace. - * - * - * Args: tr - trace object to write to - * tpos - ptr to position in trace to set - * type - statetype e.g. STS, etc. - * idx - nodeidx 1..M or 0 - * pos - seq position 1..L or 0 - * - * Returns: void - */ -void -TraceSet(struct p7trace_s *tr, int tpos, char type, int idx, int pos) -{ - tr->statetype[tpos] = type; - tr->nodeidx[tpos] = idx; - tr->pos[tpos] = pos; -} - - -/* Function: MergeTraceArrays() - * Date: SRE, Sun Jul 5 15:09:10 1998 [St. Louis] - * - * Purpose: Combine two arrays of traces into a single array. 
- * Used in hmmalign to merge traces from a fixed alignment - * with traces from individual unaligned seqs. - * - * t1 traces always precede t2 traces in the resulting array. - * - * Args: t1 - first set of traces - * n1 - number of traces in t1 - * t2 - second set of traces - * n2 - number of traces in t2 - * - * Returns: pointer to new array of traces. - * Both t1 and t2 are free'd here! Do not reuse. - */ -struct p7trace_s ** -MergeTraceArrays(struct p7trace_s **t1, int n1, struct p7trace_s **t2, int n2) -{ - struct p7trace_s **tr; - int i; /* index in traces */ - - tr = MallocOrDie(sizeof(struct p7trace_s *) * (n1+n2)); - for (i = 0; i < n1; i++) tr[i] = t1[i]; - for (i = 0; i < n2; i++) tr[n1+i] = t2[i]; - free(t1); - free(t2); - return tr; -} - - - -/* Function: P7ReverseTrace() - * Date: SRE, Mon Aug 25 12:57:29 1997; Denver CO. - * - * Purpose: Reverse the arrays in a traceback structure. - * Tracebacks from Forward() and Viterbi() are - * collected backwards, and call this function - * when they're done. - * - * It's possible to reverse the arrays in place - * more efficiently; but the realloc/copy strategy - * has the advantage of reallocating the trace - * into the right size of memory. (Tracebacks - * overallocate.) - * - * Args: tr - the traceback to reverse. tr->tlen must be set. - * - * Return: (void) - * tr is modified. - */ -void -P7ReverseTrace(struct p7trace_s *tr) -{ - char *statetype; - int *nodeidx; - int *pos; - int opos, npos; - - /* Allocate - */ - statetype = MallocOrDie (sizeof(char)* tr->tlen); - nodeidx = MallocOrDie (sizeof(int) * tr->tlen); - pos = MallocOrDie (sizeof(int) * tr->tlen); - - /* Reverse the trace. - */ - for (opos = tr->tlen-1, npos = 0; npos < tr->tlen; npos++, opos--) - { - statetype[npos] = tr->statetype[opos]; - nodeidx[npos] = tr->nodeidx[opos]; - pos[npos] = tr->pos[opos]; - } - - /* Swap old, new arrays. 
- */ - free(tr->statetype); - free(tr->nodeidx); - free(tr->pos); - tr->statetype = statetype; - tr->nodeidx = nodeidx; - tr->pos = pos; -} - - - -/* Function: P7TraceCount() - * - * Purpose: Count a traceback into a count-based HMM structure. - * (Usually as part of a model parameter re-estimation.) - * - * Args: hmm - counts-based HMM - * dsq - digitized sequence that traceback aligns to the HMM (1..L) - * wt - weight on the sequence - * tr - alignment of seq to HMM - * - * Return: (void) - */ -void -P7TraceCount(struct plan7_s *hmm, char *dsq, float wt, struct p7trace_s *tr) -{ - int tpos; /* position in tr */ - int i; /* symbol position in seq */ - - for (tpos = 0; tpos < tr->tlen; tpos++) - { - i = tr->pos[tpos]; - - /* Emission counts. - * Don't bother counting null states N,J,C. - */ - if (tr->statetype[tpos] == STM) - P7CountSymbol(hmm->mat[tr->nodeidx[tpos]], dsq[i], wt); - else if (tr->statetype[tpos] == STI) - P7CountSymbol(hmm->ins[tr->nodeidx[tpos]], dsq[i], wt); - - /* State transition counts - */ - switch (tr->statetype[tpos]) { - case STS: - break; /* don't bother; P=1 */ - case STN: - switch (tr->statetype[tpos+1]) { - case STB: hmm->xt[XTN][MOVE] += wt; break; - case STN: hmm->xt[XTN][LOOP] += wt; break; - default: - Die("illegal state transition %s->%s in traceback", - Statetype(tr->statetype[tpos]), - Statetype(tr->statetype[tpos+1])); - } - break; - case STB: - switch (tr->statetype[tpos+1]) { - case STM: hmm->begin[tr->nodeidx[tpos+1]] += wt; break; - case STD: hmm->tbd1 += wt; break; - default: - Die("illegal state transition %s->%s in traceback", - Statetype(tr->statetype[tpos]), - Statetype(tr->statetype[tpos+1])); - } - break; - case STM: - switch (tr->statetype[tpos+1]) { - case STM: hmm->t[tr->nodeidx[tpos]][TMM] += wt; break; - case STI: hmm->t[tr->nodeidx[tpos]][TMI] += wt; break; - case STD: hmm->t[tr->nodeidx[tpos]][TMD] += wt; break; - case STE: hmm->end[tr->nodeidx[tpos]] += wt; break; - default: - Die("illegal state transition 
%s->%s in traceback", - Statetype(tr->statetype[tpos]), - Statetype(tr->statetype[tpos+1])); - } - break; - case STI: - switch (tr->statetype[tpos+1]) { - case STM: hmm->t[tr->nodeidx[tpos]][TIM] += wt; break; - case STI: hmm->t[tr->nodeidx[tpos]][TII] += wt; break; - default: - Die("illegal state transition %s->%s in traceback", - Statetype(tr->statetype[tpos]), - Statetype(tr->statetype[tpos+1])); - } - break; - case STD: - switch (tr->statetype[tpos+1]) { - case STM: hmm->t[tr->nodeidx[tpos]][TDM] += wt; break; - case STD: hmm->t[tr->nodeidx[tpos]][TDD] += wt; break; - case STE: /* ignore; p(D->E) = 1.0 */ break; - default: - Die("illegal state transition %s->%s in traceback", - Statetype(tr->statetype[tpos]), - Statetype(tr->statetype[tpos+1])); - } - break; - case STE: - switch (tr->statetype[tpos+1]) { - case STC: hmm->xt[XTE][MOVE] += wt; break; - case STJ: hmm->xt[XTE][LOOP] += wt; break; - default: - Die("illegal state transition %s->%s in traceback", - Statetype(tr->statetype[tpos]), - Statetype(tr->statetype[tpos+1])); - } - break; - case STJ: - switch (tr->statetype[tpos+1]) { - case STB: hmm->xt[XTJ][MOVE] += wt; break; - case STJ: hmm->xt[XTJ][LOOP] += wt; break; - default: - Die("illegal state transition %s->%s in traceback", - Statetype(tr->statetype[tpos]), - Statetype(tr->statetype[tpos+1])); - } - break; - case STC: - switch (tr->statetype[tpos+1]) { - case STT: hmm->xt[XTC][MOVE] += wt; break; - case STC: hmm->xt[XTC][LOOP] += wt; break; - default: - Die("illegal state transition %s->%s in traceback", - Statetype(tr->statetype[tpos]), - Statetype(tr->statetype[tpos+1])); - } - break; - case STT: - break; /* T is the last. It makes no transitions. */ - default: - Die("illegal state %s in traceback", - Statetype(tr->statetype[tpos])); - } - } -} - - -/* Function: P7TraceScore() - * - * Purpose: Score a traceback and return the score in scaled bits. - * - * Args: hmm - HMM with valid log odds scores. 
- * dsq - digitized sequence that traceback aligns to the HMM (1..L) - * tr - alignment of seq to HMM - * - * Return: (void) - */ -float -P7TraceScore(struct plan7_s *hmm, char *dsq, struct p7trace_s *tr) -{ - int score; /* total score as a scaled integer */ - int tpos; /* position in tr */ - int sym; /* digitized symbol in dsq */ - - /* P7PrintTrace(stdout, tr, hmm, dsq); */ - score = 0; - for (tpos = 0; tpos < tr->tlen-1; tpos++) - { - sym = (int) dsq[tr->pos[tpos]]; - - /* Emissions. - * Don't bother counting null states N,J,C. - */ - if (tr->statetype[tpos] == STM) - score += hmm->msc[sym][tr->nodeidx[tpos]]; - else if (tr->statetype[tpos] == STI) - score += hmm->isc[sym][tr->nodeidx[tpos]]; - - /* State transitions. - */ - score += TransitionScoreLookup(hmm, - tr->statetype[tpos], tr->nodeidx[tpos], - tr->statetype[tpos+1], tr->nodeidx[tpos+1]); - } - return Scorify(score); -} - - - -/* Function: P7Traces2Alignment() - * - * Purpose: Convert an array of traceback structures for a set - * of sequences into a new multiple alignment. - * - * Insertions are put into lower case and - * are not aligned; instead, Nterm is right-justified, - * Cterm is left-justified, and internal insertions - * are split in half and the halves are justified in - * each direction (the objective being to increase - * the chances of getting insertions aligned well enough - * for them to become a match). SAM gap char conventions - * are used: - in match columns, . in insert columns - * - * NOTE: Does not recognize J state. - * - * Args: dsq - digitized unaligned sequences - * sqinfo - array of info about the sequences - * wgt - weights on seqs - * nseq - number of sequences - * mlen - length of model (number of match states) - * tr - array of tracebacks - * matchonly - TRUE if we don't print insert-generated symbols at all - * Return: MSA structure; NULL on failure. 
- * Caller responsible for freeing msa with MSAFree(msa); - */ -MSA * -P7Traces2Alignment(char **dsq, SQINFO *sqinfo, float *wgt, int nseq, int mlen, - struct p7trace_s **tr, int matchonly) -{ - MSA *msa; /* RETURN: new alignment */ - int idx; /* counter for sequences */ - int alen; /* width of alignment */ - int *inserts; /* array of max gaps between aligned columns */ - int *matmap; /* matmap[k] = apos of match k [1..M] */ - int nins; /* counter for inserts */ - int apos; /* position in aligned sequence (0..alen-1)*/ - int rpos; /* position in raw digital sequence (1..L)*/ - int tpos; /* position counter in traceback */ - int statetype; /* type of current state, e.g. STM */ - int k; /* counter over states in model */ - - /* Here's the problem. We want to align the match states in columns, - * but some sequences have inserted symbols in them; we need some - * sort of overall knowledge of where the inserts are and how long - * they are in order to create the alignment. - * - * Here's our trick. inserts[] is a 0..hmm->M array; inserts[i] stores - * the maximum number of times insert substate i was used. This - * is the maximum number of gaps to insert between canonical - * column i and i+1. inserts[0] is the N-term tail; inserts[M] is - * the C-term tail. - * - * Remember that N and C emit on transition, hence the check for an - * N->N or C->C transition before bumping nins. - */ - inserts = (int *) MallocOrDie (sizeof(int) * (mlen+1)); - for (k = 0; k <= mlen; k++) - inserts[k] = 0; - for (idx = 0; idx < nseq; idx++) { - nins = 0; - for (tpos = 0; tpos < tr[idx]->tlen; tpos++) { - switch (tr[idx]->statetype[tpos]) { - case STI: nins++; break; - case STN: if (tr[idx]->statetype[tpos-1] == STN) nins++; break; - case STC: if (tr[idx]->statetype[tpos-1] == STC) nins++; break; - case STM: - case STD: /* M,D: record max. reset ctr. 
*/ - if (nins > inserts[tr[idx]->nodeidx[tpos]-1]) - inserts[tr[idx]->nodeidx[tpos]-1] = nins; - nins = 0; - break; - case STB: /* B; record N-tail max, reset ctr */ - if (nins > inserts[0]) - inserts[0] = nins; - nins = 0; - break; - case STT: /* T: record C-tail max */ - if (nins > inserts[mlen]) - inserts[mlen] = nins; - break; - case STS: case STE: break; /* ignore other states */ - case STJ: - Die("yo! you don't support J in Traces2Alignment(), remember?"); - default: - Die("Traces2Alignment reports unrecognized statetype %c", - Statetype(tr[idx]->statetype[tpos])); - } - } - } - - /* Insert compression option. */ - if (matchonly) - for (k = 0; k <= mlen; k++) - if (inserts[k] > 1) - inserts[k] = 1; - - /*********************************************** - * Construct the alignment - ***********************************************/ - /* calculate alignment length and matmap */ - matmap= (int *) MallocOrDie (sizeof(int) * (mlen+1)); - matmap[0] = -1; - alen = inserts[0]; - for (k = 1; k <= mlen ; k++) { - matmap[k] = alen; - alen += inserts[k] + 1; - } - /* allocation for new alignment */ - msa = MSAAlloc(nseq, alen); - - for (idx = 0; idx < nseq; idx++) { - /* blank an aseq */ - for (apos = 0; apos < alen; apos++) - msa->aseq[idx][apos] = '.'; - for (k = 1; k <= mlen; k++) - msa->aseq[idx][matmap[k]] = '-'; - msa->aseq[idx][alen] = '\0'; - /* align the sequence */ - apos = 0; - for (tpos = 0; tpos < tr[idx]->tlen; tpos++) { - statetype = tr[idx]->statetype[tpos]; /* just for clarity */ - rpos = tr[idx]->pos[tpos]; - k = tr[idx]->nodeidx[tpos]; - - if (statetype == STM) { - apos = matmap[k]; - msa->aseq[idx][apos] = Alphabet[(int) dsq[idx][rpos]]; - apos++; - } - else if (statetype == STI) { - if (matchonly) - msa->aseq[idx][apos] = '*'; /* insert compression option */ - else { - msa->aseq[idx][apos] = (char) tolower((int) Alphabet[(int) dsq[idx][rpos]]); - apos++; - } - } - else if ((statetype == STN || statetype == STC) && rpos > 0) { - if (matchonly) - 
msa->aseq[idx][apos] = '*'; /* insert compression option */ - else { - msa->aseq[idx][apos] = (char) tolower((int) Alphabet[(int) dsq[idx][rpos]]); - apos++; - } - } - else if (statetype == STE) - apos = matmap[mlen]+1; /* set position for C-term tail */ - } - - /* N-terminal extension is right-justified. - * Internal inserts are split in half, and C-term is right-justified. - * C-terminal extension remains left-justified. - */ - if (! matchonly) { - rightjustify(msa->aseq[idx], inserts[0]); - - for (k = 1; k < mlen; k++) - if (inserts[k] > 1) { - for (nins = 0, apos = matmap[k]+1; islower((int) (msa->aseq[idx][apos])); apos++) - nins++; - nins /= 2; /* split the insertion in half */ - rightjustify(msa->aseq[idx]+matmap[k]+1+nins, inserts[k]-nins); - } - } - - } - - /*********************************************** - * Build the rest of the MSA annotation. - ***********************************************/ - - msa->nseq = nseq; - msa->alen = alen; - msa->au = MallocOrDie(sizeof(char) * (strlen(RELEASE)+7)); - sprintf(msa->au, "HMMER %s", RELEASE); - /* copy sqinfo array and weights */ - for (idx = 0; idx < nseq; idx++) - { - msa->sqname[idx] = sre_strdup(sqinfo[idx].name, -1); - if (sqinfo[idx].flags & SQINFO_ACC) - MSASetSeqAccession(msa, idx, sqinfo[idx].acc); - if (sqinfo[idx].flags & SQINFO_DESC) - MSASetSeqAccession(msa, idx, sqinfo[idx].desc); - - if (sqinfo[idx].flags & SQINFO_SS) { - if (msa->ss == NULL) msa->ss = MallocOrDie(sizeof(char *) * nseq); - MakeAlignedString(msa->aseq[idx], alen, - sqinfo[idx].ss, &(msa->ss[idx])); - } - if (sqinfo[idx].flags & SQINFO_SA) { - if (msa->sa == NULL) msa->sa = MallocOrDie(sizeof(char *) * nseq); - MakeAlignedString(msa->aseq[idx], alen, - sqinfo[idx].sa, &(msa->sa[idx])); - } - msa->wgt[idx] = wgt[idx]; - } - - /* #=RF annotation: x for match column, . 
for insert column - */ - msa->rf = (char *) MallocOrDie (sizeof(char) * (alen+1)); - for (apos = 0; apos < alen; apos++) - msa->rf[apos] = '.'; - for (k = 1; k <= mlen; k++) - msa->rf[matmap[k]] = 'x'; - msa->rf[alen] = '\0'; - - /* Currently, we produce no consensus structure. - * #=CS, generated from HMM structural annotation, would go here. - */ - - free(inserts); - free(matmap); - return msa; -} - -/* Function: TransitionScoreLookup() - * - * Purpose: Convenience function used in PrintTrace() and TraceScore(); - * given state types and node indices for a transition, - * return the integer score for that transition. - */ -int -TransitionScoreLookup(struct plan7_s *hmm, char st1, int k1, - char st2, int k2) -{ - switch (st1) { - case STS: return 0; /* S never pays */ - case STN: - switch (st2) { - case STB: return hmm->xsc[XTN][MOVE]; - case STN: return hmm->xsc[XTN][LOOP]; - default: Die("illegal %s->%s transition", Statetype(st1), Statetype(st2)); - } - break; - case STB: - switch (st2) { - case STM: return hmm->bsc[k2]; - case STD: return Prob2Score(hmm->tbd1, 1.); - default: Die("illegal %s->%s transition", Statetype(st1), Statetype(st2)); - } - break; - case STM: - switch (st2) { - case STM: return hmm->tsc[k1][TMM]; - case STI: return hmm->tsc[k1][TMI]; - case STD: return hmm->tsc[k1][TMD]; - case STE: return hmm->esc[k1]; - default: Die("illegal %s->%s transition", Statetype(st1), Statetype(st2)); - } - break; - case STI: - switch (st2) { - case STM: return hmm->tsc[k1][TIM]; - case STI: return hmm->tsc[k1][TII]; - default: Die("illegal %s->%s transition", Statetype(st1), Statetype(st2)); - } - break; - case STD: - switch (st2) { - case STM: return hmm->tsc[k1][TDM]; - case STD: return hmm->tsc[k1][TDD]; - case STE: return 0; /* D_m->E has probability 1.0 by definition in Plan7 */ - default: Die("illegal %s->%s transition", Statetype(st1), Statetype(st2)); - } - break; - case STE: - switch (st2) { - case STC: return hmm->xsc[XTE][MOVE]; - case STJ: return 
hmm->xsc[XTE][LOOP]; - default: Die("illegal %s->%s transition", Statetype(st1), Statetype(st2)); - } - break; - case STJ: - switch (st2) { - case STB: return hmm->xsc[XTJ][MOVE]; - case STJ: return hmm->xsc[XTJ][LOOP]; - default: Die("illegal %s->%s transition", Statetype(st1), Statetype(st2)); - } - break; - case STC: - switch (st2) { - case STT: return hmm->xsc[XTC][MOVE]; - case STC: return hmm->xsc[XTC][LOOP]; - default: Die("illegal %s->%s transition", Statetype(st1), Statetype(st2)); - } - break; - case STT: return 0; /* T makes no transitions */ - default: Die("illegal state %s in traceback", Statetype(st1)); - } - /*NOTREACHED*/ - return 0; -} - - -/* Function: CreateFancyAli() - * Date: SRE, Mon Oct 27 06:49:44 1997 [Sanger Centre UK] - * - * Purpose: Output of an HMM/sequence alignment, using a - * traceback structure. Deliberately similar to - * the output of BLAST, to make it easier for - * people to adapt their Perl parsers (or what have - * you) from BLAST to HMMER. - * - * Args: tr - traceback structure that gives the alignment - * hmm - the model - * dsq - the sequence (digitized form) - * name- name of the sequence - * - * Return: allocated, filled fancy alignment structure. 
- */ -struct fancyali_s * -CreateFancyAli(struct p7trace_s *tr, struct plan7_s *hmm, - char *dsq, char *name) -{ - struct fancyali_s *ali; /* alignment to create */ - int tpos; /* position in trace and alignment */ - int bestsym; /* index of best symbol at this pos */ - float mthresh; /* above this P(x), display uppercase */ - - /* Allocate and initialize the five lines of display - */ - ali = AllocFancyAli(); - ali->rfline = NULL; - ali->csline = NULL; - ali->model = MallocOrDie (sizeof(char) * (tr->tlen+1)); - ali->mline = MallocOrDie (sizeof(char) * (tr->tlen+1)); - ali->aseq = MallocOrDie (sizeof(char) * (tr->tlen+1)); - - memset(ali->model, ' ', tr->tlen); - memset(ali->mline, ' ', tr->tlen); - memset(ali->aseq, ' ', tr->tlen); - - if (hmm->flags & PLAN7_RF) - { - ali->rfline = (char *) MallocOrDie (sizeof(char) * (tr->tlen+1)); - memset(ali->rfline, ' ', tr->tlen); - } - if (hmm->flags & PLAN7_CS) - { - ali->csline = (char *) MallocOrDie (sizeof(char) * (tr->tlen+1)); - memset(ali->csline, ' ', tr->tlen); - } - - ali->query = Strdup(hmm->name); - ali->target = Strdup(name); - - if (Alphabet_type == hmmAMINO) mthresh = 0.5; - else mthresh = 0.9; - - /* Find first, last seq position - * HMM start/end positions currently not recorded, because there - * might be multiple HMM hits per sequence. 
- */ - for (tpos = 0; tpos < tr->tlen; tpos++) - if (tr->pos[tpos] > 0) { - ali->sqfrom = tr->pos[tpos]; - break; - } - for (tpos = tr->tlen-1; tpos >= 0; tpos--) - if (tr->pos[tpos] > 0) { - ali->sqto = tr->pos[tpos]; - break; - } - - /* Fill in the five lines of display - */ - for (tpos = 0; tpos < tr->tlen; tpos++) { - switch (tr->statetype[tpos]) { - case STS: - case STT: - ali->model[tpos] = '*'; - break; - - case STN: - case STJ: - case STC: - ali->model[tpos] = '-'; - if (tr->pos[tpos] > 0) { - ali->aseq[tpos] = tolower(Alphabet[(int) dsq[tr->pos[tpos]]]); - } - break; - - case STB: - ali->model[tpos] = '>'; - break; - - case STE: - ali->model[tpos] = '<'; - break; - - case STM: - if (hmm->flags & PLAN7_RF) ali->rfline[tpos] = hmm->rf[tr->nodeidx[tpos]]; - if (hmm->flags & PLAN7_CS) ali->csline[tpos] = hmm->cs[tr->nodeidx[tpos]]; - bestsym = FMax(hmm->mat[tr->nodeidx[tpos]], Alphabet_size); - ali->model[tpos] = Alphabet[bestsym]; - if (hmm->mat[tr->nodeidx[tpos]][bestsym] < mthresh) - ali->model[tpos] = tolower(ali->model[tpos]); - if (dsq[tr->pos[tpos]] == bestsym) - { - ali->mline[tpos] = Alphabet[(int) dsq[tr->pos[tpos]]]; - if (hmm->mat[tr->nodeidx[tpos]][bestsym] < mthresh) - ali->mline[tpos] = tolower(ali->mline[tpos]); - } - else if (hmm->msc[(int) dsq[tr->pos[tpos]]] [tr->nodeidx[tpos]] > 0) - ali->mline[tpos] = '+'; - ali->aseq[tpos] = Alphabet[(int) dsq[tr->pos[tpos]]]; - break; - - case STD: - if (hmm->flags & PLAN7_RF) ali->rfline[tpos] = hmm->rf[tr->nodeidx[tpos]]; - if (hmm->flags & PLAN7_CS) ali->csline[tpos] = hmm->cs[tr->nodeidx[tpos]]; - bestsym = FMax(hmm->mat[tr->nodeidx[tpos]], Alphabet_size); - ali->model[tpos] = Alphabet[bestsym]; - if (hmm->mat[tr->nodeidx[tpos]][bestsym] < mthresh) - ali->model[tpos] = tolower(ali->model[tpos]); - ali->aseq[tpos] = '-'; - break; - - case STI: - ali->model[tpos] = '.'; - if (hmm->isc[(int) dsq[tr->pos[tpos]]] [tr->nodeidx[tpos]] > 0) - ali->mline[tpos] = '+'; - ali->aseq[tpos] = (char) tolower((int) 
Alphabet[(int) dsq[tr->pos[tpos]]]); - break; - - default: - Die("bogus statetype"); - } /* end switch over statetypes */ - } /* end loop over tpos */ - - ali->len = tpos; - if (hmm->flags & PLAN7_RF) ali->rfline[tpos] = '\0'; - if (hmm->flags & PLAN7_CS) ali->csline[tpos] = '\0'; - ali->model[tpos] = '\0'; - ali->mline[tpos] = '\0'; - ali->aseq[tpos] = '\0'; - return ali; -} - - -/* Function: PrintFancyAli() - * Date: SRE, Mon Oct 27 06:56:42 1997 [Sanger Centre UK] - * - * Purpose: Print an HMM/sequence alignment from a fancyali_s - * structure. Line length controlled by ALILENGTH in - * config.h (set to 50). - * - * Args: fp - where to print it (stdout or open FILE) - * ali - alignment to print - * - * Return: (void) - */ -void -PrintFancyAli(FILE *fp, struct fancyali_s *ali) -{ - char buffer[ALILENGTH+1]; /* output line buffer */ - int starti, endi; - int pos; - int i; - - buffer[ALILENGTH] = '\0'; - endi = ali->sqfrom - 1; - for (pos = 0; pos < ali->len; pos += ALILENGTH) - { - /* coords of target seq for this line */ - starti = endi + 1; - for (i = pos; ali->aseq[i] != '\0' && i < pos + ALILENGTH; i++) - if (!isgap(ali->aseq[i])) endi++; - - if (ali->csline != NULL) { - strncpy(buffer, ali->csline+pos, ALILENGTH); - fprintf(fp, " %16s %s\n", "CS", buffer); - } - if (ali->rfline != NULL) { - strncpy(buffer, ali->rfline+pos, ALILENGTH); - fprintf(fp, " %16s %s\n", "RF", buffer); - } - if (ali->model != NULL) { - strncpy(buffer, ali->model+pos, ALILENGTH); - fprintf(fp, " %16s %s\n", " ", buffer); - } - if (ali->mline != NULL) { - strncpy(buffer, ali->mline+pos, ALILENGTH); - fprintf(fp, " %16s %s\n", " ", buffer); - } - if (ali->aseq != NULL) { - strncpy(buffer, ali->aseq+pos, ALILENGTH); - if (endi >= starti) - fprintf(fp, " %10.10s %5d %s %-5d\n\n", ali->target, starti, buffer, endi); - else - fprintf(fp, " %10.10s %5s %s %-5s\n\n", ali->target, "-", buffer, "-"); - } - } - - /* Cleanup and return - */ - fflush(fp); - return; -} - - - -/* Function: 
TraceDecompose() - * Date: Sat Aug 30 11:18:40 1997 (Denver CO) - * - * Purpose: Decompose a long multi-hit trace into zero or more - * traces without N,C,J transitions: for consistent - * scoring and statistical evaluation of single domain - * hits. - * - * Args: otr - original trace structure - * ret_tr - RETURN: array of simpler traces - * ret_ntr- RETURN: number of traces. - * - * Return: (void) - * ret_tr alloc'ed here; free individuals with FreeTrace(). - */ -void -TraceDecompose(struct p7trace_s *otr, struct p7trace_s ***ret_tr, int *ret_ntr) -{ - struct p7trace_s **tr; /* array of new traces */ - int ntr; /* number of traces */ - int i,j; /* position counters in traces */ - int idx; /* index over ntr subtraces */ - - /* First pass: count begin states to get ntr. - */ - for (ntr = 0, i = 0; i < otr->tlen; i++) - if (otr->statetype[i] == STB) ntr++; - - /* Allocations. - */ - if (ntr == 0) { - *ret_ntr = 0; - *ret_tr = NULL; - return; - } - tr = (struct p7trace_s **) MallocOrDie (sizeof(struct p7trace_s *) * ntr); - - for (idx = 0, i = 0; i < otr->tlen; i++) /* i = position in old trace */ - if (otr->statetype[i] == STB) - { - for (j = i+1; j < otr->tlen; j++) /* j = tmp; get length of subtrace */ - if (otr->statetype[j] == STE) break; - /* trace = S-N-(B..E)-C-T : len + 4 : j-i+1 + 4*/ - P7AllocTrace(j-i+5, &(tr[idx])); - tr[idx]->tlen = j-i+5; - - tr[idx]->statetype[0] = STS; - tr[idx]->nodeidx[0] = 0; - tr[idx]->pos[0] = 0; - tr[idx]->statetype[1] = STN; - tr[idx]->nodeidx[1] = 0; - tr[idx]->pos[1] = 0; - j = 2; /* now j = position in new subtrace */ - while (1) /* copy subtrace */ - { - tr[idx]->statetype[j] = otr->statetype[i]; - tr[idx]->nodeidx[j] = otr->nodeidx[i]; - tr[idx]->pos[j] = otr->pos[i]; - if (otr->statetype[i] == STE) break; - i++; j++; - } - j++; - tr[idx]->statetype[j] = STC; - tr[idx]->nodeidx[j] = 0; - tr[idx]->pos[j] = 0; - j++; - tr[idx]->statetype[j] = STT; - tr[idx]->nodeidx[j] = 0; - tr[idx]->pos[j] = 0; - idx++; - } - - *ret_tr = 
tr; - *ret_ntr = ntr; - return; -} - - -/* Function: TraceDomainNumber() - * - * Purpose: Count how many times we traverse the - * model in a single Plan7 trace -- equivalent - * to counting the number of domains. - * - * (A weakness is that we might discard some of - * those domains because they have low scores - * below E or T threshold.) - */ -int -TraceDomainNumber(struct p7trace_s *tr) -{ - int i; - int ndom = 0; - - for (i = 0; i < tr->tlen; i++) - if (tr->statetype[i] == STB) ndom++; - return ndom; -} - - -/* Function: TraceSimpleBounds() - * - * Purpose: For a trace that contains only a single - * traverse of the model (i.e. something that's - * come from TraceDecompose(), or a global - * alignment), determine the bounds of - * the match on both the sequence [1..L] and the - * model [1..M]. - * - * Args: tr - trace to look at - * i1 - RETURN: start point in sequence [1..L] - * i2 - RETURN: end point in sequence [1..L] - * k1 - RETURN: start point in model [1..M] - * k2 - RETURN: end point in model [1..M] - */ -void -TraceSimpleBounds(struct p7trace_s *tr, int *ret_i1, int *ret_i2, - int *ret_k1, int *ret_k2) -{ - int i1, i2, k1, k2, tpos; - - i1 = k1 = i2 = k2 = -1; - - /* Look forwards to find start of match */ - for (tpos = 0; tpos < tr->tlen; tpos++) - { - if (k1 == -1 && (tr->statetype[tpos] == STM || tr->statetype[tpos] == STD)) - k1 = tr->nodeidx[tpos]; - if (tr->statetype[tpos] == STM) - { - i1 = tr->pos[tpos]; - break; - } - } - if (tpos == tr->tlen || i1 == -1 || k1 == -1) - Die("sanity check failed: didn't find a match state in trace"); - - /* Look backwards to find end of match */ - for (tpos = tr->tlen-1; tpos >= 0; tpos--) - { - if (k2 == -1 && (tr->statetype[tpos] == STM || tr->statetype[tpos] == STD)) - k2 = tr->nodeidx[tpos]; - if (tr->statetype[tpos] == STM) - { - i2 = tr->pos[tpos]; - break; - } - } - if (tpos == tr->tlen || i2 == -1 || k2 == -1) - Die("sanity check failed: didn't find a match state in trace"); - - *ret_k1 = k1; - *ret_i1 
= i1; - *ret_k2 = k2; - *ret_i2 = i2; -} - - -/* Function: MasterTraceFromMap() - * Date: SRE, Tue Jul 7 18:51:11 1998 [St. Louis] - * - * Purpose: Convert an alignment map (e.g. hmm->map) to - * a master trace. Used for mapping an alignment - * onto an HMM. Generally precedes a call to - * ImposeMasterTrace(). Compare P7ViterbiAlignAlignment(), - * which aligns an alignment to the model using a - * Viterbi algorithm to get a master trace. - * MasterTraceFromMap() only works if the alignment - * is exactly the one used to train the model. - * - * Args: map - the map (usually hmm->map is passed) 1..M - * M - length of map (model; usually hmm->M passed) - * alen - length of alignment that map refers to - * - * Returns: ptr to master trace - * Caller must free: P7FreeTrace(). - */ -struct p7trace_s * -MasterTraceFromMap(int *map, int M, int alen) -{ - struct p7trace_s *tr; /* RETURN: master trace */ - int tpos; /* position in trace */ - int apos; /* position in alignment, 1..alen */ - int k; /* position in model */ - - /* Allocate for the trace. - * S-N-B- ... - E-C-T : 6 states + alen is maximum trace, - * because each of alen columns is an N*, M*, I*, or C* metastate. - * No D* metastates possible. 
- */ - P7AllocTrace(alen+6, &tr); - - /* Initialize the trace - */ - tpos = 0; - TraceSet(tr, tpos, STS, 0, 0); tpos++; - TraceSet(tr, tpos, STN, 0, 0); tpos++; - - /* Leading N's - */ - for (apos = 1; apos < map[1]; apos++) { - TraceSet(tr, tpos, STN, 0, apos); tpos++; - } /* now apos == map[1] */ - TraceSet(tr, tpos, STB, 0, 0); tpos++; - - for (k = 1; k < M; k++) - { - TraceSet(tr, tpos, STM, k, apos); tpos++; - apos++; - - for (; apos < map[k+1]; apos++) { - TraceSet(tr, tpos, STI, k, apos); tpos++; - } - } /* now apos == map[M] and k == M*/ - - TraceSet(tr, tpos, STM, M, apos); tpos++; - apos++; - - /* Trailing C's - */ - TraceSet(tr, tpos, STE, 0, 0); tpos++; - TraceSet(tr, tpos, STC, 0, 0); tpos++; - for (; apos <= alen; apos++) { - TraceSet(tr, tpos, STC, 0, apos); tpos++; - } - - /* Terminate and return - */ - TraceSet(tr, tpos, STT, 0, 0); tpos++; - tr->tlen = tpos; - return tr; -} - - - -/* Function: ImposeMasterTrace() - * Date: SRE, Sun Jul 5 14:27:16 1998 [St. Louis] - * - * Purpose: Goes with P7ViterbiAlignAlignment(), which gives us - * a "master trace" for a whole alignment. Now, given - * the alignment and the master trace, construct individual - * tracebacks for each sequence. Later we'll hand these - * (and presumably other traces) to P7Traces2Alignment(). - * - * It is possible to generate individual traces that - * are not consistent with Plan7 (e.g. D->I and I->D - * transitions may be present). P7Traces2Alignment() - * can handle such traces; other functions may not. - * See modelmaker.c:trace_doctor() if this is a problem. - * - * Akin to modelmaker.c:fake_tracebacks(). 
- * - * Args: aseq - aligned seqs - * nseq - number of aligned seqs - * mtr - master traceback - * ret_tr- RETURN: array of individual tracebacks, one for each aseq - * - * Returns: (void) - */ -void -ImposeMasterTrace(char **aseq, int nseq, struct p7trace_s *mtr, struct p7trace_s ***ret_tr) -{ - struct p7trace_s **tr; - int idx; /* counter over sequences */ - int i; /* position in raw sequence (1..L) */ - int tpos; /* position in traceback */ - int mpos; /* position in master trace */ - - tr = (struct p7trace_s **) MallocOrDie (sizeof(struct p7trace_s *) * nseq); - - for (idx = 0; idx < nseq; idx++) - { - P7AllocTrace(mtr->tlen, &tr[idx]); /* we're guaranteed that individuals len < master len */ - - tpos = 0; - i = 1; - for (mpos = 0; mpos < mtr->tlen; mpos++) - { - switch (mtr->statetype[mpos]) - { - case STS: /* straight copies w/ no emission: S, B, D, E, T*/ - case STB: - case STD: - case STE: - case STT: - TraceSet(tr[idx], tpos, mtr->statetype[mpos], mtr->nodeidx[mpos], 0); - tpos++; - break; - - case STM: /* M* implies M or D */ - if (isgap(aseq[idx][mtr->pos[mpos]-1])) - TraceSet(tr[idx], tpos, STD, mtr->nodeidx[mpos], 0); - else { - TraceSet(tr[idx], tpos, STM, mtr->nodeidx[mpos], i); - i++; - } - tpos++; - break; - - case STI: /* I* implies I or nothing */ - if (!isgap(aseq[idx][mtr->pos[mpos]-1])) { - TraceSet(tr[idx], tpos, STI, mtr->nodeidx[mpos], i); - i++; - tpos++; - } - break; - - case STJ: /* N,J,C: first N* -> N. After that, N* -> N or nothing. */ - case STN: - case STC: - if (mtr->pos[mpos] == 0) { - TraceSet(tr[idx], tpos, mtr->statetype[mpos], 0, 0); - tpos++; - } else if (!isgap(aseq[idx][mtr->pos[mpos]-1])) { - TraceSet(tr[idx], tpos, mtr->statetype[mpos], 0, i); - i++; - tpos++; - } - break; - - case STBOGUS: - Die("never happens. 
Trust me."); - } - } - tr[idx]->tlen = tpos; - } - *ret_tr = tr; -} - - -/* Function: rightjustify() - * - * Purpose: Given a gap-containing string of length n, - * pull all the non-gap characters as far as - * possible to the right, leaving gaps on the - * left side. Used to rearrange the positions - * of insertions in HMMER alignments. - */ -static void -rightjustify(char *s, int n) -{ - int npos; - int opos; - - npos = n-1; - opos = n-1; - while (opos >= 0) { - if (isgap(s[opos])) opos--; - else s[npos--]=s[opos--]; - } - while (npos >= 0) - s[npos--] = '.'; -} - - diff --git a/forester/archive/RIO/others/hmmer/src/weetest.c b/forester/archive/RIO/others/hmmer/src/weetest.c deleted file mode 100644 index 406a41e..0000000 --- a/forester/archive/RIO/others/hmmer/src/weetest.c +++ /dev/null @@ -1,55 +0,0 @@ -/* This is a throwaway wrapper program for doing quick - * and dirty tests on sequence databases. Archives of past - * versions are kept and logged in RCS. - * RCS $Id: weetest.c,v 1.1.1.1 2005/03/22 08:34:07 cmzmasek Exp $ - * - * Compile with: - * -cc -g -o weetest -I ~/lib/squid.linux -L/nfs/wol2/people/eddy/lib/squid.linux weetest.c alphabet.o camJul97.o core_algorithms.o histogram.o hmmio.o mathsupport.o masks.o misc.o modelmakers.o debug.o prior.o trace.o plan7.o states.o tophits.o -lsquid-debug -lm - * - * or, for optimized version: -cc -O2 -o weetest -I ~/lib/squid.linux -L/nfs/wol2/people/eddy/lib/squid.linux weetest.c alphabet.o camJul97.o core_algorithms.o histogram.o hmmio.o mathsupport.o masks.o misc.o modelmakers.o debug.o prior.o trace.o plan7.o states.o tophits.o -lsquid -lm - */ - -/* This test looks at histogram of protein lengths in Swissprot - */ -#include - -#include "structs.h" -#include "funcs.h" -#include "globals.h" -#include "squid.h" - -int -main(int argc, char **argv) -{ - char *file; - char *seq; - char *dsq; - int format; - SQFILE *sqfp; - SQINFO sqinfo; - int i,x; - - struct histogram_s *h; - - file = argv[1]; - if (! 
SeqfileFormat(file, &format, "BLASTDB")) - Die("SeqfileFormat()"); - if ((sqfp = SeqfileOpen(file, format, "BLASTDB")) == NULL) - Die("SeqfileOpen()"); - - h = AllocHistogram(0, 10000, 1000); - while (ReadSeq(sqfp, format, &seq, &sqinfo)) - AddToHistogram(h, (float) sqinfo.len); - - GaussianFitHistogram(h, 999999.); - PrintASCIIHistogram(stdout, h); - - printf("mean = %f\n", h->param[GAUSS_MEAN]); - printf("sd = %f\n", h->param[GAUSS_SD]); - - SeqfileClose(sqfp); - - return EXIT_SUCCESS; -} diff --git a/forester/archive/RIO/others/hmmer/testsuite/Exercises.sh b/forester/archive/RIO/others/hmmer/testsuite/Exercises.sh deleted file mode 100644 index 612ac99..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/Exercises.sh +++ /dev/null @@ -1,17 +0,0 @@ -#! /bin/sh - -# Various exercises to test the package. -# SRE, Fri Oct 23 10:38:44 1998 -# RCS $Id: Exercises.sh,v 1.1.1.1 2005/03/22 08:34:49 cmzmasek Exp $ - -# Test binary formats and interconversion. -# (tests for bug detected in 2.1, fixed in 2.1.1a.) -# -../binaries/hmmconvert -F fn3-bin ex1.tmp > /dev/null -../binaries/hmmconvert -F fn3-bin-swap ex2.tmp > /dev/null -diff ex1.tmp ex2.tmp > /dev/null -if (test $? != 0) then - echo FAILED: hmmconvert byteswap test -fi -rm ex1.tmp ex2.tmp - diff --git a/forester/archive/RIO/others/hmmer/testsuite/Makefile.in b/forester/archive/RIO/others/hmmer/testsuite/Makefile.in deleted file mode 100644 index 4bda272..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/Makefile.in +++ /dev/null @@ -1,83 +0,0 @@ -################################################################ -# Makefile for HMMER testsuite -# CVS $Id: Makefile.in,v 1.1.1.1 2005/03/22 08:34:49 cmzmasek Exp $ -########## -# HMMER - Biological sequence analysis with profile HMMs -# Copyright (C) 1992-1999 Washington University School of Medicine -# All Rights Reserved -# -# This source code is distributed under the terms of the -# GNU General Public License. 
See the files COPYING and LICENSE -# for details. -########### - -CC = @CC@ -CFLAGS = @CFLAGS@ -MDEFS = @MDEFS@ @DEFS@ - -# Configuration for optional pthreads multiprocessor support -# -PTHREAD_LIBS = @PTHREAD_LIBS@ -PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ - -SHELL = /bin/sh -MYLIBS = -lhmmer -lsquid -LIBS = @LIBS@ -lm - -SHIVA = alignalign_test\ - evd_test\ - masks_test\ - parsingviterbi_test\ - tophits_test\ - trace_test\ - viterbi_exercise\ - weeviterbi_test - -####### -## Targets defining how to make Shiva executables. -####### - -.c.o: - $(CC) $(CFLAGS) $(PTHREAD_CFLAGS) $(MDEFS) -I../squid -I../src -c $< - -all: $(SHIVA) - -$(SHIVA): @EXEC_DEPENDENCY@ - $(CC) $(CFLAGS) $(PTHREAD_CFLAGS) $(MDEFS) -o $@ -L../squid -L../src $@.o $(MYLIBS) $(PTHREAD_LIBS) $(LIBS) - -####### -## `make check` actually runs the tests. -####### - -check: $(SHIVA) - @echo - @echo Running compiled Shiva exercises: - @echo Warning: some tests may take several minutes to complete. - @for shivatest in $(SHIVA); do\ - if ./$$shivatest; then\ - echo $$shivatest: ok;\ - else\ - echo $$shivatest: FAILED;\ - fi;\ - done - @echo - @echo Running scripted Shiva exercises: - @echo Warning: This also may take several minutes. 
- perl ./Optiontests.pl - sh ./Exercises.sh - -####### -## Miscellaneous -####### - -clean: - -rm -f *.o *~ Makefile.bak core $(SHIVA) TAGS gmon.out - -distclean: - make clean - -rm -f Makefile - -TAGS: - etags -t *.c *.h Makefile.in - - diff --git a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.fa b/forester/archive/RIO/others/hmmer/testsuite/Optiontests.fa deleted file mode 100644 index cdaa297..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.fa +++ /dev/null @@ -1,16 +0,0 @@ ->seq1 -ACDEFGHIKLMNPQRSTVWY ->seq2 -ACDEFGHIKLMNPQRSTVWY ->seq3 -ACDEFGHIKLMNPQRSTVWY ->seq4 -ACDEFGHIKLMNPQRSTVWY ->seq5 -ACDEFGHIKLMNPQRSTVWY ->seq6 -ACDEFGHIKLMNPQRSTVWY ->seq7 -ACDEFGHIKLMNPQRSTVWY ->seq8 -ACDEFGHIKLMNPQRSTVWY diff --git a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.nfa b/forester/archive/RIO/others/hmmer/testsuite/Optiontests.nfa deleted file mode 100644 index 03d614d..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.nfa +++ /dev/null @@ -1,12 +0,0 @@ ->seq1 -AAACCCGGGTTT ->seq1 -AAACCCGGGTTT ->seq1 -AAACCCGGGTTT ->seq1 -AAACCCGGGTTT ->seq1 -AAACCCGGGTTT ->seq1 -AAACCCGGGTTT diff --git a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.nslx b/forester/archive/RIO/others/hmmer/testsuite/Optiontests.nslx deleted file mode 100644 index aa19616..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.nslx +++ /dev/null @@ -1,9 +0,0 @@ -# A simple DNA alignment for Optiontests.pl - -#=RF xxxxxx -seq1 AAACCCGGGTTT -seq1 AAACCCGGGTTT -seq1 AAACCCGGGTTT -seq1 AAACCCGGGTTT -seq1 AAACCCGGGTTT -seq1 AAACCCGGGTTT diff --git a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.pam b/forester/archive/RIO/others/hmmer/testsuite/Optiontests.pam deleted file mode 100644 index 205f139..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.pam +++ /dev/null @@ -1,31 +0,0 @@ -# Matrix made by matblas from blosum62.iij -# * column uses minimum score -# BLOSUM Clustered Scoring Matrix 
in 1/2 Bit Units -# Blocks Database = /data/blocks_5.0/blocks.dat -# Cluster Percentage: >= 62 -# Entropy = 0.6979, Expected = -0.5209 - A R N D C Q E G H I L K M F P S T W Y V B Z X * -A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 -R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 -N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 -D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 -C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 -Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 -E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 -G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 -H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 -I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 -L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 -K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 -M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 -F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 -P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 -S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 -T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 -W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 -Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 -V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 -B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 -Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 -X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 -* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 diff --git a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.pl b/forester/archive/RIO/others/hmmer/testsuite/Optiontests.pl deleted file mode 100644 index 
1b5fbb9..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.pl +++ /dev/null @@ -1,116 +0,0 @@ -#! /usr/local/bin/perl - -@tests = ( - "hmmbuild --informat selex -F Optiontests.hmm Optiontests.slx", # Make a protein HMM - "hmmbuild --informat selex -F Optiontests.nhmm Optiontests.nslx", # Make a DNA HMM - "hmmalign -h", - "hmmalign Optiontests.hmm Optiontests.fa", - "hmmalign -m Optiontests.hmm Optiontests.fa", - "hmmalign -o tmp Optiontests.hmm Optiontests.fa", - "hmmalign -q Optiontests.hmm Optiontests.fa", - "hmmalign --withali Optiontests.slx Optiontests.hmm Optiontests.fa", - "hmmalign --mapali Optiontests.slx Optiontests.hmm Optiontests.fa", - "hmmbuild -h", - "hmmbuild --informat selex tmp.hmm Optiontests.slx", - "hmmbuild --informat selex -F tmp.hmm Optiontests.slx", # Need -F to force - "hmmbuild --informat selex -n foo -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex -o tmp -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex -A tmp.hmm Optiontests.slx", - "hmmbuild --informat selex -f -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex -g -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex -s -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --fast -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --hand -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --null ../tutorial/amino.null -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --pam Optiontests.pam -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --prior ../tutorial/amino.pri -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --wblosum -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --wgsc -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --wme -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --wvoronoi -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --wnone -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --noeff -F tmp.hmm Optiontests.slx", - "hmmbuild 
--informat selex --amino -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --nucleic -F tmp.hmm Optiontests.nslx", - "hmmbuild --informat selex --archpri 0.9 -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --binary -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --cfile tmp -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --gapmax 0.6 --fast -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --idlevel 0.5 -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --pamwgt 10 --pam Optiontests.pam -F tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --swentry 0.3 -F -s tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --swexit 0.3 -F -s tmp.hmm Optiontests.slx", - "hmmbuild --informat selex --verbose -F tmp.hmm Optiontests.slx", - "hmmcalibrate -h", - "hmmcalibrate Optiontests.hmm", - "hmmcalibrate --fixed 15 Optiontests.hmm", - "hmmcalibrate --mean 25 Optiontests.hmm", - "hmmcalibrate --histfile tmp --fixed 15 Optiontests.hmm", - "hmmcalibrate --num 4500 --fixed 15 Optiontests.hmm", - "hmmcalibrate --sd 50 --mean 25 Optiontests.hmm", - "hmmcalibrate --seed 666 --fixed 15 Optiontests.hmm", - "hmmconvert -h", - "hmmconvert Optiontests.hmm tmp2.hmm", - "hmmconvert -F Optiontests.hmm tmp2.hmm", - "hmmconvert -a -F Optiontests.hmm tmp2.hmm", - "hmmconvert -A Optiontests.hmm tmp2.hmm", # order sensitive. 
tmp2.hmm must be HMM - "hmmconvert -b -F Optiontests.hmm tmp2.hmm", - "hmmconvert -p -F Optiontests.hmm tmp2.hmm", - "hmmconvert -P -F Optiontests.hmm tmp2.hmm", - "hmmemit -h", - "hmmemit Optiontests.hmm", - "hmmemit -a Optiontests.hmm", - "hmmemit -n 6 Optiontests.hmm", - "hmmemit -o tmp Optiontests.hmm", - "hmmemit -q Optiontests.hmm", - "hmmemit --seed 666 Optiontests.hmm", - "hmmindex -h", - "hmmindex Optiontests.hmm", - "hmmfetch -h", - "hmmfetch Optiontests.hmm Optiontests", - "hmmpfam -h", - "hmmpfam -n Optiontests.nhmm Optiontests.nfa", - "hmmpfam -A 0 Optiontests.hmm Optiontests.fa", - "hmmpfam -E 1 Optiontests.hmm Optiontests.fa", - "hmmpfam -T 1 Optiontests.hmm Optiontests.fa", - "hmmpfam -Z 10 Optiontests.hmm Optiontests.fa", - "hmmpfam --domE 1 Optiontests.hmm Optiontests.fa", - "hmmpfam --domT 1 Optiontests.hmm Optiontests.fa", - "hmmpfam --forward Optiontests.hmm Optiontests.fa", - "hmmpfam --null2 Optiontests.hmm Optiontests.fa", - "hmmpfam --xnu Optiontests.hmm Optiontests.fa", - "hmmsearch -h", - "hmmsearch -A 0 Optiontests.hmm Optiontests.fa", - "hmmsearch -E 1 Optiontests.hmm Optiontests.fa", - "hmmsearch -T 1 Optiontests.hmm Optiontests.fa", - "hmmsearch -Z 10 Optiontests.hmm Optiontests.fa", - "hmmsearch --domE 1 Optiontests.hmm Optiontests.fa", - "hmmsearch --domT 1 Optiontests.hmm Optiontests.fa", - "hmmsearch --forward Optiontests.hmm Optiontests.fa", - "hmmsearch --null2 Optiontests.hmm Optiontests.fa", - "hmmsearch --xnu Optiontests.hmm Optiontests.fa", - ); - - -unlink "tmp.hmm"; -while ($testline = shift(@tests)) -{ - $status = system("../binaries/$testline 2>&1 > tmp.out"); - if ($status > 0) { - print "failure: $testline\n"; - $failed++; - } - $total++; -} - -$passed = $total - $failed; -printf "Option tests: %d. Passed: %d. 
Failed: %d\n", $total, $passed, $failed; - -unlink "tmp"; -unlink "tmp.out"; -unlink "tmp.hmm"; -unlink "tmp2.hmm"; -unlink "Optiontests.hmm"; -unlink "Optiontests.nhmm"; -unlink "Optiontests.hmm.ssi"; - diff --git a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.pri b/forester/archive/RIO/others/hmmer/testsuite/Optiontests.pri deleted file mode 100644 index a2f19d3..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.pri +++ /dev/null @@ -1,59 +0,0 @@ -# This file incorporates Blocks9.plib, the UCSC mixture -# Dirichlet prior created by Kimmen Sjolander. -# - -Dirichlet # Strategy (mixture Dirichlet) -Amino # type of prior (Amino or Nucleic) - -# Transitions -1 # Single component -1.0 # with probability = 1.0 -0.7939 0.0278 0.0135 # m->m, m->i, m->d alpha's -0.1551 0.1331 # i->m, i->i alpha's -0.9002 0.5630 # d->m, d->d alpha's - -# Match emissions -# -9 # 9 components - -0.178091 -0.270671 0.039848 0.017576 0.016415 0.014268 0.131916 0.012391 0.022599 0.020358 0.030727 0.015315 0.048298 0.053803 0.020662 0.023612 0.216147 0.147226 0.065438 0.003758 0.009621 -# S A T , C G P >< N V M , Q H R I K F L D W , E Y - -0.056591 -0.021465 0.0103 0.011741 0.010883 0.385651 0.016416 0.076196 0.035329 0.013921 0.093517 0.022034 0.028593 0.013086 0.023011 0.018866 0.029156 0.018153 0.0361 0.07177 0.419641 -# Y , F W , H ,>< L M , N Q I C V S R , T P A K D G E - -0.0960191 -0.561459 0.045448 0.438366 0.764167 0.087364 0.259114 0.21494 0.145928 0.762204 0.24732 0.118662 0.441564 0.174822 0.53084 0.465529 0.583402 0.445586 0.22705 0.02951 0.12109 -# Q E , K N R S H D T A >< M P Y G , V L I W C F - -0.0781233 -0.070143 0.01114 0.019479 0.094657 0.013162 0.048038 0.077 0.032939 0.576639 0.072293 0.02824 0.080372 0.037661 0.185037 0.506783 0.073732 0.071587 0.042532 0.011254 0.028723 -# K R , Q , H >< N E T M S , P W Y A L G V C I , D F - -0.0834977 -0.041103 0.014794 0.00561 0.010216 0.153602 0.007797 0.007175 0.299635 0.010849 0.999446 0.210189 0.006127 
0.013021 0.019798 0.014509 0.012049 0.035799 0.180085 0.012744 0.026466 -# L M , I , F V ><, W Y C T Q , A P H R , K S E N , D G - -0.0904123 -0.115607 0.037381 0.012414 0.018179 0.051778 0.017255 0.004911 0.796882 0.017074 0.285858 0.075811 0.014548 0.015092 0.011382 0.012696 0.027535 0.088333 0.94434 0.004373 0.016741 -# I V ,, L M >< C T A , F , Y S P W N , E Q K R D G H - -0.114468 -0.093461 0.004737 0.387252 0.347841 0.010822 0.105877 0.049776 0.014963 0.094276 0.027761 0.01004 0.187869 0.050018 0.110039 0.038668 0.119471 0.065802 0.02543 0.003215 0.018742 -# D , E N , Q H S >< K G P T A , R Y , M V L F W I C - -0.0682132 -0.452171 0.114613 0.06246 0.115702 0.284246 0.140204 0.100358 0.55023 0.143995 0.700649 0.27658 0.118569 0.09747 0.126673 0.143634 0.278983 0.358482 0.66175 0.061533 0.199373 -# M , V I L F T Y C A >< W S H Q R N K , P E G , D - -0.234585 -0.005193 0.004039 0.006722 0.006121 0.003468 0.016931 0.003647 0.002184 0.005019 0.00599 0.001473 0.004158 0.009055 0.00363 0.006583 0.003172 0.00369 0.002967 0.002772 0.002686 -# P G W , C H R D E >< N Q K F Y T L A M , S V I - - -## Insert emissions -1 # Single component -1.0 # with probability 1.0 -681 120 623 651 313 902 241 371 687 676 143 548 647 415 551 926 623 505 102 269 diff --git a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.slx b/forester/archive/RIO/others/hmmer/testsuite/Optiontests.slx deleted file mode 100644 index a821515..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/Optiontests.slx +++ /dev/null @@ -1,15 +0,0 @@ -# A simple amino acid test alignment for Optiontests.pl - -#=RF xxxxx -seq1 ACDEFGHIKLMNPQRSTVWY -seq2 ACDEFGHIKLMNPQRSTVWY -seq3 ACDEFGHIKLMNPQRSTVWY -seq4 ACDEFGHIKLMNPQRSTVWY -seq5 ACDEFGHIKLMNPQRSTVWY -seq6 ACDEFGHIKLMNPQRSTVWY -seq7 ACDEFGHIKLMNPQRSTVWY -seq8 ACDEFGHIKLMNPQRSTVWY - - - - diff --git a/forester/archive/RIO/others/hmmer/testsuite/README b/forester/archive/RIO/others/hmmer/testsuite/README deleted file mode 100644 index 4a4e2ac..0000000 
--- a/forester/archive/RIO/others/hmmer/testsuite/README +++ /dev/null @@ -1,82 +0,0 @@ -Shiva: HMMER testsuite - -##################################################################### -I. Compiled test drivers. ---------------------------------------------------------------------- - -- A test driver runs with no arguments, gives no output, - and returns EXIT_SUCCESS if the test passes. -- If the test fails, the test driver calls Die() to print a diagnostic on - stderr, and exit with EXIT_FAILURE. -- The -v option always activates rudimentary verbose output on stdout. - - -Current tests: ---------------- - -alignalign_test - Exercises P7ViterbiAlignAlignment() -- alignment of a fixed - multiple alignment to an HMM. Aligns fn3 seed alignment to - fn3 model, compares to results of aligning sequences individually; - if an excessive number of discrepancies are detected, test - fails. - Other files: fn3.seed, fn3.hmm. - -evd_test - Exercises code in histogram.c, especially EVD fitting - Default generates 1000 EVD samples; fits EVD; tests that fitted - parameters are reasonably close to real ones. - -masks_test - Exercises code in masks.c - Default runs XNU on a sequence and compares to a known result. - -parsingviterbi_test - Exercises P7ParsingViterbi() in core_algorithms.c - Runs Fn3 model against titin using both standard Viterbi and - ParsingViterbi; compares results for identity. - -tophits_test - Exercises tophits.c - Generates random scores in three tiers (good, middling, bad). - Uses RegisterHit() API; FullSort's them; tests that they - end up in the right number/order. - -trace_test - Exercises traceback code in core_algorithms.c - Runs a simple HMM against synthetic sequences designed to - exercise all possible arrangements of transitions, and - does a TraceVerify() to be sure resulting trace is internally - consistent. 
- -viterbi_exercise - Exercises P7Viterbi and P7SmallViterbi in core_algorithms.c - Configures Fn3 model into various modes; generates 100 random - seqs from each configuration; does P7Viterbi and P7SmallViterbi - alignments, TraceVerify()'s them, checks them for identity. - -weeviterbi_test - Exercises P7WeeViterbi in core_algorithms.c - Runs RRM model against two subsequences of human U1A; - compares Viterbi trace to WeeViterbi trace. - -##################################################################### -II. Scripted test drivers. ---------------------------------------------------------------------- - -Optiontests.pl - Runs every documented option for every program, to be - sure they're really connected up. - - -##################################################################### -Obsolete: kept in RCS archive only for reproducibility of old results ---------------------------------------------------------------------- - -fitting_test.c Tests of EVD and Gaussian histogram fitting code. - 17 June 1997 - - - - - diff --git a/forester/archive/RIO/others/hmmer/testsuite/alignalign_test.c b/forester/archive/RIO/others/hmmer/testsuite/alignalign_test.c deleted file mode 100644 index 99679d2..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/alignalign_test.c +++ /dev/null @@ -1,206 +0,0 @@ -/* alignalign_test.c - * Sun Jul 5 13:42:41 1998 - * - * Test driver for P7ViterbiAlignAlignment(). - * - * The test is to - * 1) read an alignment and a corresponding HMM - * 2) align the alignment to the HMM to get a master trace - * 3) map the alignment to the HMM to get another master trace - * 4) Test that the two master traces are identical; if not, fail. - * This doesn't have to be true always, but it's true for the - * fn3 test example. 
- * 5) Get imposed traces for each sequence - * 6) Viterbi align individual seqs to the model; - * compare the imposed trace with the Viterbi trace; - * 7) If an excessive number of individual traces differ from - * those imposed by master, fail. - * - * CVS $Id: alignalign_test.c,v 1.1.1.1 2005/03/22 08:34:49 cmzmasek Exp $ - */ - -#include - -#include "structs.h" -#include "funcs.h" -#include "globals.h" -#include "squid.h" - -static char banner[] = "\ -alignalign_test : testing of P7ViterbiAlignAlignment() code"; - -static char usage[] = "\ -Usage: alignalign_test [-options]\n\ - Available options are:\n\ - -h : help; display this usage info\n\ - -v : be verbose\n\ -"; - -static char experts[] = "\ - --ali : read alignment from \n\ - --hmm : read HMM from \n\ -\n"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-v", TRUE, sqdARG_NONE }, - { "--ali", FALSE, sqdARG_STRING }, - { "--hmm", FALSE, sqdARG_STRING }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *hmmfile; /* file to read HMM(s) from */ - HMMFILE *hmmfp; /* opened hmmfile for reading */ - struct plan7_s *hmm; /* HMM to search with */ - char *afile; /* file to read alignment from */ - int format; /* format determined for afile */ - MSAFILE *afp; /* afile, open for reading */ - MSA *msa; /* multiple sequence alignment from afile */ - char **rseq; /* raw, dealigned aseq */ - char *dsq; /* digitized target sequence */ - struct p7trace_s *mtr; /* master traceback from alignment */ - struct p7trace_s *maptr; /* master traceback from mapping */ - struct p7trace_s **tr; /* individual tracebacks imposed by mtr */ - struct p7trace_s **itr; /* individual trace from P7Viterbi() */ - int idx; /* counter for seqs */ - int ndiff; /* number of differing traces */ - int rlen; /* length of an unaligned sequence */ - - int be_verbose; - int be_standard; /* TRUE when running standard test */ - - char *optname; /* name of option found by 
Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - /*********************************************** - * Parse command line - ***********************************************/ - - hmmfile = "fn3.hmm"; - afile = "fn3.seed"; - format = MSAFILE_STOCKHOLM; - be_verbose = FALSE; - be_standard = TRUE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-v") == 0) be_verbose = TRUE; - else if (strcmp(optname, "--ali") == 0) { afile = optarg; be_standard = FALSE; } - else if (strcmp(optname, "--hmm") == 0) { hmmfile = optarg; be_standard = FALSE; } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 0) - Die("Incorrect number of arguments.\n%s\n", usage); - - /*********************************************** - * Get one alignment from test file: must be Stockholm format. - ***********************************************/ - - if ((afp = MSAFileOpen(afile, format, NULL)) == NULL) - Die("Alignment file %s could not be opened for reading", afile); - if ((msa = MSAFileRead(afp)) == NULL) - Die("Didn't read an alignment from %s", afile); - MSAFileClose(afp); - - for (idx = 0; idx < msa->nseq; idx++) - s2upper(msa->aseq[idx]); - DealignAseqs(msa->aseq, msa->nseq, &rseq); - - /*********************************************** - * Open HMM file - * Read a single HMM from it. - ***********************************************/ - - if ((hmmfp = HMMFileOpen(hmmfile, NULL)) == NULL) - Die("Failed to open HMM file %s\n", hmmfile); - if (!HMMFileRead(hmmfp, &hmm)) - Die("Failed to read any HMMs from %s\n", hmmfile); - if (hmm == NULL) - Die("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile); - P7Logoddsify(hmm, TRUE); - - if (! 
(hmm->flags & PLAN7_MAP)) - Die("HMM in %s has no map", hmmfile); - if (GCGMultchecksum(msa->aseq, msa->nseq) != hmm->checksum) - Die("Checksum for alignment in %s does not match that in HMM (%d != %d)", - afile, GCGMultchecksum(msa->aseq, msa->nseq), hmm->checksum); - - /*********************************************** - * First test: - * mapped alignment should match re-aligned alignment: - * obtain and compare the two master traces - ***********************************************/ - - mtr = P7ViterbiAlignAlignment(msa, hmm); - maptr = MasterTraceFromMap(hmm->map, hmm->M, msa->alen); - if (! TraceVerify(mtr, hmm->M, msa->alen)) - Die("Trace verify on P7ViterbiAlignAlignment() result failed\n"); - if (! TraceVerify(maptr, hmm->M, msa->alen)) - Die("Trace verify on MasterTraceFromMap() result failed\n"); - if (! TraceCompare(mtr, maptr)) - Die("Master traces differ for alignment versus map\n"); - - /************************************************** - * Second test: - * seq traces implied by mapped alignment should generally match - * re-aligned individual sequences. - ***************************************************/ - - ImposeMasterTrace(msa->aseq, msa->nseq, mtr, &tr); - - itr = MallocOrDie(sizeof(struct p7trace_s *) * msa->nseq); - /* align individuals, compare traces */ - ndiff = 0; - for (idx = 0; idx < msa->nseq; idx++) - { - rlen = strlen(rseq[idx]); - dsq = DigitizeSequence(rseq[idx], rlen); - P7Viterbi(dsq, rlen, hmm, &(itr[idx])); - - if (! TraceCompare(itr[idx], tr[idx])) - ndiff++; - free(dsq); - } - - /* Determine success/failure. 
- */ - if (ndiff > msa->nseq / 2) - Die("alignalign: Test FAILED; %d/%d differ\n", ndiff, msa->nseq); - - if (be_standard) { - if (ndiff != 12) - Die("alignalign: Test FAILED; %d traces differ, should be 12\n", ndiff); - if (msa->nseq != 109) - Die("alignalign: Test FAILED; %d seqs read, should be 109\n", msa->nseq); - } - - if (be_verbose) printf("alignalign: Test passed; %d/%d differ, as expected\n", - ndiff, msa->nseq); - - /* Cleanup. - */ - P7FreeTrace(mtr); - P7FreeTrace(maptr); - for (idx = 0; idx < msa->nseq; idx++) - { - P7FreeTrace(tr[idx]); - P7FreeTrace(itr[idx]); - } - free(tr); - free(itr); - Free2DArray((void **) rseq, msa->nseq); - MSAFree(msa); - FreePlan7(hmm); - SqdClean(); - - return EXIT_SUCCESS; -} diff --git a/forester/archive/RIO/others/hmmer/testsuite/evd_test.c b/forester/archive/RIO/others/hmmer/testsuite/evd_test.c deleted file mode 100644 index 5a2446e..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/evd_test.c +++ /dev/null @@ -1,295 +0,0 @@ -/* evd_test.c - * SRE, Wed Nov 12 11:17:27 1997 [St. Louis] - * - * Test driver for EVD distribution support in histogram.c - * Generates random EVD samples; fits them; checks fitted mu, lambda - * against parametric mu, lambda. If they differ badly, calls Die(). - * If OK, returns EXIT_SUCCESS. 
- * - * RCS $Id: evd_test.c,v 1.1.1.1 2005/03/22 08:34:45 cmzmasek Exp $ - */ - - -#include -#include -#include - -#include "structs.h" -#include "funcs.h" -#include "globals.h" -#include "squid.h" - -#ifdef MEMDEBUG -#include "dbmalloc.h" -#endif - -static char banner[] = "\ -evd_test : testing of EVD code in histogram.c"; - -static char usage[] = "\ -Usage: testdriver [-options]\n\ - Available options are:\n\ - -h : help; display this usage info\n\ - -c : censor data below \n\ - -e : sample times from EVD\n\ - -g : add Gaussian samples of \"noise\"\n\ - -n : set number of trials to \n\ - -s : set random seed to \n\ - -v : be verbose (default is to simply exit with status 1 or 0)\n\ -"; - -static char experts[] = "\ - --xmgr : save graphical data to \n\ - --hist : fit to histogram instead of raw samples\n\ - --loglog : save log log regression line to \n\ - --regress : do old-style linear regression fit, not ML\n\ - --mu : set EVD mu to \n\ - --lambda : set EVD lambda to \n\ - --mean : set Gaussian mean to \n\ - --sd : set Gaussian std. dev. 
to \n\ -\n"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-c", TRUE, sqdARG_FLOAT }, - { "-e", TRUE, sqdARG_INT }, - { "-g", TRUE, sqdARG_INT }, - { "-n", TRUE, sqdARG_INT }, - { "-s", TRUE, sqdARG_INT }, - { "-v", TRUE, sqdARG_NONE }, - { "--xmgr", FALSE, sqdARG_STRING}, - { "--hist", FALSE, sqdARG_NONE}, - { "--loglog", FALSE, sqdARG_STRING}, - { "--regress",FALSE, sqdARG_NONE}, - { "--mu", FALSE, sqdARG_FLOAT}, - { "--lambda", FALSE, sqdARG_FLOAT}, - { "--mean", FALSE, sqdARG_FLOAT}, - { "--sd", FALSE, sqdARG_FLOAT}, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - struct histogram_s *h; /* histogram structure */ - int ntrials; /* number of different fits */ - int be_verbose; /* option: TRUE to show output */ - int seed; /* option: random number seed */ - int nevd; /* # of samples from EVD */ - float mu; /* EVD mu parameter */ - float lambda; /* EVD lambda parameter */ - int ngauss; /* # of samples from Gaussian */ - float mean; /* Gaussian "noise" mean */ - float sd; /* Gaussian "noise" std. dev. */ - float x; /* a random sample */ - int i, idx; - float *val; /* array of samples */ - float mlmu; /* estimate of mu */ - float mllambda; /* estimate of lambda */ - - char *xmgrfile; /* output file for XMGR graph data */ - char *logfile; /* output file for regression line */ - FILE *xmgrfp; /* open output file */ - FILE *logfp; /* open log log file */ - int do_ml; /* TRUE to do a max likelihood fit */ - int fit_hist; /* TRUE to fit histogram instead of samples */ - int censoring; /* TRUE to left-censor the data */ - float censorlevel; /* value to censor at */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - -#ifdef MEMDEBUG - unsigned long histid1, histid2, orig_size, current_size; - orig_size = malloc_inuse(&histid1); - fprintf(stderr, "[... 
memory debugging is ON ...]\n"); -#endif - - /*********************************************** - * Parse command line - ***********************************************/ - be_verbose = FALSE; - seed = (int) time ((time_t *) NULL); - ntrials = 1; - nevd = 1000; - mu = -20.0; - lambda = 0.4; - ngauss = 0; - mean = 20.; - sd = 20.; - xmgrfile = NULL; - logfile = NULL; - xmgrfp = NULL; - logfp = NULL; - do_ml = TRUE; - censoring = FALSE; - censorlevel= 0.; - fit_hist = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-e") == 0) { nevd = atoi(optarg); } - else if (strcmp(optname, "-c") == 0) { censoring = TRUE; - censorlevel= atof(optarg); } - else if (strcmp(optname, "-g") == 0) { ngauss = atoi(optarg); } - else if (strcmp(optname, "-n") == 0) { ntrials = atoi(optarg); } - else if (strcmp(optname, "-s") == 0) { seed = atoi(optarg); } - else if (strcmp(optname, "-v") == 0) { be_verbose = TRUE; } - else if (strcmp(optname, "--xmgr") == 0) { xmgrfile = optarg; } - else if (strcmp(optname, "--hist") == 0) { fit_hist = TRUE; } - else if (strcmp(optname, "--loglog") == 0) { logfile = optarg; } - else if (strcmp(optname, "--regress")== 0) { do_ml = FALSE; } - else if (strcmp(optname, "--mu") == 0) { mu = atof(optarg); } - else if (strcmp(optname, "--lambda") == 0) { lambda = atof(optarg); } - else if (strcmp(optname, "--mean") == 0) { mean = atof(optarg); } - else if (strcmp(optname, "--sd") == 0) { sd = atof(optarg); } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 0) - Die("Incorrect number of arguments.\n%s\n", usage); - - sre_srandom(seed); - - /**************************************************************** - * Print options - ****************************************************************/ - - if (be_verbose) - { - puts("--------------------------------------------------------"); - printf("EVD samples = 
%d\n", nevd); - printf("mu, lambda = %f, %f\n", mu, lambda); - if (ngauss > 0) { - printf("Gaussian noise = %d\n", ngauss); - printf("mean, sd = %f, %f\n", mean, sd); - } - if (censoring) printf("pre-censoring = ON, at %f\n", censorlevel); - printf("total trials = %d\n", ntrials); - printf("random seed = %d\n", seed); - printf("fit method = %s\n", do_ml ? "ML" : "linear regression"); - printf("fit is to = %s\n", fit_hist ? "histogram" : "list"); - puts("--------------------------------------------------------"); - } - - if (xmgrfile != NULL) - if ((xmgrfp = fopen(xmgrfile, "w")) == NULL) - Die("Failed to open output file %s", xmgrfile); - if (logfile != NULL) - if ((logfp = fopen(logfile, "w")) == NULL) - Die("Failed to open output file %s", logfile); - - /* Generate random EVD "signal" (and Gaussian "noise") - * samples and put them in the histogram - */ - while (ntrials--) - { - val = MallocOrDie(sizeof(double) * (nevd+ngauss)); - h = AllocHistogram(-20, 20, 10); - - /* EVD signal */ - idx = 0; - for (i = 0; i < nevd; i++) - { - x = EVDrandom(mu, lambda); - if (! censoring || x > censorlevel) - { - AddToHistogram(h, x); - val[idx] = x; - idx++; - } - } - /* Gaussian noise */ - for (; i < nevd + ngauss; i++) - { - x = Gaussrandom(mean, sd); - if (! 
censoring || x > censorlevel) - { - AddToHistogram(h, x); - val[idx] = x; - idx++; - } - } - - if (do_ml) - { - - if (censoring) - { - if (be_verbose) - printf("I have censored the data at %f: %d observed, %d censored\n", censorlevel, idx, (nevd+ngauss)-idx); - - EVDCensoredFit(val, NULL, idx, - (nevd+ngauss)-idx, censorlevel, - &mlmu, &mllambda); - ExtremeValueSetHistogram(h, (float) mlmu, (float) mllambda, - censorlevel, h->highscore, 1); - } - else - { - if (fit_hist) - { - ExtremeValueFitHistogram(h, TRUE, 20.); - } - else - { - EVDMaxLikelyFit(val, NULL, idx, &mlmu, &mllambda); - ExtremeValueSetHistogram(h, (float) mlmu, (float) mllambda, - h->lowscore, h->highscore, 2); - } - } - } - else - EVDBasicFit(h); - - if (be_verbose) { - printf("%f\tmu\n", h->param[EVD_MU]); - printf("%f\tlambda\n", h->param[EVD_LAMBDA]); - printf("%f\t%% error on mu\n", - fabs(100. * (h->param[EVD_MU] - mu) / mu)); - printf("%f\t%% error on lambda\n", - fabs(100. * (h->param[EVD_LAMBDA] - lambda) / lambda)); - printf("%f\tchi-squared P value\n", h->chip); - } - if (xmgrfp != NULL) PrintXMGRHistogram(xmgrfp, h); - /* if (xmgrfp != NULL) PrintXMGRDistribution(xmgrfp, h); */ - if (logfp != NULL) PrintXMGRRegressionLine(logfp, h); - - /* Generate the expected lines: sets 5,7 of xmgrfile (manually delete 4,6) - * set 3 of loglogfile (manually delete 2) - */ - ExtremeValueSetHistogram(h, mu, lambda, h->lowscore, h->highscore, 0); - if (xmgrfp != NULL) PrintXMGRHistogram(xmgrfp, h); - /* if (xmgrfp != NULL) PrintXMGRDistribution(xmgrfp, h); */ - if (logfp != NULL) PrintXMGRRegressionLine(logfp, h); - - /* Do the internal test. - * Criterion: on a 1000 sample EVD of u = -40 and lambda = 0.4, - * estimate u to within +/- 2 and lambda to within +/- 0.05. - */ - if (fabs(h->param[EVD_MU] - mu) > 2.) 
- Die("evd_test: tolerance to mu exceeded (%f)", - fabs(h->param[EVD_MU] - mu)); - if (fabs(h->param[EVD_LAMBDA] - lambda) > 0.05) - Die("evd_test: tolerance to lambda exceeded (%f)", - fabs(h->param[EVD_LAMBDA] - lambda)); - - FreeHistogram(h); - free(val); - } - -#ifdef MEMDEBUG - current_size = malloc_inuse(&histid2); - if (current_size != orig_size) Die("evd_test failed memory test"); - else fprintf(stderr, "[No memory leaks.]\n"); -#endif - - if (xmgrfp != NULL) fclose(xmgrfp); - if (logfp != NULL) fclose(logfp); - return EXIT_SUCCESS; -} diff --git a/forester/archive/RIO/others/hmmer/testsuite/fitting_test.c b/forester/archive/RIO/others/hmmer/testsuite/fitting_test.c deleted file mode 100644 index 5eefdfb..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/fitting_test.c +++ /dev/null @@ -1,71 +0,0 @@ -/* fitting_test.c - * 17 June 1997 (see notebook) - */ - -#include -#include -#include -#include -#include -#include - -#include "structs.h" -#include "funcs.h" -#include "squid.h" - -#include "globals.h" - -int -main(int argc, char **argv) -{ - int n; /* number of EVD samples */ - float p1, p2; - struct histogram_s *histog; - int i,j; - float x; - int seed; - int do_evd, set, fit_evd, show_hist; - - p1 = atof(argv[1]); /* mu or mean */ - p2 = atof(argv[2]); /* lambda or sd */ - n = atoi(argv[3]); /* # of histograms */ - do_evd = atoi(argv[4]); /* 1 to sample EVD; 0 to sample Gaussian */ - set = atoi(argv[5]); /* 1 to set instead of fit the dist */ - fit_evd = atoi(argv[6]); /* 1 to fit EVD; 0 to fit Gaussian */ - show_hist = atoi(argv[7]); /* 1 to show histogram */ - - seed = (int) time ((time_t *) NULL); - sre_srandom(seed); - - for (j = 0; j < n; j++) - { - histog = AllocHistogram(-200, 200, 100); - for (i = 0; i < 2500; i++) - { - if (do_evd) x = EVDrandom(p1, p2); - else x = Gaussrandom(p1, p2); - - assert(x > -100.); - assert(x < 100.); - AddToHistogram(histog, x); - } - - if (set && fit_evd) - ExtremeValueSetHistogram(histog, p1, p2); - else if 
(set && !fit_evd) - GaussianSetHistogram(histog, p1, p2); - else if (!set && fit_evd) - ExtremeValueFitHistogram(histog, 9999.); - else - GaussianFitHistogram(histog, 9999.); - - printf("%f\n", histog->chip); - - if (show_hist) - PrintASCIIHistogram(stdout, histog); - - FreeHistogram(histog); - } - - return 0; -} diff --git a/forester/archive/RIO/others/hmmer/testsuite/fn3-bin b/forester/archive/RIO/others/hmmer/testsuite/fn3-bin deleted file mode 100644 index 59163b3442cc672a3ec1d25972ef1b7f7e08de8c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16930 zcmeHu2~>}1`+rS~3@T|8(ViueP|y8b*Zq_dh3u521tH45j3uG8mtB-(gfdK)8rcWg zm&Di^BwpE@NomFy|LcC{Fz1Y!_y3;XJHOxYo|kj3Q@X$R{oKp7d_JH1`F`{4S%rY*LDqz}c%29pgT8%j2eEQltk5?U-wO=Hh1MB1!HVwN^CH zVJ4^eMJx0<)!zO8-A}JB(TdI141Cxm8CJ4DG?_3TAAIhJUpS>;yR;%WUOX1(g-*e* z0>4E=lf%F=By4@v7sGGb;UnLE_)GON%o#flv(_!d1e2a%W}t;L+M5DBPs6e>N<8z^ zzfU{xk?z1erTE%@=)EhFIbB%I!mRaibe5qUY;6j^oHK*9%bTHwaXSdedX4jX8_JPC zo@Z&lS}0p8I>Pbtr>sMg296Hc19SDKqVX9`=5(eLcuekw$I~uKE^bb|@#6Xzbu}14Jqh|z z+BUsEUTT8O;rka5=6sY{J!;16PPi(!A6l_6-wklm?;s0v_6Cy_Q=WNpxip~D6xQAO zCpa708cdIG1jC)1(E6zfPP}sqhYwGOYK=`yZ&Wgd4sC`R9h<=Xz5QV22@BM_yc^Fq zxrOq6BTk>^Wz)Zedk(hn4=r;jc-S0ipQSd3&G`m%Cwv4}&}Vx+?${?2l(tQArc?qj z*c!d_btMaMAW2n4Hm+?R4WM&17@I} zFrS9;cFUOWr9MpLJ(?#Aw7J0__BTg!t(mOgVKz#W7Bc@^D_Iw2BzLdFF72aHFz_qNcxr_D^oN_x}>eVxu#pL_(qubX)eyIWL+-f2FqEACU>SRakzHbye znDae)KXU-n`bC(hy9LHfNMHxrw*trG51{=VOR#yG1<#&4fk#jZPcd!7kKD*2ZRfxjPW9z& z)8kl~fi`dVJPuEORSWAiH87xZDLy%Cjh^>MVZ+jelEFh)DAZ5D2XxM~Ul3>42(w)h zSvx&_x#~g%yISqe6HLy*59vp7k##oAy3`q+>s7(Ivpw*((KH;dZH=9y((#*=DLf0m zlsww5VHcD3(K(bzr9XP8b<4J~58Ju!b6%KI#kwbNWK_=}{X_;tzA}f#5qFW+!)V{p zsNN6^^tR+ZbC+?QFE!-JZ;YYbAqN(3DO1<$dHD*~biYTR_hFUK!_eI)S6%Pzh$HYk zz9WeB>KB+{nPkq>Z;S?tMM%BgmoIlJ!XGEB0-7&u)@=<#r)SGSxh0bDbKRB)#ksI8 zhdgm$hdhWW-^dQdodJu*_2q#F;~3=|?t1FD@I5&y*awX!w&0PkR`KG}#Smf^C+UJI zHtFW8&hMPbw=jVxVd64tFiadM>ajY%j#b^McK>!B{NOlsy^*%RK)o_8+_j*D_bJro zW+vs3c)s=VjC|*?mwK1@xv-jbhT{p7(MLB$U^cbYGWBoP; 
zlaDT8tqyKrrrK8WITHm&A@XKQ1GYGVLrK1s8tWzb>p~t1av* zx-D%Q-kqKB<5;cN3FK=n+1f0Hn-tDuYoF}m!oN$-^kg(1#q(h1abh~#Q>T)7=DLF9 zlgm@DUzBw0w}||u?%!iaSD>7zp07Cfy;>e9{Mq0{2lNWD#L;V3z~nc})%|l^m4%Pw zO?Y?umpp2wvFb0Eyp7>CYdW<4xNb;CrKfo7KiPXk94qxtj zsh+E8eK)XQl@8B+9>dk}ukp~`K{#`MQ=q-Vb+Z-90b8+KR#&VE@5EDDIP&66H{s&L zh0<_c8=%ke@pj)yqaIF`55;xIN~tHio?(lcH|yi5hc994+%GW6>@@@qPh?m2dV|r# zJbpHG3p~0#0t@seV5z-_s0nBrnFbBZEnrVXCbpY(P0~F56}+uJ2DV4~GwY1UkUs1M zOCIeC{6#zT>@*thUTO+gt9L`J)mu(A2`2wCPyXf7b<}@Y00Y`jq&-TK2xBtJxj3bT zB@1)>25ak4aF-M~FnKrj*f)x>UaaP_ zFIH4zUS0)Ijg<`J=7Y&AQ{47+8l>^Lc{(eo zo;Up!#N2%b>sU z@nBvJO_mJB%RzAhW59sj3srw^xBM!K{44U6Re&bOPJ9f*qbs0RBM~o8Y6rWvv;v2b zjdA75!JOh2d+&M%zNNjX1}`OSvslf~jV88$rm2mPe41SgZix>(7c!5@CsE7#OYpcq zilwy3|{VAtTwU7Du+U~&Wk)XC}|h-Bn(xszz1F2wqhaWdQ;*g z*%^&N`19MxKZD4pQC3zWKjMg_RJiVy!-v)xi9K4ihee|{Ag9_S;@-}7J(7>IQt2fu ziHX7f8}A@CcnGmMsX#Fb^IZUkn9q|o=zj0+VD`lQpjL(Zy0vXN?Uh6|7=)j7ot6Rp zD!zjLou){28l)0N(v`5mXz6xDXUSpYYN^4Q0n){3>j?WeKs$vgr#Alr-z+GR-1~YX z#TgI7a`IObl)>BR2gd*P$GV86*ug(3gXrgV!6Z>w*Lv2>0%d7xQf4fjh zUwH%asD@BZmYOWN=jrR-YOBkTdi*ze3bq`l#WRW;D@@gk{J*h}hbgCd8#!!P2z zsWvljwiLFMDa0Y&kZ>ao&}l9U{Nn2!!j9}-!U|HVK;yM9Xbpcx_2U4Bd2PW4XG=J7 zP}zC=K`Ao#YdKWc2B{V>%ANA9fSY(?ZZa!g6ai*B2J+lpBbfi5&r!tpKCMGC#h&uO z%mhsxW!Y%jQAT?s1vHw^1O^JN*a>6X1i*kZ*?9B(cDNSv8Tq)DobXd0q}tDO?80%v z`n{}1Z!#FX>I`14_Edk8<-q*w*r>=5HK~rN@lQB}&I8Bj8o>aaY<%*vE+1IglRfp_ z33blhA-wAVOT%~K^{PjZ{aqQDR*b{PJ38}87JXDMz18*@Mt!js?QJat-`|46NueJu?+;d3?K zh>KuCS2OH-#fZjTpw>&mn;4Mv2HqCe<>Ks9zRd)I|3tmCDzwCckr$-nI-B9F!3m_i z45Qix^QK#62${4J24%g5MG+hmhLl0rsn0m&Lk!xr4X?ghAPd|aaZbim<5p{*sdghCI%tC86tMkkB z-6e1&npHeOhgDgUQs|CV%liR~E0xL`rlXFchg%LE2En7gTAB?9ZZ0F7*AS@=;`Z{p z5cNef{&kiCF5}&?NkS5iYjX`LXYhgs1CV%x^z>ySE4`dbIkF3@=nJ^w0iwm;mc+}~ zLem4e>{5s=C)@`2+zj{=!gDd@_QD559E!aXcmk>}jKO51HT-(x+fsJcw@hGS@>f{= zK!Xdcce5rF`n$NJ(sm6*HrvFg-XP&As!?&w{zL)%S~!{O!b(~28NrX7iWXsC}EUlsL8tar`b&#=Fh5j(tl35NQ0 z#^$9qNIVAxwiH~Cun}1@*l)W6!~=2Wr6Z8C_$35euf%RY?-%ued)&DV8S@TEgwqM@ 
z8RFz;k0t6OFDl-Pv|k|p=2f&G3{5=2A=nrPxgCb=18pH|T>#qM+CzFkGWI0Cy`>_7 zj|+6*2Xlr&nY2opA6f;36Zn$w_IS<97W7`nGO>?|ecK6tM&hrSpSO{X^4)+&7EQ3@ zpaeF*>2LLja4TG#W+f|>U#}4lozbTuoG2d4bs%fzr>61?WPVY{y6`1IEI{eL&}+y`$mX7#3Sbn z;|$06oDAJe}+q=^j*nn=_GHi?0E@;h@KQLNO5MVB}v)_{|Mag3Z~I z$Dgx%ffjIgW>-czjx~PJ22K;E`ZBUe@|~2bYAWyIjdU?JJ}5W9?T70?j2hltf5?> z_kwxV`G%R)F9Mx%V|mX|8?e~B8HgJyf*%!>%#&v?+YLR}dSZ(u?Lg#STjI=lq`Og^ zOOK*9i=W_e`3me0E9X?>QRrcT)urI>;|L<&L>&_Nlg^F@Jrsy!P@l&>r!8W^j0#&?C91jdXXx?sXo@f0(d}J)k;AoCcE2W-|}gi&3s4Z6{su6PfCf zY!{iv6~oWith^q$yK4cIf3q5D=llZpX5&=vfFCnLuUvL?ln}tTNRA1iT^(yqYS}JfPQv6|AvsJ2IyyC$DcCVos_el5wb-Zhct7PL?v$L4c zYPr-6O7T`r6Xj`uGu!EZQX=dI6sIi3dJsvselH%EikzUS- zL$NK5y_jL*6--L6$1Qj3;h~ttQc%KZ(&X#~_JY$7Td;B+89k%SD5q{@q|Jeb@phpr zBJoz7rPCQU@@1^1dl<`$3dXS$z9W4v7tDGV!JUWegg(UXo{2@ljY&s>BYrcGG)y4f zjJc*e!!JQd8r>yWL-$gS6rZC0k|xC;x;=xUq9B-+Sp$Qsdvh^Ap%V+egw~2LgNjwn zl{h8}ZCzk4(oQ((9w2nx33mO2zA6j-+3|Q73f+)2I?@paqYG)DwZ3ccT=WnXza&mb z5}E}Uc*F8QF7B#agS)f!iL>kQmc*Ux=fliC=kD~j`Mz%lUN;<4AP<;bBPbM%IosGOU z>svW{ey&V=N}MzY1F~kzq0to*-D3zHfDy)%4!;a$q@A-^t7wV#Q^|cD2%cATS(lc( zgoXfw38mkoFkG9NMpp z$28YabzcqQ=HnfsYQp|qu~qAnK$wm{&)bO|1|?8_(Z&(?-;h?MA@nAR^*0jS7tJH? 
zO2k*?Z|@G``L&KH@HlBDg#U6NsDSPw@6IHB&H+Tu2%PG~7tlRJM6*p8mKBD?&(NIs zVrt+*Bz+1!7iwdUd{(O7H&yoUwvX;PyqUgRE6gCar=QR85yj2O-Qg zNa*bp`>x78suA^$|0=XnY4GuQ^?su=-J4=F8ilWs=A+(keC2Zt1oqavnSwIi+XP#V zCH*-BsUL>c2;DUlas2_bnjMXEO?nCK8t!$z1;lYt=x_qNi#cEK^4oZOQ;tDdGjpVK zSFv*J#If*VSqL6q6N{w%WA=BU_^|i@ihjkF{-jqsLenLG|9DC1$@p%)qz|d<4wfJ=qVZ~ za5!lzqHe13kuV_%35%=oA>xfVBgPL2#gz|!j}I@pSEGB%k3Qc1+s^mXdOxmw|DSLk zLbEj}oR8wpZ0%4DB(6xf{J2_w#d}O|ixcsg(-ufkv{q zxO2osTM|*nKOAl}4nooiklsDP64Fvnuh@^LH_yQUy2nks>ow>7uk&-#HR1E)b5Z1H@eZxX&w{@hZ!$+QKXDf* z_?x(!C5=gqH&eO`7kY`9->wDzDEM1xlTZ2i-;KBb4gdRuhj!z~qSt*o-%os6Wyfkf=+OOip^bh*=lIJRx|5zxk)c<{+Be(WZ zdKpVxw29}5x%}~Y|E$gD{?y;*`HczwvERnpfr@94B_H^cwv^b#S6RJxFn2EwqrJO; z|7^^TR&&%bOWpPVbPWT4ij^{3{J!@4PTor5;0f>7Kwmoh(>2tdh>oVZGr~6*1i2iI=`Nh0 zh%sNa9P@#HI+R5ziTTM=!qz(P*Zw*vQmOP?f%M&3F8Zx@;L0D@H`;wEk7nhlpJ=XJ z*0E4BE6=FD=a+U%e&{hK<+L-gggVbXD>KLb-U}UFSq;QH$UAn(_p{krkj+ zdW}_*%o3%}nQPw9bI{PyBEO*Mx1xEPw5i_b@8`K+dy>+5+eEbuR>w%4Hhy>x;*3Sx zKQbgQJT*#5zw>VBa|UF4S+eU-UD{jFt4y{a$m{ zm337hJcNt3YB@&Is{HkX9O<`jmf9A2!}NWgcsHh>MwFr-5i5me|30Ru@06E$i)vG~ zU$U-;<2$dvSMwdSJnwsa&+~o9Jl}8}radzRM~qzS7dF zW38p7r89PtD}B+bBmWyrrY*mrfASaddmS=evbtpYWcA2CCaX`@fUF^z0a+t5W3t9% zCS;~$O~}m1%*mRPS&%g&YfjdJOisXt%$CfK%%04FtTkC1GDotuWKLx5$l8;+kaZ-J zv+hLJnXC($8(CMfZe(6$-N|~8`H=M{>qFL;Y#`YnvY}+d$o$Cs$%d1SAR9?GiY$OE zkSvHSm~1rJ7_v{uLdeFFjUx*sn?N>^EQ~CiY!cZFvYBKNWS^4Rwr}q|ecBl3@!{d4 zXNHD-V%e^TCGCBC`dR%tJtRa9tv=s$@XQcPpP6BnmTs0EJ8;*I+{Mk(g)w*XTfS2G z(e|48KFJ#TN!t6L-+$H2@7@(Nwp&9|vq$L9({SNrQ*`+FAtpP02KlKSa6;ipoF2RY zzxMwEj+z=F%i!pJtu5N#3dKR*<+!~1XPh`83u889LSNH()HXDOCLQG>v^0@>i%w`@6`~krj)Nvvfnaw2XKcA*9dt0!!q=I8ic$Xs z{K6068jD|2p>x?b{S*5V} zxDBA_J`k&pwqqA}7{P#qyO^ZSx%c8U$UVHCzudTz=L~egCH?1v%h?njUmOfy_BIvc zU-!gR{SIhidM~~)Pw5l;_tUk6J84S%jXTYEX3(?|K8t%KI zkGGp$5XOfTL4OaQN0fk#{e3Xk{s*>tl*eeFh56BT=w!YLETLS~-#Gx*3~<2a?KM!u zW}($^;BmQLBJHIqR3zKsl-+|d#Ks#AEF28?Z7wNw&YH3~JzFqOsso!ge9vbrNs{wN zE6l{1B9Je)jpG9@d&|1VX`bSywLh8Ko8$UNc|2w+<7u~j6=U9A5p{cu2^G_M 
zojnHpV!}PxK4UmEogW0b+Aq=94jc}*;B7Acm=NMoPmKub>RLCYoN7)!_887d*v2R3wSJ? zcC8je;Wf^Fq6IA)EQ8jahT_SlHmu0xfw0aKqWOLUc-H9+9&fM;Iwm0B9R3XE+Jq|& zoIc~R5j%vAVF|`Q>j1C5IE`9$HsZ2MKeRgc1jjz`WHH5!!1a+K#vAORbAAZ4U)b0- zmM6HZR}Az!@Un|l!oB($oTnagJpFrEZ?zb^UY-qA^_{WLx$`*F_zqg>Ov3cYu3~!f zSL}1l=$y6;Hh1yGD+OA=76#T2C<+t6qk~lHx)ZYZin} zKo7lpHIws$uT9TmqL>U;%M);Df)P+Zmh;2jRh{3T2Tm*ya=q}gO*m?o)E7mI_6nUn z1EAE@3`{R3V4KKtxn3TX@t7M9@a55a`Xl9wj9;)dv=+2>zS*^_$jX0!!LeM4>$4*6aS((??!`Nau%;CvOk zwCRp@er$((&$fldnO~x%{|IQizzym3ap}Zvczw_}LR6KBdRnibeqUeMRL~aW8MeQj z#qQ+q<-JF5LE&=>y69K)HrExQ$y~>0?J*U5pRAMd&HJ7$;xrx~Ztc$_PI>Uky8FR3 z+f!usVC>S36u!+)7pZ=!`{(NLm|`(cp09fD9@+=g{p)nf5L*OwgOzK?;fgoY)Og8U z<$zn1a`Ab@Jz+8{@?E@mU8WlTnnj(-cK>dOjr&ozq3$wvI}D6xGXC@_SKHoN>rQ(Q zG(XXr|dQ3=Lah+LxJx{jb}ydGmLd^hc6Dk zkmm|+l>h0g?BH>aXZU64H5_*D2sB?f3u&)FGiL*Ce^!A#Gkb|Hp*6yxbu!%AQpjQ+ zT>@*p(SrVteQP^Onek{W_KrQm`?5;3$+&?HZq$oKB@Lb@yE}qc*@;%sBeNDWzj0WmS%7UM#;f>2bL*63;oYHX!OQby_jEPipd5hN5 z@hgWA{6W74CrpWgCE7<}xos>YcOQ=XrX9wy2m5fjw;wqS!3RHWg7w220T0+jRNSmsp0E^rU* z8xVlA;?D4%NAkt?A%(oEeg}A}+=1O6?*chL>sd+geccOSRh|Wt0(;;e}VSZu%*kQgIo-w?H4U0cyX)?c+=)$y~0$5W!AZ2o9Wpj_%O^$Iv{+lx&9a0EGEKuun>nV+~aY}PeF^z2kuk#qshj@CL2Yha&&Bs3Q5ZSGh zx$N_gc11wnMVEm3IyTFCBKtqz+7WW7pnDph?6vdrWxyH zJi||q-^zN5mX?u7^$}7M$B3+MH}I3XBcY>XAKWx{G4&=P^ByDrza- z-8>i`HAG0siU!J21eb+uf%ypc@ANjgKQXg+FVij%?bmH!v{yhqSjHJ-%KnLG^AxvKc*%N?{mq8S`C-oo?S!R{DefGYfUDabLBH89Eaq)G+)PWS z9+JbTCcEFJGq=ev zzO=&4SA1WZtkO=Ufred4RFrcwDbLMAUong!io+3cJ0Z!rk+A z!JhE$?$BjWUUdUXzFm(K%bJL;yB^~Ji>WG?K4G&R*L@a=7B+T__%=`7Uk`WqRlsa! 
zhZ5iJ4qeI#L%^=~CeZQpW*lQ`hOf;MP{voznaj$;#=|h+t{|-TMD70>{`c|Nt|yd_ zpR(kw4f*2CT!nBV`|hl%u#GCfMfcW#^WIS+c-s(ctGQf zZPxrkaTh{0$_Kn{$3gDvFL`>|b4=Xd7k2C&g=?~&U`hQo;F3I__>dj!usDpSeXe2D z?jA_%fz0BT@X1e`aLa{kIJsyKjHG(@=}&Pub<}u_Drq43i~5=rQ@kE=z3QhslwILk zRJ$z{!eA)taFn4LN<5u)`2`UFK&oxO^MgJd+x1xDKWLru3g`DNLbI}GIORJ7wa%Zf9E#@xAK;vt zJra9E>E8LsZrjSUgJ%h8NIeO1> zhnowVpvyqGO&Ge}2UmwDXh(ROVnC63DEX$u6XS_jtTj$zcbeYdUuRNI76_nt6P#4)lP&a%gR!vb zHf8SISCpG$;ns`e&?;i2ta&Q_J7m-nop!B~SZ|Y*m5u~-FFnb`UInO zaVx5cTP3zsxt_#ENn}@d*dg&iICgm!yjYTqX;y)F=%-$S`T^Xz>%k5$$UwsBXrz6V zJ+7Fh&@*_Ta52dJqTj)x{BGb$dZS68Jsfo#2JNZN_>soA|F*5H>BP7Diukf^!rp%( zjLIrttB8kH1uqnY6R>0GQn2ZEokzdcN7_f|Kz&ulGx1lxIVTr4d5^_r7REe&SZ5y8 ze4*Np2)8PhGcQqn)esN84vhwUDkc=3g8goB?DAT*x8SNRCWPav6^dc_c10DE4?{QH z#h$ZD)cgspx(+t=zDMHZs6Ds>J#xngsViMu;liXoL)bx)csH^`IO+vl;qz8qM~R*3 zZ}>>kLnPl?=ix23D35_~H(%s1gb&?er{ci&);EE-v{$+(9}u!H*G=olZ@E;Xo>`EJ z|M~^i!-B9>SU&B!ijUnIm`eUjwWSI=ZF#HWkI_#rpxcEYq?(CiFhp`eF=}o&w#%Lb zg!_Qv1^ge|wXlu_XV1FD5%fKiXHHt_Nj2 zOMaxrpS^z=bc4R!*2xn{_u#Fl|59z`K$@Gr{Y@w&A8`&1#TRTM{@MzQ_{Wczf`$Km z-gDMHPBjkNJ#0#t>Km3H{y9&X>Mb>u+I%BjOy&nrZGa7r?#Nmqbi`>)@pFMzq#wxI zM%WH_pWZAacUUwe6(tW{I?+p=-<|w?B>upzpK7E`t$P6}hWHTID8*<(Oa5FxkAGFS z8{{`IRZL2C!FuF=*m}T1Be@~*BV|ER9_(8_2R+yIg$_$wtF_nG(1vuk947O8Yh*dl zSGan`LzIRbLhA8S4};=re{dizf$~lEAr+reA6DZ!VfG>DzT+&oRn}4Y`LF{KID=gP z3yzNkQ=lc%1ApT9)g|69hCJRqwI&{q&s~uiu&f1 zkuOo^lhnUHAC6LYQ2B44oLTOL+juxz^9qz^lP3wKwa>ZQ!4mZ<2e#NiXNbp?J5Z+wf|Fk*J@Zi2D0l zuq9DP__+80v?D!-uorLr=o?gYOVKRSlW#Vy2GZtmCu!{xvk`BFuDY}Na8^kQKf`6x=wddk zrTlb|{a27C1r=_OARs>Lo~P#lnCWs^%&=rqtF6M+nkRfOHS3 zi{T*K$qMPKQh&w}(qTzAMAGPRIOzzn&s=b=_bVJR|GbJ{5+^6fx+C!hX4A84EBAuI z`h7xo-w@(XZ7^=0o%*{cE5aq7#HcUa?1X-Y9iUl-$Q7 zcQnYm0;+Kk?J!V7xJR+B&{JGb*~uP5TK8Bk&y&tlp|erG&RnNtFIHdTFiFRV7iLxjXwsp`Hij5UO6W9pfN z{ZF@b6olz;cg{16AJz$VUhBh~2V;<8i%Gr7crcfGB)b=8u1LN*?B3Vt@G4j0aW1uz ztf0&2oku##y-=`M%GH|TG{c|n85ZG|mSGr@8G^*mh%cIBjDHGBeM%?Blbuttlo*E`y0H!r!fz<|o%X z+UC-`_0m51W(WS~toN^e9)Gv_k)A`^+Pi#{`7wQ*#Ni;d73!Ps@?&y55Eg%zA2Qz# 
zSYYsoI6V9?tu{aGE4l&QQ~vw&iri8+`Fy5&G3FBaN=~N`VjzOh6QECvf|N5S52zEJN!ks0ZKh%T28s7LD#`9kq z|NRG?hty0}JRV>64nLDFeIj%R(%pM4KA^i9wFX4Tc%b8C=XZGgJ$|NoAn(y{hra$$ z5B`6z2gGOkM7PCA;+=6_%z)mnfb_lvIC1GdnkQ+=^T&j3iy_ok7XzB!M5zif!4L8Bug1%n7An7-SJjQ~zW$3}{)SH{627nH(~|$v zok4I^JQmdLt>%5dA13|1`Q^+;sP3QSN9H5c_a@fr5lfo>sz)3=htc5$wfiUW&;okj z-v4cWPP!(}JNYrj$oibv3HQnREcx3O6HKQ0Nqs}*Z(cq~8k3rDV@#W<`hhyXMSDq0 zBL3E__(Q(^tNHdH(9b{MA<`57D*E4tc=`MB{rC9!z5e`o8BCz}qW)9;`2&7d?;QSe z`hBxU>ivR>58kFDeQrhVzlEn7$Sw z(DbX{&vWRj&7wtsC)4?HsK&c{f*5{awI-y{9WT<}{j=AbYuQ;{gMoW-?L7bYHAMe7 zl+|gyrq<`>^gv7=p786K^cloIT*IkXj6RX}*Y+8>v8X?pr?r)_^oe~2zQ0|AFzjNW zp>q_vlVdantyYTCyfkD#9n;JhU0*Yg#w@SA&BpNgwa$8`#3D6y);iS&{*hR5oasc$k5Kh_ARrN)!2KTp{yVUyLp97sSBOexxnD$$rxUfZE z(?sVoqZ|_%B~3IPV{63irdpatk2kAhCO5js^c(!TXOmp2Fg5cZ^0U2i%s+g+|5M+L zlrS_o`IQ_~6zJU1SbY>!TL=F~V}APGZ`b#3o(mtizx@N>bXSgTwAV?W$g(}JX&*4F z_Vt`sRI{js55!udl;4fnb5ND=4+v!LQ^sf}?JU8>@Z%bbVddg%kbx#Lt~pzIVLfXU zW^%ab8JXa?B1IB2FiN8`kUP#Z6i^j!2BQ>Jy^pmq&COE!q0 zP|!$Ulv3^fOQm+L?|p90KIkKT(Bx+D?|hM3J}f%SfN8D&)hGKq)n!eS>eli(ac)$v z-#!Ok{qi-l>n&qLHjdMDDeong4IF_nRBL2!ocnDkyVBt-^L^H`_PN2n-lF}F(=_t+ hl3u-HbzfYm^~w0AzMwuAWt;2%%du~2egCzF{{?vCgroof diff --git a/forester/archive/RIO/others/hmmer/testsuite/fn3.hmm b/forester/archive/RIO/others/hmmer/testsuite/fn3.hmm deleted file mode 100644 index ed949cb..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/fn3.hmm +++ /dev/null @@ -1,270 +0,0 @@ -HMMER2.0 [2.1.4] -NAME fn3 -LENG 84 -ALPH Amino -RF no -CS no -MAP yes -COM ../binaries/hmmbuild -F fn3.hmm fn3.seed -NSEQ 109 -DATE Sat Apr 29 15:36:08 2000 -CKSUM 9857 -XT -8455 -4 -1000 -1000 -8455 -4 -8455 -4 -NULT -4 -8455 -NULE 595 -1558 85 338 -294 453 -1158 197 249 902 -1085 -142 -21 -313 45 531 201 384 -1998 -644 -HMM A C D E F G H I K L M N P Q R S T V W Y - m->m m->i m->d i->m i->i d->m d->d b->m m->e - -13 * -6769 - 1 -1712 -4227 -5498 -865 -4208 -2901 -1274 -566 -2467 395 -3420 -4836 3619 -1858 -4835 -1203 -1345 -131 -4660 -1520 1 - - -150 -501 232 46 -382 399 
104 -628 211 -461 -722 274 395 44 95 358 118 -368 -296 -251 - - -142 -3413 -12964 -19 -6286 -701 -1378 -13 * - 2 -626 -5402 1665 -881 -5720 541 -3570 -5469 -3152 -906 -4492 -539 1858 -1555 -2021 1928 -595 -1313 -5587 -1487 3 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11922 -12964 -894 -1115 -701 -1378 * * - 3 1982 -5408 -2052 103 -5729 -1682 -3568 -5480 246 -3106 -4497 -1099 2207 785 -341 -15 -969 -591 -5592 -1728 4 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11922 -12964 -894 -1115 -701 -1378 * * - 4 -1926 -4681 -2749 -6730 -4830 -6496 -5599 -144 -6364 -1674 -3977 -6224 3833 -6035 -1363 -5637 -2263 1172 -5463 -5108 5 - - -148 -501 232 42 -382 397 104 -620 209 -460 -713 274 394 44 98 358 116 -371 -296 -251 - - -124 -3599 -12990 -21 -6130 -701 -1378 * * - 5 -1312 -699 -1390 365 -156 656 -278 -5500 196 -1273 -1315 -1810 -383 1165 1059 988 1349 -1727 -330 -1786 7 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -14 -11948 -6745 -894 -1115 -701 -1378 * * - 6 -952 -5420 1492 -589 -2060 995 -241 -588 -1239 -1246 -4509 2702 -765 546 117 -1666 -1058 -5042 -5603 -1781 8 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11935 -12977 -894 -1115 -373 -2136 * * - 7 -1910 -4186 -1403 -683 554 -5930 -4807 398 -5685 2189 658 -5576 664 -5305 -5486 -5017 -4309 1814 -4661 -4323 9 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -22 -11948 -6043 -894 -1115 -701 -1378 * * - 8 -1843 -858 -1320 -329 -1890 -1639 2015 -1004 837 -3190 -4500 -528 -5007 1351 1283 933 1453 118 -5594 -4912 10 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11926 -12968 -894 -1115 -288 -2468 * * - 9 1390 190 -6595 -5959 1799 -625 -4670 917 -2544 -64 -175 -5444 -2246 -521 -2424 -1906 -2445 2071 -270 -4193 11 - - -149 -500 
233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 10 -1212 -976 -129 110 -5720 -2845 -164 383 351 -777 230 -1215 -2266 -366 198 1223 1864 -91 -385 -1640 12 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 11 357 -5433 1583 1276 -488 484 -1040 -5505 -1949 -5449 -4522 1509 419 -407 -723 -10 -541 -211 -5616 -1543 13 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -54 -11948 -4764 -894 -1115 -701 -1378 * * - 12 -185 -872 -1228 -815 -5284 -4981 -3657 1656 -1247 39 -279 70 -732 313 214 -878 364 1866 -5347 -4747 14 - - -147 -503 232 43 -379 402 103 -625 210 -467 -723 275 395 42 93 360 117 -369 -297 -247 - - -243 -2693 -12936 -347 -2227 -156 -3286 * * - 13 -1722 -5433 -171 -632 -5754 984 -3592 -5504 -1911 -2306 -1336 1274 -5027 757 52 1088 2327 -5055 -461 -684 17 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 14 545 -5433 984 806 -5754 -286 -134 -5505 -1324 -3076 -4522 -291 1593 -1508 -1328 1511 638 -1810 257 -1616 18 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11948 -6839 -894 -1115 -701 -1378 * * - 15 -3956 -5429 1465 -1445 -5750 -533 -126 -5501 -1258 -5445 -4519 1773 -5022 -3128 -91 1187 2378 -1884 -5613 197 19 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11935 -12978 -894 -1115 -384 -2097 * * - 16 -620 -5433 -1185 95 -2048 -386 -1155 -5505 -255 -5449 -4522 345 -5027 -387 -680 2481 1173 -1235 -453 371 20 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 17 545 -4080 -6600 -5964 979 -2741 -1162 1755 -5559 1775 1268 -5448 -5852 -5182 -5359 -4887 -2301 1502 -4538 -1461 21 - - -149 
-500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 18 -945 -842 -1435 -275 -1139 -2672 314 -366 -905 -66 -427 -211 -2235 675 461 548 1698 1036 -558 -670 22 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 19 -1902 -670 -6964 -6348 188 -2623 -5112 1707 -5963 1935 -237 -5851 -6235 -5596 -5776 -5302 -2390 2324 -4960 -4615 23 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 20 -657 -5433 -2278 72 -5754 -1781 1065 -5505 -429 -1282 -368 683 -5027 935 654 2126 1335 -5055 -468 -4933 24 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 21 -8231 -7330 -9169 -9336 78 -9016 -5428 -2450 -8949 -2065 -5190 -7783 -2409 -7649 -8320 -8300 -8080 -6572 6181 -3498 25 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 22 -1358 -5432 305 919 -2019 -2615 -276 -1003 511 -826 -1303 41 -2076 1069 -82 1302 1506 -1283 -5615 -1694 26 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 23 1017 -5398 -3827 -347 -5703 -777 618 -939 30 -1608 -4491 -1916 2739 -944 109 -308 -293 -499 474 -1759 27 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 24 7 -5362 -1347 -1387 -5651 -1257 -3617 -2388 -460 -727 -1300 -1005 3338 -1501 -3712 558 -1388 -154 -5565 -4903 28 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -649 -11948 -1466 -894 -1115 -701 -1378 * * - 25 -313 -4824 738 903 489 -1961 -319 -1743 600 -730 -3913 -512 1182 -1049 456 69 1061 -1200 -5008 -1017 
29 - - -149 -513 256 52 -380 388 111 -628 204 -474 -734 271 397 57 90 357 118 -370 -308 -236 - - -2828 -458 -2930 -849 -1169 -4461 -67 * * - 26 -1035 -178 227 -2617 -4671 2074 534 -1574 -1120 -142 -607 230 1117 -92 -7 -60 -1463 -921 262 -829 34 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11120 -12162 -894 -1115 -167 -3197 * * - 27 -857 -5371 794 -103 -5693 997 431 -981 -785 -3041 -4461 2028 -43 581 354 793 -605 -1682 -313 -1481 35 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11883 -12925 -894 -1115 -135 -3482 * * - 28 586 -5425 -81 -53 -5742 2298 -3595 -991 -1451 -2142 -1249 -623 -2318 -1468 -5 1095 -941 -277 -5610 -1606 36 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 29 -210 -5433 989 1029 -5754 -1116 353 -1014 171 -3117 -1178 -275 2115 -43 -830 -503 318 111 -5616 -1638 37 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 30 -1068 -4078 -359 -5958 1263 -1799 -4669 2510 -5554 276 -910 -2229 -375 -1944 -5354 -2555 -2191 1553 549 775 38 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 31 -980 -681 1466 40 -545 -5057 -3737 -672 -560 -87 -1269 -558 -5145 282 -90 219 2049 -46 -417 -1694 39 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 32 -1302 -5429 -440 -620 -1239 2220 1313 -2398 434 -3100 -1104 805 -5028 -433 316 596 -1027 -1916 -565 522 40 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 33 -8272 -7332 -8886 -9158 2064 -8716 -1014 -1559 -1656 -2163 -6667 -7396 -8586 -7513 -8120 -7948 -8139 -1736 -299 4507 41 - - 
-149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 34 -1292 -5430 -146 1338 -556 -2813 -206 -371 415 -197 -4519 165 -5028 278 1599 253 380 -304 -5614 -262 42 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 35 -2855 -5757 -8978 -8661 -6170 -8865 -8890 2503 -8646 1015 -1233 -8521 -8521 -8496 -8785 -8267 -6272 2826 -8027 -7630 43 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -85 -11948 -4130 -894 -1115 -701 -1378 * * - 36 -740 -868 121 1136 -1117 -4863 456 -330 599 -1234 -4412 -3505 -4956 1505 886 120 1536 -1114 -5511 -101 44 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11863 -12905 -894 -1115 -109 -3778 * * - 37 -932 1837 -6595 -5959 -91 -5798 -328 -291 -407 -8 -3280 -5444 -5848 -5178 -795 -2551 -988 502 2740 3658 45 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1993 -11948 -418 -894 -1115 -701 -1378 * * - 38 -4510 -5545 -3323 -676 -6286 -4728 2257 -5751 -2041 -5420 -4762 -420 -4944 -2780 3841 -989 -4331 -5444 -5302 -5022 46 - - -149 -489 232 45 -381 398 105 -627 210 -467 -711 277 393 48 95 359 117 -370 -295 -250 - - -1797 -3160 -736 -733 -1328 -10 -7215 * * - 39 -147 363 -2007 304 -918 -1103 -3478 -161 1080 -715 -4049 -1617 -2050 2003 -3601 557 -702 -321 2877 644 49 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11712 -6548 -894 -1115 -359 -2184 * * - 40 -855 -930 550 1092 -5698 -1653 286 -2500 552 -1710 226 346 1948 343 -1203 414 429 -794 -5560 -4877 50 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11889 -12931 -894 -1115 -2701 -241 * * - 41 181 -153 516 463 -5664 -1662 -1038 -48 1045 -254 -194 -3524 143 556 -1145 61 726 794 -5544 -4868 51 - - 
-149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -116 -11889 -3697 -894 -1115 -146 -3378 * * - 42 -3855 -5327 1599 16 -1927 1687 -1100 -5398 -1840 -1553 -4416 2295 -4922 -736 263 13 -364 -765 -5511 -1538 52 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11837 -12879 -894 -1115 -87 -4089 * * - 43 -638 -5433 1340 850 -5754 1040 735 -2398 522 -1339 -1395 491 -5027 -79 -737 897 693 -1779 -5616 -4933 53 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 44 -972 -5410 -1362 621 -2330 1671 -968 -1642 -952 -1007 223 1495 819 1092 -3690 -29 -326 -652 -5600 -685 54 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -155 -11948 -3299 -894 -1115 -701 -1378 * * - 45 -2194 -5287 1087 1415 -5608 870 -907 -2395 349 -427 -4376 -492 291 1176 -317 355 -70 -2410 771 -4787 55 - - -148 -444 225 63 -389 400 108 -622 197 -475 -727 259 408 43 103 350 102 -361 -222 -237 - - -1290 -759 -12836 -1725 -520 -1218 -811 * * - 46 -157 337 519 798 -1912 -1225 -68 -653 114 -297 -291 -1697 725 -732 -1940 -255 36 -19 3575 -4817 66 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11858 -12900 -894 -1115 -104 -3844 * * - 47 -354 -5019 -821 -3545 401 -5079 1252 159 969 -224 -1342 1435 -694 1532 487 -1195 179 115 -703 -1756 67 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 48 -1345 -5433 -1224 1817 -5754 -2557 2299 -2518 -1322 -5449 -1196 460 -410 547 474 477 796 -85 1521 -203 68 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 49 -735 -922 -865 99 1424 -1378 -968 800 125 368 -298 -1091 -5130 1044 487 -1699 -652 820 -5380 495 69 - - -149 -500 233 43 -381 399 
106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 50 -2548 -5416 476 -926 -786 -4939 -202 333 -743 -1027 220 1772 -491 302 391 547 1705 -327 -5604 -4926 70 - - -146 -501 231 42 -382 399 104 -628 211 -467 -722 274 394 47 94 361 116 -362 -296 -251 - - -54 -4771 -12990 -1430 -669 -701 -1378 * * - 51 -125 -4491 -206 -447 -1977 -1822 -4130 1477 -2342 582 -1108 -162 -2226 -3990 -445 -1290 -16 2224 -4885 -4452 74 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 52 -392 -879 1247 -518 -1099 -606 -233 -5499 1 -2146 -4519 -1046 2266 -3134 -723 1223 288 -67 -5614 -4932 75 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 53 110 -5432 -786 -62 -5753 1386 -3592 -2570 485 -1099 -4521 1153 1199 -1503 1373 399 -3899 -2642 1101 -1717 76 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 54 -933 -5432 792 -584 -5753 -910 1029 -2570 -3173 -5448 -4522 1505 989 185 -764 1043 1675 261 -5616 -1615 77 - - -149 -502 231 43 -383 404 104 -628 208 -461 -722 276 392 47 94 357 121 -371 -296 -251 - - -62 -4587 -12990 -1650 -554 -701 -1378 * * - 55 528 -5430 -2070 1507 -5750 -4935 -27 -1606 194 -768 -4520 -3570 -147 1018 -439 182 1977 -290 -5614 -4932 82 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -531 -11948 -1701 -894 -1115 -701 -1378 * * - 56 -1241 -4929 424 -414 -37 -713 1149 -537 -799 -2452 153 1422 -1051 -431 507 353 1641 -1262 -5114 735 83 - - -149 -490 230 45 -378 398 103 -621 208 -466 -723 273 391 43 104 358 121 -372 -297 -252 - - -348 -2223 -12460 -101 -3887 -730 -1332 * * - 57 -1103 -5244 -3635 529 -972 -961 40 -2282 -714 -1986 -1144 223 -1119 761 812 1888 1499 -2270 -5430 -107 86 - - -149 -500 233 43 -381 399 106 
-626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11760 -12802 -894 -1115 -56 -4726 * * - 58 990 1359 -2412 -5961 1171 -2669 1032 -583 -5555 -220 684 -5445 -5849 -5178 -2256 -4883 -79 1469 -497 2841 87 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 59 -1740 -5388 -543 238 -2017 -4947 205 658 -59 -940 305 334 -2308 69 -686 -619 2386 690 -5584 -1963 88 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 60 -4313 -4136 -693 -6024 1490 -5866 -4738 1755 -5621 1797 -1007 -5512 -5911 -452 -2279 -2669 -4253 1969 -4597 -4258 89 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 61 -1206 -5433 -67 557 -1180 -1815 996 -5504 429 -3083 -279 290 -135 714 105 843 1912 -622 -475 -1615 90 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 62 -2512 -5433 1789 -331 -5754 2420 -115 -2442 -207 -5449 -1143 1986 -2076 -952 -2025 -549 -3899 -5055 -5616 -1656 91 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 63 -2664 -4761 -2431 -6696 -927 -6570 -5431 -1266 -6307 3192 -228 -6220 -6539 -2095 -6083 -5670 -2437 -1081 -5161 -4890 92 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 64 -1265 -5432 -786 696 -5753 -2574 -190 -709 1120 -437 -1279 2 -869 1117 1301 -183 955 122 -5616 -235 93 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 65 667 -5360 -3850 234 -5648 -4957 -3619 -5375 -1411 -5362 -1250 -1129 3641 -944 -3714 -1681 -282 -2598 -323 -1676 94 - - -149 -500 
233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 66 -891 -5431 642 -1327 -637 2698 1447 -5500 -1220 -3194 -4520 1005 -5028 -1534 -1185 -106 -2252 -2539 -453 719 95 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 67 -341 -924 -3935 -309 -5457 -5000 -291 -1612 -499 -2284 675 -1099 -5091 -887 1180 235 2750 669 -5465 -4840 96 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 68 -728 -5432 601 1421 -1172 -1229 -74 -1011 998 -1077 -298 499 243 679 186 -154 551 -1728 -344 -4933 97 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 69 -8596 -7506 -8959 -9321 -422 -8843 -5022 -7480 -8878 -3146 -6880 -2244 -8697 -7593 -8237 -8093 -8445 -7638 -4268 4885 98 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 70 -1827 -5418 -2102 1225 -1065 -2710 630 -696 193 -993 -1227 1015 -5031 784 -68 445 1395 673 -5606 -199 99 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 71 -1785 -589 -6706 -6075 2739 -2734 -4797 1908 -5675 95 263 -5564 -5962 -5299 -5478 -5006 -4295 2092 -4657 643 100 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 72 -1787 144 -783 -139 -5753 -2709 -260 -1568 913 -5448 -373 39 -5027 1175 2142 801 910 -1336 1128 173 101 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 73 -1872 -5834 -9025 -8676 -1056 -8881 -8673 1755 -8630 359 -1237 -8541 -8490 -8319 -8674 -8265 -6337 3280 -7727 
-7464 102 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 74 327 -924 -2319 -1494 -173 -4998 230 316 472 -1019 -4344 -1099 -5088 1874 1193 -1265 439 611 -5469 1668 103 - - -149 -500 233 43 -381 398 105 -626 210 -462 -721 275 394 45 96 359 117 -369 -295 -250 - - -42 -5742 -6616 -86 -4119 -701 -1378 * * - 75 3012 -4665 -7117 -2715 -5019 -382 -5509 -2260 -6298 -1487 -4254 -5905 -6197 -5917 -6115 536 1246 -967 -5505 -5179 105 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11933 -12975 -894 -1115 -357 -2190 * * - 76 -902 -714 -6363 -469 -141 -5764 290 585 -447 281 -993 -5334 -5816 439 1001 -1825 -1361 2268 -4558 1019 106 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 77 -2376 -795 187 -2095 -5752 -2675 -955 -1144 135 -3116 -4521 2792 -767 -1566 -777 1498 1246 -2809 -5615 251 107 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -14 -11948 -6747 -894 -1115 -701 -1378 * * - 78 337 -737 928 157 -2106 1805 -240 -2485 -205 -5436 -4509 308 -2438 892 51 -133 409 -284 -5604 -4921 108 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11935 -12977 -894 -1115 -373 -2135 * * - 79 751 -879 627 278 -2228 932 413 207 367 -1700 -291 502 -2136 495 118 -1224 -556 59 -5609 -760 109 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -746 -11948 -1309 -894 -1115 -701 -1378 * * - 80 -1113 -4530 -1626 -702 -1338 3243 -3172 -4385 -2856 -2220 -3677 -1218 -4564 -2783 -3335 -609 475 -4087 -4819 473 110 - - -153 -500 231 50 -364 398 93 -624 206 -464 -733 283 383 46 94 356 116 -368 -250 -237 - - -6367 -18 -12245 -485 -1807 -3943 -97 * * - 81 -5380 -6068 -5956 -5195 -7389 3194 -752 -6800 -1418 -2482 -5840 -5064 -706 -4120 -1351 1839 
-5345 -6386 -6418 -6291 116 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11248 -12290 -894 -1115 -270 -2549 * * - 82 55 -5343 -399 930 -1066 -48 1295 -2492 90 -1643 1044 -891 1912 1116 368 -1154 -891 -1051 -5526 -1841 117 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11854 -12896 -894 -1115 -101 -3893 * * - 83 90 -5310 -3879 1738 8 -1225 -3636 -1523 -271 -615 -1216 -1875 1773 -1639 653 -366 -193 -4902 2379 453 118 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -1 -11948 -12990 -894 -1115 -701 -1378 * * - 84 -1223 -5446 -3828 -1467 -5768 -863 -152 -2326 -1293 -5461 -4537 -3588 -2253 -1611 -424 3277 371 -5069 -5628 -4949 119 - - * * * * * * * * * * * * * * * * * * * * - - * * * * * * * * 0 -// diff --git a/forester/archive/RIO/others/hmmer/testsuite/fn3.seed b/forester/archive/RIO/others/hmmer/testsuite/fn3.seed deleted file mode 100644 index aef27c1..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/fn3.seed +++ /dev/null @@ -1,332 +0,0 @@ -# STOCKHOLM 1.0 - -7LES_DROME/1799-1891 P.SPP.RNFSVRVL..SPRELEVSWLPPEq...LRSESVYYTLHW...QQ -7LES_DROVI/1917-1997 S.YAPlPPLQLIEL..NAYGMTLAWPGT......PDALSSLTLEC...QS -APU_THETY/928-1009 A.PQPiTDLKAVS...GNGKVDLSWSVV.......DKAVSYNI.YR...S -APU_THETY/1165-1248 P.TAP.V.LQQPGI..ESSRVTLNWSPSA....DDVAIFGYEIYK...SS -AXO1_CHICK/602-692 PpGPP.GGVVVRDI..GDTTVQLSWSRGFd...NHSPIARYSIEAR...T -AXO1_CHICK/807-896 PkVAP.FRVTAKAV..LSSEMDVSWEPVEqg.dMTGVLLGYEIRY...WK -CAML_HUMAN/812-907 P.QAI.PELEGIEIl.NSSAVLVKWRPVDla.qVKGHLRGYNVTY...WR -CHI1_BACCI/465-542 P.SVP.GNARSTGV..TANSVTLAWNAST....DNVGVTGYNV.YN.... -CHIT_STRLI/142-219 P.SAP.GTPTASNI..TDTSVKLSWSAAT....DDKGVKNYDV.LR.... -CHIX_STROI/169-240 P.PAPpTGLRTGSV..TATSVALSWSPV.......TGATGYAV.YR.... 
-CONT_CHICK/799-884 PtEVP.TDVSVKVL..SSSEISVSWHHVT.....EKSVEGYQIRY...WA -CPSF_CHICK/630-716 P.DPP.QSVRVTSV..GEDWAVLSWEAPPf..dGGMPITGYLMER...KK -CPSF_CHICK/923-1008 P.GPP.QAVRVMEV..WGSNALLQWEPPKd..dGNAEISGYTVQK...AD -ECK_HUMAN/329-420 P.SAP.HYLTAVGM..GA.KVELRWTPPQd..sGGREDIVYSVTCEqcWP -ECK_HUMAN/436-519 Q.TEP.PKVRLEGR..STTSLSVSWSIPPp...QQSRVWKYEVTYR...K -EPH1_HUMAN/333-435 P.SAP.RNLSFSA...SGTQLSLRWEPPAd..tGGRQDVRYSVRCS..QC -EPH3_CHICK/333-429 P.SAP.QAV.ISSV..NETSLMLEWTPPRd..sGGREDLVYNIIC...KS -EPH3_CHICK/444-528 P.SAV.SIMHQVSR..TVDSITLSWSQPDq...PNGVILDYELQY...YE -ETK1_CHICK/325-421 P.SAP.RNV.ISNI..NETSVILDWSWPLd..tGGRKDVTFNIIC...KK -FAS2_SCHAM/530-616 P.SAV.LQVKMDVM..TATTVTFKFFGPGn..dGGLPTKNYAVQY...KQ -FAS2_SCHAM/642-735 T.SGT.ENEVVVSP..YPNRYELRWQVPAd...NGEPITHYSVKS...CP -FINC_BOVIN/577-660 T.SGP.VQVIITETpsQPNSHPIQWSAPE.....SSHISKYILRW...KP -FINC_BOVIN/689-768 P.VVA.TSESVTEI..TASSFVVSWVSA......SDTVSGFRVEY...EL -FINC_BOVIN/780-858 P.DAP.PDPTVDQV..DDTSIVVRWSRP......RAPITGYRIVY...SP -FINC_BOVIN/875-955 KvPPP.RDLQFVEV..TDVKITIMWTPP......ESPVTGYRVDV...IP -FINC_BOVIN/1142-1225 PlSPP.TNLHLEANp.DTGVLTVSWERST.....TPDITGYRITT...TP -FINC_BOVIN/1236-1316 V.PPP.TDLRFTNV..GPDTMRVTWAPPS.....SIELTNLLVRY...SP -FINC_BOVIN/1327-1406 L.DSP.SGIDFSDI..TANSFTVHWIAP......RATITGYRIRHH...P -FINC_BOVIN/1417-1499 S.DVP.RDLEVIAA..TPTSLLISWDAP......AVTVRYYRITY...GE -FINC_BOVIN/1511-1590 I.DKP.SQMQVTDV..QDNSISVRWLPS......SSPVTGYRVTT...AP -FINC_BOVIN/1601-1680 I.PAP.TNLKFTQV..TPTSLTAQWTAP......NVQLTGYRVRV...TP -FINC_BOVIN/1693-1771 V.SPP.RRARVTDA..TETTITISWRTK......TETITGFQVDA...IP -FINC_BOVIN/1782-1861 I.DAP.SNLRFLAT..TPNSLLVSWQPP......RARITGYIIKY...EK -FINC_CHICK/551-630 I.DRP.KGLTFTEV..DVDSIKIAWESP......QGQVTRYRVTY...SS -FINC_RAT/1266-1346 V.PQL.TDLSFVDI..TDSSIGLRWTPLN.....SSTIIGYRITV...VA -GUNB_CELFI/651-733 P.TTP.GTPVATGV..TTVGASLSWAASTd...AGSGVAGYEL.YR...V -IL7R_HUMAN/129-221 P.EAP.FDLSVIYRe.GANDFVVTFNTSHlq.kKYVKVLMHDVAYR..QE -ITB4_HUMAN/1127-1208 
L.GAP.QNPNAKAA..GSRKIHFNWLPP......SGKPMGYRVKY...WI -ITB4_HUMAN/1220-1310 P.SEP.GRLAFNVV..SSTVTQLSWAEPAe...TNGEITAYEVCY...GL -ITB4_HUMAN/1581-1665 P.DTP.TRLVFSAL..GPTSLRVSWQEPR....CERPLQGYSVEY...QL -ITB4_HUMAN/1694-1781 P.SAP.GPLVFTAL..SPDSLQLSWERPRr...PNGDIVGYLVTC...EM -KALM_CHICK/178-271 P.LKPrKELKFIEL..QSGDLEVKWSSKFn...ISIEPVIYVVQRR..WN -KALM_CHICK/544-642 L.AKP.ENLSASFIv.QEGNITGHFSWKIskavLHQPMTGFQVTW...AE -KMLC_CHICK/60-145 P.DPPaGTPCASDI..RSSSLTLSWYGSSy..dGGSAVQSYTVEI...WN -LAR_DROME/322-404 P.TAP.TDVQISEV..TATSVRLEWSYK.....GPEDLQYYVIQY...KP -LAR_DROME/417-503 E.SAP.RNVQVRTL..SSSTMVITWEPPEt...PNGQVTGYKV.Y...YT -LAR_DROME/515-598 P.SQP.SNFRATDI..GETAVTLQWTKPTh...SSENIVHYELYW...ND -LAR_DROME/709-800 P.GDP.QDVKATPL..NSTSIHVSWKPPLek.dRNGIIRGYHIHA...QE -LAR_DROME/909-995 PgGPP.SNITIRFQ..TPDVLCVTWDPPTre.hRNGIITRYDVQFH..KK -MPSF_CHICK/371-457 P.GAP.MDVKCHDA..NRDYVIVTWKPPNt..tSQNPVIGYFVDK...CE -MPSF_CHICK/499-585 P.GPP.TNVHASEI..SKTYVVLSWDPPVp...RGREPLTYFIEK...SM -MPSF_CHICK/600-684 P.SAP.GRVVATRN..TKTSVVVQWDKPK....HEENLYGYYIDY...SV -MPSF_CHICK/699-785 P.SYP.HGITLLNC..DGHSMTLGWKAPKy..sGGSPILGYYIDKR...E -MPSF_CHICK/801-887 P.GPA.YDLTVCEV..RNTSLVLLWKAPVy..eGKSPITGYLVDY...KE -NCA1_BOVIN/509-597 P.SSP.SIDQVEP...YSSTAQVQFDEPEa..tGGVPILKYKAEWR...A -NCA1_BOVIN/610-691 P.SAP.KLEGQMGE..DGNSIKVKLIKQDd...GGSPIRHYLVKYR...A -NGCA_CHICK/700-794 PeRNP.GGVHGEGN..ETGNLVITWEPLPpq.aWNAPWARYRVQWR...P -NRCA_CHICK/623-709 P.NPP.LDLELTGQ..LERSIELSWVPGEe...NNSPITNFVIEY...ED -NRCA_CHICK/726-810 ....P.SNVQGIGS..EPDNLVITWESLKgf.qSNGPGLQYKVSWR..QK -NRCA_CHICK/928-1014 P.SPP.SFLKITNP..TLDSLTLEWGSPTh...PNGVLTSYILKF...QP -NRG_DROME/717-799 ....P.DNVVGQGT..EPNNLVISWTPMPei.eHNAPNFHYYVSW...K. -NRG_DROME/815-905 PlDAP.TNFTMRQIt.SSTSGYMAWTPVSee.sVRGHFKGYKIQT...WT -NRG_DROME/917-1007 P.SPV.QGLDAYPL..GSSAFMLHWKKPLy...PNGKLTGYKIYY...EE -PHB_ALCFA/344-418 G.SAP.TGLAVTAT..TSTSVSLSWNAV.......ANASSYGV.YR.... 
-PTP1_DROME/123-205 P.DPP.SNLSVQVR..SGKNAIILWSPPT.....QGSYTAFKIKV...LG -PTP1_DROME/217-301 P.NTP.GKFIVWFR..NETTLLVLWQPPY....PAGIYTHYKVSI...EP -PTP1_DROME/312-394 P.LRP.LNVTFDRDfiTSNSFRVLWEAPK....GISEFDKYQVSV...AT -PTP1_DROME/405-485 P.LPV.RNLRSINDd.KTNTMIITWEADP.....ASTQDEYRIVYHe.LE -PTP1_DROME/583-661 P.NPP.RNMTIETV..RSNSVLVHWSPPE.....SGEFTEYSIRYR...T -PTP1_DROME/864-944 P.EPI.TQLHATNI..TDTEISLRWDLP......KGEYNDFDIAY...LT -PTP1_DROME/958-1044 P.GRV.ERFHPTDV..QPSEINFEWSLPSs..eANGVIRQFSIAY...TN -PTP6_DROME/236-321 V.PQV.SIDFAKAV..GANKIYLNWTVND....GNDPIQKFFITL...QE -PTP6_DROME/332-425 Y.DPI.FIPKVETTgsTASTITIGWNPPPp..dLIDYIQYYELIV...SE -PTP9_DROME/171-259 P.SKP.QNLTILDV..SANSITMSWHPPKn...QNGAIAGYHVFH...IH -PTPB_HUMAN/22-103 AePER.CNFTLAESkaSSHSVSIQWRIL.......GSPCNFSLIY...SS -PTPB_HUMAN/112-192 P.PAR.FGVSKEKT..TSTGLHVWWTPS......SGKVTSYEVQL...FD -PTPB_HUMAN/467-543 P.LAV.LQLRVKHA..NETSLSIMWQTP......VAEWEKYIISL...AD -PTPB_HUMAN/554-632 P.AQV.TDLHVANQg.MTSSLFTNWTQA......QGDVEFYQVLL...IH -PTPB_HUMAN/643-725 P.SSV.SGVTVNNSg.RNDYLSVSWLVA......PGDVDNYEVTL...SH -PTPB_HUMAN/731-808 P.DKV.QGVSVSNSa.RSDYLRVSWVHA......TGDFDHYEVTI...KN -PTPB_HUMAN/907-984 P.SAV.KNIHISPNg.ATDSLTVNWTPG......GGDVDSYTVSA...FR -PTPB_HUMAN/995-1074 P.ASV.QGVIADNAy.SSYSLIVSWQKA......AGVAERYDILL...LT -PTPB_HUMAN/1085-1162 P.AAV.TDLRITEN..STRHLSFRWTAS......EGELSWYNIFL...YN -PTPB_HUMAN/1173-1250 P.ASV.SHLRGSNRn.TTDSLWFNWSPA......SGDFDFYELIL...YN -PTPB_HUMAN/1261-1344 P.SPP.SLMSFADI..ANTSLAITWKGPP....DWTDYNDFELQW...LP -PTPB_HUMAN/1355-1434 P.DKI.QNLHCRPQ..NSTAIACSWIPP......DSDFDGYSIECR...K -PTPK_MOUSE/290-376 P.PRPiAPPQLLGV..GPTYLLIQLNANSi..iGDGPIILKEVEYR...M -PTPZ_HUMAN/312-401 S.SEP.ENVQADPE..NYTSLLVTWERPRv..vYDTMIEKFAVLY...QQ -SEK_MOUSE/441-525 P.SSI.ALVQAKEV..TRYSVALAWLEPDr...PNGVILEYEVKY...YE -TENA_CHICK/593-671 V.SPP.TELTVTNV..TDKTVNLEWKHE.......NLVNEYLVTY...VP -TENA_CHICK/682-767 L.PAP.EGLKFKSV..RETSVQVEWDPL......SISFDGWELVFRnmQK -TENA_CHICK/774-853 L.DAP.SQIEAKDV..TDTTALITWSKP......LAEIEGIELTY...GP 
-TENA_CHICK/864-945 L.DAP.RNLKRVSQ..TDNSITLEWKNS......HANIDNYRIKF...AP -TENA_CHICK/956-1033 L.DNP.KDLEVSDP..TETTLSLRWRRP......VAKFDRYRLTY...VS -TENA_CHICK/1045-1124 E.PEL.GNLSVSET..GWDGFQLTWTAA......DGAYENFVIQV...QQ -TENA_CHICK/1136-1215 H.PEV.GELTVSDI..TPESFNLSWTTT......NGDFDAFTIEI...ID -TENA_CHICK/1227-1306 E.PEV.DNLLVSDA..TPDGFRLSWTAD......DGVFDSFVLKIR..DT -TENA_CHICK/1317-1395 V.GSP.KGISFSDI..TENSATVSWTPP......RSRVDSYRVSY...VP -TENA_CHICK/1406-1483 L.DSP.SGLVVMNI..TDSEALATWQPA......IAAVDNYIVSY...SS -TENA_CHICK/1494-1571 L.DAP.KDLSATEV..QSETAVITWRPP......RAPVTDYLLTY...ES -TENA_HUMAN/1254-1334 E.VPDmGNLTVTEV..SWDALRLNWTTP......DGTYDQFTIQV...QE -TENA_HUMAN/1528-1607 L.PLL.ENLTISDI..NPYGFTVSWMAS......ENAFDSFLVTV...VD -TIE1_HUMAN/446-533 P.PVPlAAPRLLTK..QSRQLVVSPLVSFs...GDGPISTVRLHYR..PQ -TIE1_HUMAN/545-632 PlLQP.WLEGWHVE..GTDRLRVSWSLPLv..pGPLVGDGFLLRL...WD -TIE1_HUMAN/644-729 P.PAP.RHLHAQAL..SDSEIQLTWKHPEa...LPGPISKYVVEV...QV -TIE2_HUMAN/444-529 L.PKPlNAPNVIDT..GHNFAVINISSEPy..fGDGPIKSKKLLY...KP -TIE2_HUMAN/543-626 L.PPP.RGLNLLPK..SQTTLNLTWQPIFp...SSEDDFYVEVERR...S -TIE2_HUMAN/639-724 P.PQP.ENIKISNI..THSSAVISWTILD.....GYSISSITIRY...KV -UFO_HUMAN/327-411 L.GPP.ENISATR...NGSQAFVHWQEPRa..pLQGTLLGYRLAY...QG - -7LES_DROME/1799-1891 ELDGEnvqd..rrewEAHER...RLET....AG..THRLTGIKPGSGYSL -7LES_DROVI/1917-1997 LREQ............LQFN...VAGN....HT..QMRLAPLQPKTRYSC -APU_THETY/928-1009 TVKGG..........LYEKI...ASNV....TQi.TYTDTEVTNGLKYVY -APU_THETY/1165-1248 SETGPf.........IKIAT...VSDS....VY..NYVDTDVVNGNVYYY -AXO1_CHICK/602-692 LLSNKwkq.....mrTNPVN...IEGN....AE..TAQVVNLIPWMDYEF -AXO1_CHICK/807-896 DGDKEea.......aDRVRT...AGLV....T...SAHVTGLNPNTKYHV -CAML_HUMAN/812-907 EGSQRkhsk..rhihKDHVV...VPAN....TT..SVILSGLRPYSSYHL -CHI1_BACCI/465-542 .GAN............LATS...VTGT....T....ATISGLTAGTSYTF -CHIT_STRLI/142-219 .DGA............KVAT...VTGT....T....YTDNGLTKGTAYSY -CHIX_STROI/169-240 .DGV............KVAT...ASGT....S....ATVTGLTPDTAYAF -CONT_CHICK/799-884 
AHDKEa........aAQRVQ...VSNQ....EY..STKLENLKPNTRYHI -CPSF_CHICK/630-716 KGSMRw........mKLNFE...VFPD....T...TYESTKMIEGVFYEM -CPSF_CHICK/923-1008 TRTME..........WFTVL...EHSR....PT..RCTVSELVMGNEYRF -ECK_HUMAN/329-420 E.SGEcgp....ceaSVRYS...EPPHgl.tRT..SVTVSDLEPHMNYTF -ECK_HUMAN/436-519 KGDS............NSYN...VRRT....EGf.SVTLDDLAPDTTYLV -EPH1_HUMAN/333-435 QGTAQdggpcqpcgvGVHFSpgaRGLT....TP..AVHVNGLEPYANYTF -EPH3_CHICK/333-429 CGSGRgact...rcgDNVQF...APRQlgltEP..RIYISDLLAHTQYTF -EPH3_CHICK/444-528 KNLSE..........LNSTA...VKSP....TN..TVTVQNLKAGTIYVF -ETK1_CHICK/325-421 CGGSSkice...pcsDNVRF...LPRQtg.lTNt.TVTVVDLLAHTNYTF -FAS2_SCHAM/530-616 DSQGW..........EDALN...RTWP....VDs.PYILENLKPQTRYNF -FAS2_SCHAM/642-735 VEKYDtewrl.lpypCQEHK...LEGQ....AT..TFQLESLQPDTHYKV -FINC_BOVIN/577-660 KNSPDr.........WKEAT...IPGH....LN..SYTIKGLRPGVVYEG -FINC_BOVIN/689-768 SEEGDe.........PQYLD...LPST....AT..SVNIPDLLPGRKYTV -FINC_BOVIN/780-858 SVEGS..........STELN...LPET....AN..SVTLSDLQPGVQYNI -FINC_BOVIN/875-955 VNLPGe........hGQRLP...VSRN....T...FAEVTGLSPGVTYHF -FINC_BOVIN/1142-1225 TNGQQg........ySLEEV...VHAD....QS..SCTFENLSPGLEYNV -FINC_BOVIN/1236-1316 VKNEEd.........VAELS...ISPS....DN..AVVLTNLLPGTEYLV -FINC_BOVIN/1327-1406 ENMGGr.........PREDR...VPPS....RN..SITLTNLNPGTEYVV -FINC_BOVIN/1417-1499 TGGSSp.........VQEFT...VPGS....KS..TATISGLKPGVDYTI -FINC_BOVIN/1511-1590 KNGPGp.........SKTKT...VGPD....QT..EMTIEGLQPTVEYVV -FINC_BOVIN/1601-1680 KEKTGp.........MKEIN...LAPD....SS..SVVVSGLMVATKYEV -FINC_BOVIN/1693-1771 ANGQT..........PIQRT...IRPD....VR..SYTITGLQPGTDYKI -FINC_BOVIN/1782-1861 PGSPPr........eVVPRP...RPGV....T...EATITGLEPGTEYTI -FINC_CHICK/551-630 PEDG............IHEL...LPAPgg.eED..TAELHGLRPGSEYTI -FINC_RAT/1266-1346 AGEGIp.........IFEDF...VDSS....VG..YYTVTGLEPGIDYDI -GUNB_CELFI/651-733 QGTTQ..........TLVGT...TTAA....A....YILRDLTPGTAYSY -IL7R_HUMAN/129-221 KDENK..........WTHVN...LSST....KL..TLLQRKLQPAAMYEI -ITB4_HUMAN/1127-1208 QGDSEs.........EAHLL...DSKV....P...SVELTNLYPYCDYEM 
-ITB4_HUMAN/1220-1310 VNDDNrpi.....gpMKKVL...VDNP....KNr.MLLIENLRESQPYRY -ITB4_HUMAN/1581-1665 LNGGE..........LHRLN...IPNP....AQt.SVVVEDLLPNHSYVF -ITB4_HUMAN/1694-1781 AQGGGpa.......tAFRVD...GDSP....ES..RLTVPGLSENVPYKF -KALM_CHICK/178-271 QGIHPsed.....daTNWQT...VAQT....TDe.RVQLSDIRASRWYQF -KALM_CHICK/544-642 VTTESrqnslpnsiiSQSQI...LPAD....HY..VLTVPNLRPSMLYRL -KMLC_CHICK/60-145 SVDNK..........WTDLT...TCRS....T...SFNVQDLQADREYKF -LAR_DROME/322-404 KNANQ..........AFSEI...SGII....TM..YYVVRALSPYTEYEF -LAR_DROME/417-503 TNSNQpe......asWNSQM...VDNS....E...LTTVSDVTPHAIYTV -LAR_DROME/515-598 TYANQ..........AHHKR...ISNS....E...AYTLDGLYPDTLYYI -LAR_DROME/709-800 LRDEGkgf....lnePFKFD...VVDT....L...EFNVTGLQPDTKYSI -LAR_DROME/909-995 IDHGL..........GSERN...MTLR....K....AVFTNLEENTEYIF -MPSF_CHICK/371-457 VGLEN..........WVQCN...DAPV....KIc.KYPVTGLYEGRSYIF -MPSF_CHICK/499-585 VGSGS..........WQRVNaqvAVKS....P...RYAVFDLAEGKPYVF -MPSF_CHICK/600-684 VGSNQwe.......pANHKP...INYN....R....FVVHGLETGEQYIF -MPSF_CHICK/699-785 ANHKN..........WHEVNssvISRT....I....YTVEDLTEDAFYEF -MPSF_CHICK/801-887 VDTED..........WITAN...EKPT....SHr.YFKVTDLHQGHTYVF -NCA1_BOVIN/509-597 MGEEVw........hSKWYD...AKEA....SMegIVTIVGLKPETTYAV -NCA1_BOVIN/610-691 LSSEW..........KPEIR...LPSG....SD..HVMLKSLDWNAEYEV -NGCA_CHICK/700-794 LEEPGgggps.ggfpWAEST...VDAP....P....VVVGGLPPFSPFQI -NRCA_CHICK/623-709 GLHEPg........vWHYQT...EVPG....SH..TTVQLKLSPYVNYSF -NRCA_CHICK/726-810 DVDDE..........WTSVV...VANV....S...KYIVSGTPTFVPYEI -NRCA_CHICK/928-1014 INNTHel......gpLVEIR...IPAN....ES..SLILKNLNYSTRYKF -NRG_DROME/717-799 .RDIPaa......awENNNI...FDWR....QN..NIVIADQPTFVKYLI -NRG_DROME/815-905 ENEGEe........gLREIH...VKGD....TH..NALVTQFKPDSKNYA -NRG_DROME/917-1007 V.KESyvge..rreyDPHIT...DPRV....T...RMKMAGLKPNSKYRI -PHB_ALCFA/344-418 .NGS............KVGS...ATAT....A....YTDSGLIAGTTYSY -PTP1_DROME/123-205 LSEASss.......yNRTFQ...VNDN....TF..QHSVKELTPGATYQV -PTP1_DROME/217-301 PDANDsvl.....yvEKEGE...PPGP....A...QAAFKGLVPGRAYNI 
-PTP1_DROME/312-394 TRRQS..........TVPRS...NEPV....AF..SDFRDIAEPGKTFNV -PTP1_DROME/405-485 TFNGD..........TSTLT...TDRT....R....FTLESLLPGRNYSL -PTP1_DROME/583-661 DSEQQ..........WVRLP...SVRS....T...EADITDMTKGEKYTI -PTP1_DROME/864-944 A.DNL..........LAQNM...TTRN....E....ITISDLRPHRNYTF -PTP1_DROME/958-1044 INNLT..........DAGMQ...DFES....EEa.FGVIKNLKPGETYVF -PTP6_DROME/236-321 AGTPTft.......yHKDFI...NGSH....T...SYILDHFKPNTTYFL -PTP6_DROME/332-425 SGEVPkvi.....eeAIYQQ...NSRN....L...PYMFDKLKTATDYEF -PTP9_DROME/171-259 DNQTGve......ivKNSRN...SVET....LI..HFELQNLRPYTDYRV -PTPB_HUMAN/22-103 DTLGAa........lCPTFR...IDNT....TY..GCNLQDLQAGTIYNF -PTPB_HUMAN/112-192 ENNQKiq......gvQIQES...TSWN....E....YTFFNLTAGSKYNI -PTPB_HUMAN/467-543 R.DLL..........LIHKS...LSKD....AK..EFTFTDLVPGRKYMA -PTPB_HUMAN/554-632 ENVV...........IKNES...ISSE....TS..RYSFHSLKSGSLYSV -PTPB_HUMAN/643-725 DGKV...........VQSLV...IAKS....VR..ECSFSSLTPGRLYTV -PTPB_HUMAN/731-808 KNNF...........IQTKS...IPKS....EN..ECVFVQLVPGRLYSV -PTPB_HUMAN/907-984 H.SQK..........VDSQT...IPKH....VF..EHTFHRLEAGEQYQI -PTPB_HUMAN/995-1074 ENGIL..........LRNTS...EPAT....TK..QHKFEDLTPGKKYKI -PTPB_HUMAN/1085-1162 PDGNLq.........ERAQV...DPLV....Q...SFSFQNLLQGRMYKM -PTPB_HUMAN/1173-1250 PNGTKk.........ENWKD...KDLT....E....WRFQGLVPGRKYVL -PTPB_HUMAN/1261-1344 RDALTv.........FNPYN...NRKS....E...GRIVYGLRPGRSYQF -PTPB_HUMAN/1355-1434 MDTQEv.........EFSRK...LEKE....KS..LLNIMMLVPHKRYLV -PTPK_MOUSE/290-376 T.SGS..........WTETH...AVNA....P...TYKLWHLDPDTEYEI -PTPZ_HUMAN/312-401 LDGEDq........tKHEFL...TDGY....QDl.GAILNNLLPNMSYVL -SEK_MOUSE/441-525 KDQN...........ERSYR...IVRT....AAr.NTDIKGLNPLTSYVF -TENA_CHICK/593-671 TSSGGl.........DLQFT...VPGN....QT..SATIHELEPGVEYFI -TENA_CHICK/682-767 KDDNG..........DITSS...LKRP....ET..SYMQPGLAPGQQYNV -TENA_CHICK/774-853 KDVPGd.........RTTID...LSED....EN..QYSIGNLRPHTEYEV -TENA_CHICK/864-945 ISGGD..........HTELT...VPKGnq.aTT..RATLTGLRPGTEYGI -TENA_CHICK/956-1033 P.SGK..........KNEME...IPVD....ST..SFILRGLDAGTEYTI 
-TENA_CHICK/1045-1124 SDNPEe.........TWNIT...VPGG....QH..SVNVTGLKANTPYNV -TENA_CHICK/1136-1215 SNRLLe.........PMEFN...ISGN....SR..TAHISGLSPSTDFIV -TENA_CHICK/1227-1306 KRKSD..........PLELI...VPGH....ER..THDITGLKEGTEYEI -TENA_CHICK/1317-1395 ITGGT..........PNVVT...VDGS....KT..RTKLVKLVPGVDYNV -TENA_CHICK/1406-1483 EDEP...........EVTQM...VSGN....TV..EYDLNGLRPATEYTL -TENA_CHICK/1494-1571 I.DGR..........VKEVI...LDPE....TT..SYTLTELSPSTQYTV -TENA_HUMAN/1254-1334 ADQVEe.........AHNLT...VPGS....LR..SMEIPGLRAGTPYTV -TENA_HUMAN/1528-1607 SGKLLd.........PQEFT...LSGT....QR..KLELRGLITGIGYEV -TIE1_HUMAN/446-533 DSTMD..........WSTIV...VDPS....E...NVTLMNLRPKTGYSV -TIE1_HUMAN/545-632 GTRGQ..........ERREN...VSSP....QAr.TALLTGLTPGTHYQL -TIE1_HUMAN/644-729 AGGAGd.........PLWID...VDRP....EEt.STIIRGLNASTRYLF -TIE2_HUMAN/444-529 VNHYEa.........WQHIQ...VTNE....I....VTLNYLEPRTEYEL -TIE2_HUMAN/543-626 VQKSD..........QQNIK...VPGN....LT..SVLLNNLHPREQYVV -TIE2_HUMAN/639-724 QGKNE..........DQHVDv.kIKNA....TIi.QYQLKGLEPETAYQV -UFO_HUMAN/327-411 QDTPE..........VLMDI...GLRQ....EV..TLELQGDGSVSNLTV - -7LES_DROME/1799-1891 WVQ.AHATPTk....SNSS -7LES_DROVI/1917-1997 RLA.LAYAATp....GAPI -APU_THETY/928-1009 AVT.AVDNDGn...eSALS -APU_THETY/1165-1248 KVV.AVDTSYn....RTAS -AXO1_CHICK/602-692 RVL.ASNILGv....GEPS -AXO1_CHICK/807-896 SVR.AYNRAGa....GPPS -CAML_HUMAN/812-907 EVQ.AFNGRGs....GPAS -CHI1_BACCI/465-542 TIK.AKDAAGn...lSAAS -CHIT_STRLI/142-219 SVK.ARDTADq...tGPAS -CHIX_STROI/169-240 QVA.AVNGA.......GES -CONT_CHICK/799-884 DVS.AFNSAGy....GPPS -CPSF_CHICK/630-716 RVF.AVNAIGv....SQPS -CPSF_CHICK/923-1008 RVY.SENVCGt....SQEP -ECK_HUMAN/329-420 TVE.ARNGV........SG -ECK_HUMAN/436-519 QVQ.ALTQEGq....GAGS -EPH1_HUMAN/333-435 NVE.AQNGVSglgssGHAS -EPH3_CHICK/333-429 EIQ.AVNGVTd...qSPFS -EPH3_CHICK/444-528 QVR.ARTVAGy....GRYS -ETK1_CHICK/325-421 EID.AVNGVSd...lSTLS -FAS2_SCHAM/530-616 RFA.AQNEVGf....GPWS -FAS2_SCHAM/642-735 EVR.ATNAIGn....SVPG -FINC_BOVIN/577-660 QLI.SVQHY......GQRE -FINC_BOVIN/689-768 
NVY.EISEE.......GEQ -FINC_BOVIN/780-858 TIY.AVEEN.......QES -FINC_BOVIN/875-955 KVF.AVNQG.......RES -FINC_BOVIN/1142-1225 SVY.TVKDD.......KES -FINC_BOVIN/1236-1316 SVS.SVYEQ.......HES -FINC_BOVIN/1327-1406 SIV.ALNSK.......EES -FINC_BOVIN/1417-1499 TVY.AVTGRGd....SPAS -FINC_BOVIN/1511-1590 SVY.AQNQN.......GES -FINC_BOVIN/1601-1680 SVY.ALKDT.......LTS -FINC_BOVIN/1693-1771 HLY.TLNDN.......ARS -FINC_BOVIN/1782-1861 QVI.ALKNN.......QKS -FINC_CHICK/551-630 NIV.AIYDD.......MES -FINC_RAT/1266-1346 SVI.TLING.......GES -GUNB_CELFI/651-733 VVK.AKDVAGn...vSAAS -IL7R_HUMAN/129-221 KVR.SIPDHYfkgfwSEWS -ITB4_HUMAN/1127-1208 KVC.AYGAQGe....GPYS -ITB4_HUMAN/1220-1310 TVK.ARNGAGw....GPER -ITB4_HUMAN/1581-1665 RVR.AQSQEGw....GRER -ITB4_HUMAN/1694-1781 KVQ.ARTTEGf....GPER -KALM_CHICK/178-271 RVA.AVNVHGt...rGFTA -KALM_CHICK/544-642 EVQ.VLTTGGe....GPAT -KMLC_CHICK/60-145 RVR.AANVYGi....SEPS -LAR_DROME/322-404 YVI.AVNNIGr....GPPS -LAR_DROME/417-503 RVQ.AYTSMGa....GPMS -LAR_DROME/515-598 WLA.ARSQRGe....GATT -LAR_DROME/709-800 QVA.ALTRKGd....GDRS -LAR_DROME/909-995 RVR.AYTKQGa....GPFS -MPSF_CHICK/371-457 RVR.AVNSAGi....SRPS -MPSF_CHICK/499-585 RVL.SANKHGi....SDPS -MPSF_CHICK/600-684 RVK.AVNAVGf....SENS -MPSF_CHICK/699-785 KIA.AANVVGi....GHPS -MPSF_CHICK/801-887 KVR.AVNDAGv....GKSS -NCA1_BOVIN/509-597 RLA.ALNGKGl....GEIS -NCA1_BOVIN/610-691 YVV.AENQQ.......GKS -NGCA_CHICK/700-794 RVQ.AVNGAGk....GPEA -NRCA_CHICK/623-709 RVI.AVNEIGr....SQPS -NRCA_CHICK/726-810 KVQ.ALNDLGy...aPEPS -NRCA_CHICK/928-1014 YFN.AQTSV......GSGS -NRG_DROME/717-799 KVV.AINDR.......GES -NRG_DROME/815-905 RIL.AYNGRFn....GPPS -NRG_DROME/917-1007 SIT.ATTKMGe....GSEH -PHB_ALCFA/344-418 TVT.AVDPTAg...eSQPS -PTP1_DROME/123-205 QAY.TIYDG.......KES -PTP1_DROME/217-301 SVQ.TMSED.......EIS -PTP1_DROME/312-394 IVK.TVSGK.......VTS -PTP1_DROME/405-485 SVQ.AVSKK.......MES -PTP1_DROME/583-661 QVN.TVSFG.......VES -PTP1_DROME/864-944 TVV.VRSGTEss..vLRSS -PTP1_DROME/958-1044 KIQ.AKTAIGf....GPER -PTP6_DROME/236-321 RIV.GKNSIGn....GQPT 
-PTP6_DROME/332-425 RVR.ACSDLTkt..cGPWS -PTP9_DROME/171-259 IVK.AFTTKNe....GEPS -PTPB_HUMAN/22-103 KII.SLDEE........RT -PTPB_HUMAN/112-192 AIT.AVSGG.......KRS -PTPB_HUMAN/467-543 TVT.SISGD........LK -PTPB_HUMAN/554-632 VVT.TVSGG.......ISS -PTPB_HUMAN/643-725 TIT.TRSGKYe...nHSFS -PTPB_HUMAN/731-808 TVT.TKSGQ........YE -PTPB_HUMAN/907-984 MIA.SVSGS........LK -PTPB_HUMAN/995-1074 QIL.TVSGG.......LFS -PTPB_HUMAN/1085-1162 VIV.THSGE........LS -PTPB_HUMAN/1173-1250 WVV.THSGD........LS -PTPB_HUMAN/1261-1344 NVK.TVSGDSw....KTYS -PTPB_HUMAN/1355-1434 SIK.VQSAG.......MTS -PTPK_MOUSE/290-376 RVLlTRPGEGg...tGLPG -PTPZ_HUMAN/312-401 QIV.AICTNGl...yGKYS -SEK_MOUSE/441-525 HVR.ARTAAGy....GDFS -TENA_CHICK/593-671 RVF.AILKN.......KKS -TENA_CHICK/682-767 SLH.IVKNNTr...gPGLS -TENA_CHICK/774-853 TLI.SRRGD.......MES -TENA_CHICK/864-945 GVT.AVRQD.......RES -TENA_CHICK/956-1033 SLV.AEKGR.......HKS -TENA_CHICK/1045-1124 TLY.GVIRG.......YRT -TENA_CHICK/1136-1215 YLY.GISHG.......FRT -TENA_CHICK/1227-1306 ELY.GVSSG.......RRS -TENA_CHICK/1317-1395 NII.SVKGF.......EES -TENA_CHICK/1406-1483 RVH.AVKDA.......QKS -TENA_CHICK/1494-1571 KLQ.ALSRS.......MRS -TENA_HUMAN/1254-1334 TLH.GEVRG.......HST -TENA_HUMAN/1528-1607 MVS.GFTQG.......HQT -TIE1_HUMAN/446-533 RVQlSRPGEGg...eGAWG -TIE1_HUMAN/545-632 DVQ.LYHCTLl....GPAS -TIE1_HUMAN/644-729 RMR.ASI.QGl....GDWS -TIE2_HUMAN/444-529 CVQ.LVRRGEg....GEGH -TIE2_HUMAN/543-626 RAR..VNTKAq....GEWS -TIE2_HUMAN/639-724 DIF.AENNIGs....SNPA -UFO_HUMAN/327-411 CVA.AYTAAGd....GPWS -// diff --git a/forester/archive/RIO/others/hmmer/testsuite/masks_test.c b/forester/archive/RIO/others/hmmer/testsuite/masks_test.c deleted file mode 100644 index 9713ffd..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/masks_test.c +++ /dev/null @@ -1,149 +0,0 @@ -/* masks_test.c - * SRE, Tue Nov 18 11:10:20 1997 [St. 
Louis] - * - * Test driver for sequence masking routines in masks.c - * - * CVS $Id: masks_test.c,v 1.1.1.1 2005/03/22 08:34:46 cmzmasek Exp $ - */ - -#include - -#include "structs.h" -#include "funcs.h" -#include "globals.h" -#include "squid.h" - -static char banner[] = "\ -masks_test : testing of repeat masking code in masks.c"; - -static char usage[] = "\ -Usage: testdriver [-options]\n\ - Available options are:\n\ - -h : help; display this usage info\n\ - -v : verbose output\n\ -"; - -static char experts[] = "\ - --xnu : apply xnu to seqs in \n\ -\n"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-v", TRUE, sqdARG_NONE }, - { "--xnu", FALSE, sqdARG_STRING }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -/* The test sequence and result from the XNU software distribution - */ -static char *test1 = "\ -ACDEFGHIKLMNPQRQRQRQRQRQRQRQRQRSTVWYACDEFGHIKLMNPQRQRQRQRQRQ\ -RQRQRQRSTVWYACDEFGHIKLMNPQRQRQRQRQRQRQRQRQRSTVWYACDEFGHIKLMN\ -PQRQRQRQRQRQRQRQRQRSTVWYACDEFGHIKLMNPQRQRQRQRQRQRQRQRQRSTVWY\ -ACDEFGHIKLMNPQRQRQRQRQRQRQRQRQRSTVWY"; - -static char *answer1 = "\ -ACDEFGHIKLMNPXXXXXXXXXXXXXXXXXXSTVWYACDEFGHIKLMNPXXXXXXXXXXX\ -XXXXXXXSTVWYACDEFGHIKLMNPXXXXXXXXXXXXXXXXXXSTVWYACDEFGHIKLMN\ -PXXXXXXXXXXXXXXXXXXSTVWYACDEFGHIKLMNPXXXXXXXXXXXXXXXXXXSTVWY\ -ACDEFGHIKLMNPXXXXXXXXXXXXXXXXXXSTVWY"; - -int -main(int argc, char **argv) -{ - char *seq; - char *dsq; - int len; - int i,j; - char *result; - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - int be_verbose; - char *xnufile; /* NULL, or file to run xnu on */ - - - /*********************************************** - * Parse command line - ***********************************************/ - - be_verbose = FALSE; - xnufile = NULL; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-v") == 0) { be_verbose = TRUE; } - else if 
(strcmp(optname, "--xnu") == 0) { xnufile = optarg; } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 0) - Die("Incorrect number of arguments.\n%s\n", usage); - - SetAlphabet(hmmAMINO); - - /* XNU test - */ - seq = test1; - len = (int) strlen(seq); - dsq = DigitizeSequence(seq, len); - XNU(dsq, len); - result = MallocOrDie(sizeof(char) * (len+1)); - - for (i = 0; i < len; i++) - result[i] = Alphabet[(int) dsq[i+1]]; - result[len] = '\0'; - - if (be_verbose) - { - printf("XNU test:\n"); - for (i = 1; i <= len; i+=60) - { - for (j = i; j < i+60 && j <= len; j++) - putc(Alphabet[(int) dsq[j]], stdout); - putc('\n', stdout); - } - if (strcmp(answer1, result) == 0) - printf("-- OK; Identical to expected\n"); - } - - if (strcmp(answer1, result) != 0) - Die("XNU test failed."); - free(result); - free(dsq); - - /* On demand XNU test. - */ - if (xnufile != NULL) - { - int format; - SQFILE *sqfp; - SQINFO sqinfo; - int xnum; - - if ((sqfp = SeqfileOpen(xnufile, SQFILE_UNKNOWN, NULL)) == NULL) - Die("Failed to open sequence database file %s\n%s\n", xnufile, usage); - while (ReadSeq(sqfp, sqfp->format, &seq, &sqinfo)) - { - dsq = DigitizeSequence(seq, sqinfo.len); - xnum = XNU(dsq, sqinfo.len); - result = DedigitizeSequence(dsq, sqinfo.len); - - printf("%-20s\t%5d\n", sqinfo.name, xnum); - if (be_verbose) - WriteSeq(stdout, SQFILE_FASTA, result, &sqinfo); - - free(dsq); - FreeSequence(seq, &sqinfo); - free(result); - } - SeqfileClose(sqfp); - } - - return EXIT_SUCCESS; -} diff --git a/forester/archive/RIO/others/hmmer/testsuite/parsingviterbi_test.c b/forester/archive/RIO/others/hmmer/testsuite/parsingviterbi_test.c deleted file mode 100644 index 57eab1e..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/parsingviterbi_test.c +++ /dev/null @@ -1,167 +0,0 @@ -/* parsingviterbi_test.c - * Wed Mar 4 15:07:37 1998 - * cp trace_test.c ../src/testdriver.c; cd ../src; make testdriver 
- * - * Test driver for P7ParsingViterbi(); alignment in linear memory. - * - * CVS $Id: parsingviterbi_test.c,v 1.1.1.1 2005/03/22 08:34:47 cmzmasek Exp $ - */ - -#include -#include -#include - -#include "structs.h" -#include "funcs.h" -#include "globals.h" -#include "squid.h" - -static char banner[] = "\ -parsingviterbi_test : testing of Plan7 linear memory alignment code"; - -static char usage[] = "\ -Usage: parsingviterbi_test [-options]\n\ - Available options are:\n\ - -h : help; display this usage info\n\ - -v : be verbose\n\ -"; - -static char experts[] = "\ -\n"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-v", TRUE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *hmmfile; /* file to read HMM(s) from */ - HMMFILE *hmmfp; /* opened hmmfile for reading */ - char *seqfile; /* file to read target sequence(s) from */ - SQFILE *sqfp; /* opened seqfile for reading */ - char *seq; /* target sequence */ - SQINFO sqinfo; /* optional info for seq */ - char *dsq; /* digitized target sequence */ - struct plan7_s *hmm; /* HMM to search with */ - struct p7trace_s *tr1; /* traceback from P7Viterbi() */ - struct p7trace_s *tr2; /* traceback from P7ParsingViterbi() */ - int nseq; - float sc1, sc2; /* scores from Viterbi, ParsingViterbi() */ - - struct p7trace_s **tarr; /* array of decomposed Viterbi traces */ - int ntr; /* number of traces */ - int i1,i2,k1,k2; /* starts, stops in seq, model for Viterbi */ - int idx; /* index of a decomposed trace */ - - int be_verbose; - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - /*********************************************** - * Parse command line - ***********************************************/ - - be_verbose = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, 
"-v") == 0) be_verbose = TRUE; - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 0) - Die("Incorrect number of arguments.\n%s\n", usage); - - hmmfile = "fn3.hmm"; - seqfile = "titin.fa"; - - /*********************************************** - * Open test sequence file - ***********************************************/ - - if ((sqfp = SeqfileOpen(seqfile, SQFILE_UNKNOWN, "BLASTDB")) == NULL) - Die("Failed to open sequence database file %s\n%s\n", seqfile, usage); - - /*********************************************** - * Open HMM file - * Read a single HMM from it. (Config HMM, if necessary). - ***********************************************/ - - if ((hmmfp = HMMFileOpen(hmmfile, NULL)) == NULL) - Die("Failed to open HMM file %s\n%s", hmmfile, usage); - if (!HMMFileRead(hmmfp, &hmm)) - Die("Failed to read any HMMs from %s\n", hmmfile); - if (hmm == NULL) - Die("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile); - P7Logoddsify(hmm, TRUE); - - /*********************************************** - * Search HMM against each sequence, using both - * normal Viterbi and P7ParsingViterbi. - ***********************************************/ - - nseq = 0; - while (ReadSeq(sqfp, sqfp->format, &seq, &sqinfo)) - { - nseq++; - dsq = DigitizeSequence(seq, sqinfo.len); - - sc1 = P7Viterbi(dsq, sqinfo.len, hmm, &tr1); - sc2 = P7ParsingViterbi(dsq, sqinfo.len, hmm, &tr2); - - if (be_verbose) - { - printf("test sequence %d: %s %s\n", - nseq, sqinfo.name, - sqinfo.flags & SQINFO_DESC ? 
sqinfo.desc : ""); - for (idx = 0; idx < tr2->tlen; idx++) - printf("%1s %d\n", Statetype(tr2->statetype[idx]), tr2->pos[idx]); - } - - if (sc1 != sc2) - Die("Scores for the two Viterbi implementations are unequal (%d,%d)", sc1, sc2); - - TraceDecompose(tr1, &tarr, &ntr); - if (ntr == 0) - Die("ntr == 0 can't happen"); - if (ntr != (tr2->tlen/2) -1) - Die("# of domains for the two Viterbi implementations are unequal (%d, %d)", - ntr, (tr2->tlen/2) -1); - - for (idx = 0; idx < ntr; idx++) - { - TraceSimpleBounds(tarr[idx], &i1, &i2, &k1, &k2); - - if (i1 != tr2->pos[idx*2 + 1] + 1) - Die("Start positions %d and %d disagree for domain %d\n", - i1, tr2->pos[idx*2 + 1] + 1, idx); - if (i2 != tr2->pos[idx*2 + 2]) - Die("End positions %d and %d disagree for domain %d\n", - i2, tr2->pos[idx*2 + 2], idx); - } - - - for (idx = 0; idx < ntr; idx++) - P7FreeTrace(tarr[idx]); - free(tarr); - FreeSequence(seq, &sqinfo); - P7FreeTrace(tr1); - P7FreeTrace(tr2); - free(dsq); - } - - FreePlan7(hmm); - HMMFileClose(hmmfp); - SeqfileClose(sqfp); - - return EXIT_SUCCESS; -} diff --git a/forester/archive/RIO/others/hmmer/testsuite/titin.fa b/forester/archive/RIO/others/hmmer/testsuite/titin.fa deleted file mode 100644 index 0b0f1dd..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/titin.fa +++ /dev/null @@ -1,386 +0,0 @@ ->gi|2136280|pir||I38344 titin - human -MTTQAPTFTQPLQSVVVLEGSTATFEAHISGFPVPEVSWFRDGQVISTSTLPGVQISFSDGRAKLTIPAV -TKANSGRYSLKATNGSGQATSTAELLVKAETAPPNFVQRLQSMTVRQGSQVRLQVRVTGIPNPVVKFYRD -GAEIQSSLDFQISQEGDLYSLLIAEAYPEDSGTYSVNATNSVGRATSTAELLVQGEEEVPAKKTKTIVST -AQISESRQTRIEKKIEAHFDARSIATVEMVIDGAAGQQLPHKTPPRIPPKPKSRSPTPPSIAAKAQLARQ -QSPSPIRHSPSPVRHVRAPTPSPVRSVSPAARISTSPIRSVRSPLLMRKTQASTVATGPEVPPPWKQEGY -VASSSEAEMRETTLTTSTQIRTEERWEGRYGVQEQVTISGAAGAAASVSASASYAAEAVATGAKEVKQDA -DKSAAVATVVAAVDMARVREPVISAVEQTAQRTTTTAVHIQPAQEQVRKEAEKTAVTKVVVAADKAKEQE -LKSRTKEIITTKQEQMHVTHEQIRKETEKTFVPKVVISAAKAKEQETRISEEITKKQKQVTQEAIMKETR 
-KTVVPKVIVATPKVKEQDLVSRGREGITTKREQVQITQEKMRKEAEKTALSTIAVATAKAKEQETILRTR -ETMATRQEQIQVTHGKVDVGKKAEAVATVVAAVDQARVREPREPGHLEESYAQQTTLEYGYKERISAAKV -AEPPQRPASEPHVVPKAVKPRVIQAPSETHIKTTDQKGMHISSQIKKTTDLTTERLVHVDKRPRTASPHF -TVSKISVPKTEHGYEASIAGSAIATLQKELSATSSAQKITKSVKAPTVKPSETRVRAEPTPLPQFPFADT -PDTYKSEAGVEVKKEVGVSITGTTVREERFEVLHGREAKVTETARVPAPVEIPVTPPTLVSGLKNVTVIE -GESVTLECHISGYPSPTVTWYREDYQIESSIDFQITFQSGIARLMIREAFAEDSGRFTCSAVNEAGTVST -SCYLAVQVSEEFEKETTAVTEKFTTEEKRFVESRDVVMTDTSLTEEQAGPGEPAAPYFITKPVVQKLVEG -GSVVFGCQVGGNPKPHVYWKKSGVPLTTGYRYKVSYNKQTGECKLVISMTFADDAGEYTIVVRNKHGETS -ASASLLEEADYELLMKSQQEMLYQTQVTAFVQEPEVGETAPGFVYSEYEKEYEKEQALIRKKMAKDTVVV -RTYVEDQEFHISSFEERLIKEIEYRIIKTTLEELLEEDGEEKMAVDISESEAVESGFDLRIKNYRILEGM -GVTFHCKMSGYPLPKIAWYKDGKRIKHGERYQMDFLQDGRASLRIPVVLPEDEGIYTAFASNIKGNAICS -GKLYVEPAAPLGAPTYIPTLEPVSRIRSLSPRSVSRSPIRMSPARMSPARMSPARMSPARMSPGRRLEET -DESQLERLYKPVFVLKPVSFKCLEGANCRFDLKVVGRPMPETFWFHDGQQIVNDYTHKVVIKEDGTQSLI -IVPATPSDSGEWTVVAQNRAGRSSISVILTVEAVEHQVKPMFVEKLKNVNIKEGSRLEMKVRATGNPNPD -IVWLKNSDIIVPHKYPKIRIEGTKGEAALKIDSTVSQDSAWYTATAINKAGRDTTRCKVNVEVEFAEPEP -ERKLIIPRGTYRAKEIAAPELEPLHLRYGQEQWEEGDLYDKEKQQKPFFKKKLTSLRLKRFGPAHFECRL -TPISDPTMVVEWLHDGKPLEAANRLRMINEFGYCSLDYGVAYSRDSGIITCRATNKYGTDHTSATLIVKD -EKSLVEESQLPEGRKGLQRIEELERMAHEGALTGVTTDQKEKQKPDIVLYPEPVRVLEGETARFRCRVTG -YPQPKVNWYLNGQLIRKSKRFRVRYDGIHYLDIVDCKSYDTGEVKVTAENPEGVIEHKVKLEIQQREDFR -SVLRRAPEPRPEFHVHEPGKLQFEVQKVDRPVDTTETKEVVKLKRAERITHEKVPEESEELRSKFKRRTE -EGYYEAITAVELKSRKKDESYEELLRKTKDELLHWTKELTEEEKKALAEEGKITIPTFKPDKIELSPSME -APKIFERIQSQTVGQGSDAHFRVRVVGKPDPECEWYKNGVKIERSDRIYWYWPEDNVCELVIRDVTAEDS -ASIMVKAINIAGETSSHAFLLVQAKQLITFTQELQDVVAKEKDTMATFECETSEPFVKVKWYKDGMEVHE -GDKYRMHSDRKVHFLSILTIDTSDAEDYSCVLVEDENVKTTAKLIVEGAVVEFVKELQDIEVPESYSGEL -ECIVSPENIEGKWYHNDVELKSNGKYTITSRRGRQNLTVKDVTKEDQGEYSFVIDGKKTTCKLKMKPRPI -AILQGLSDQKVCEGDIVQLEVKVSLESVEGVWMKDGQEVQPSDRVHIVIDKQSHMLLIEDMTKEDAGNYS -FTIPALGLSTSGRVSVYSVDVITPLKDVNVIEGTKAVLECKVSVPDVTSVKWYLNDEQIKPDDRVQAIVK 
-GTKQRLVINRTHASDEGPYKLIVGRVETNCNLSVEKIKIIRGLRDLTCTETQNVVFEVELSHSGIDVLWN -FKDKEIKPSSKYKIEAHGKIYKLTVLNMMKDDEGKYTFYAGENMTSGKLTVAGGAISKPLTDQTVAESQE -AVFECEVANPDSKGEWLRDGKHLPLTNNIRSESDGHKRRLIIAATKLDDIGEYTYKVATSKTSAKLKVEA -VKIKKTLKNLTVTETQDAVFTVELTHPNVKGVQWIKNGVVLESNEKYAISVKGTIYSLRIKNCAIVDESV -YGFRLGRLGASARLHVETVKIIKKPKDVTALENATVAFEVSVSHDTVPVKWFHKSVEIKPSDKHRLVSER -KVHKLMLQNISPSDAGEYTAVVGQLECKAKLFVETLHITKTMKNIEVPETKTASFECEVSHFNVPSMWLK -NGVEIEMSEKFKIVVQGKLHQLIIMNTSTEDSAEYTFVCGNDQVSATLTVTPIMITSMLKDINAEEKDTI -TFEVTVNYEGISYKWLKNGVEIKSTDKCQMRTKKLTHSLNIRNVHFGDAADYTFVAGKATSTATLYVEAR -HIEFRKHIKDIKVLEKKRAMFECEVSEPDITVQWMKDDQELQITDRIKIQKEKYVHRLLIPSTRMSDAGK -YTVVAGGNVSTAKLFVEGRDVRIRSIKKEVQVIEKQRAVVEFEVNEDDVDAHWYKDGIEINFQVQERHKY -VVERRIHRMFISETRQSDAGEYTFVAGRNRSSVTLYVNAPEPPQVLQELQPVTVQSGKPARFCAMISGRP -QPKISWYKEEQLLSTGFKCKFLHDGQEYTLLLIEAFPEDAAVYTCEAKNDYGVATTSASLSVEVPEVVSP -DQEMPVYPPAIITPLQDTVTSEGQPARFQCRVSGTDLKVSWYSKDKKIKPSRFFRMTQFEDTYQLEIAEA -YPEDEGTYTFVANNAVGQVSSTANLSLEAPESILHERIEQEIEMEMKEFSSSFLSAEEEGLHSAELQLSK -INETLELLSESPVYPTKFDSEKEGTGPIFIKEVSNADISMGDVATLSVTVIGIPKPKIQWFFNGVLLTPS -ADYKFVFDGDDHSLIILFTKLEDEGEYTCMASNDYGKTICSAYLKINSKGEGHKDTETESAVAKSLEKLG -GPCPPHFLKELKPIRCAQGLPAIFEYTVVGEPAPTVTWFKENKQLCTSVYYTIIHNPNGSGTFIVNDPQR -EDSGLYICKAENMLGESTCAAELLVLLEDTDMTDTPCKAKSTPEAPEDFPQTPLKGPAVEALDSEQEIAT -FVKDTILKAALITEENQQLSYEHIAKANELSSQLPLGAQELQSILEQDKLTPESTREFLCINGSIHFQPL -KEPSPNLQLQIVQSQKTFSKEGILMPEEPETQAVLSDTEKIFPSAMSIEQINSLTVEPLKTLLAEPEGNY -PQSSIEPPMHSYLTSVAEEVLSLKEKTVSDTNREQRVTLQKQEAQSALILSQSLAEGHVESLQSPDVMIS -QVNYEPLVPSEHSCTEGGKILIESANPLENAGQDSAVRIEEGKSLRFPLALEEKQVLLKEEHSDNVVMPP -DQIIESKREPVAIKKVQEVQGRDLLSKESLLSGIPEEQRLNLKIQICRALQAAVASEQPGLFSEWLRNIE -KVEVEAVNITQEPRHIMCMYLVTSAKSVTEEVTIIIEDVDPQMANLKMELRDALCAIIYEEIDILTAEGP -RIQQGAKTSLQEEMDSFSGSQKVEPITEPEVESKYLISTEEVSYFNVQSRVKYLDATPVTKGVASAVVSD -EKQDESLKPSEEKEESSSESGTEEVATVKIQEAEGGLIKEDGPMIHTPLVDTVSEEGDIVHLTTSITNAK -EVNWYFENKLVPSDEKFKCLQDQNTYTLVIDKVNTEDHQGEYVCEALNDSGKTATSAKLTVVKRAAPVIK 
-RKIEPLEVALGHLAKFTCEIQSAPNVRFQWFKAGREIYESDKCSIRSSKYISSLEILRTQVVDCGEYTCK -ASNEYGSVSCTATLTVTVPGGEKKVRKLLPERKPEPKEEVVLKSVLRKRPEEEEPKVEPKKLEKVKKPAV -PEPPPPKPVEEVEVPTVTKRERKIPEPTKVPEIKPAIPLPAPEPKPKPEAEVKTIKPPPVEPEPTPIAAP -VTVPVVGKKAEAKAPKEEAAKPKGPIKGVPKKTPSPIEAERRKLRPGSGGEKPPDEAPFTYQLKAVPLKF -VKEIKDIILTESEFVGSSAIFECLVSPSTAITTWMKDGSNIRESPKHRFIADGKDRKLHIIDVQLSDAGE -YTCVLRLGNKEKTSTAKLVVEELPVRFVKTLEEEVTVVKGQPLYLSCELNKERDVVWRKDGKIVVEKPGR -IVPGVIGLMRALTINDADDTDAGTYTVTVENANNLECSSCVKVVEVIRDWLVKPIRDQHVKPKGTAIFAC -DIAKDTPNIKWFKGYDEIPAEPNDKTEILRDGNHLYLKIKNAMPEDIAEYAVEIEGKRYPAKLTLGEREV -ELLKPIEDVTIYEKESASFDAEISEADIPGQWKLKGELLRPSPTCEIKAEGGKRFLTLHKVKLDQAGEVL -YQALNAITTAILTVKEIELDFAVPLKDVTVPERRQARFECVLTREANVIWSKGPDIIKSSDKFDIIADGK -KHILVINDSQFDDEGVYTAEVEGKKTSARLFVTGIRLKFMSPLEDQTVKEGETATFVCELSHEKMHVVWF -KNDAKLHTSRTVLISSEGKTHKLEMKEVTLDDISQIKAQVKELSSTAQLKVLEADPYFTVKLHDKTAVEK -DEITLKCEVSKDVPVKWFKDGEEIVPSPKYSIKADGLRRILKIKKADLKDKGEYVCDCGTDKTKANVTVE -ARLIEVEKPLYGVEVFVGETAHFEIELSEPDVHGQWKLKGQPLTASPDCEIIEDGKKHILILHNCQLGMT -GEVSFQAANAKSAANLKVKELPLIFITPLSDVKVFEKDEAKFECEVSREPKTFRWLKGTQEITGDDRFEL -IKDGTKHSMVIKSAAFEDEAKYMFEAEDKHTSGKLIIEGIRLKFLTPLKDVTAKEKESAVFTVELSHDNI -RVKWFKNDQRLHTTRSVSMQDEGKTHSITFKDLSIDDTSQIRVEAMGMSSEAKLTVLEGDPYFTGKLQDY -TGVEKDEVILQCEISKADAPVKWFKDGKEIKPSKNAVIKTDGKKRMLILKKALKSDIGQYTCDCGTDKTS -GKLDIEDREIKLVRPLHSVEVMETETARFETEISEDDIHANWKLKGEALLQTPDCEIKEEGKIHSLVLHN -CRLDQTGGVDFQAANVKSSAHLRVKPRVIGLLRPLKDVTVTAGETATFDCELSYEDIPVEWYLKGKKLEP -SDKVVPRSEGKVHTLTLRDVKLEDAGEVQLTAKDFKTHANLFVKEPPVEFTKPLEDQTVEEGATAVLECE -VSRENAKVKWFKNGTEILKSKKYEIVADGRVRKLVIHDCTPEDIKTYTCDAKDFKTSCNLNVVPPHVEFL -RPLTDLQVREKEMARFECELSRENAKVKWFKDGAEIKKGKKYDIISKGAVRILVINKCLLDDEAEYSCEV -RTARTSGMLTVLEEEAVFTKNLANIEVSETDTIKLVCEVSKPGAEVIWYKGDEEIIETGRYEILTEGRKR -ILVIQNAHLEDAGNYNCRLPSSRTDGKVKVHELAAEFISKPQNLEILEGEKAEFVCSISKESFPVQWKRD -DKTLESGDKYDVIADGKKRVLVVKDATLQDMGTYVVMVGAARAAAHLTVIEKLRIVVPLKDTRVKEQQEV -VFNCEVNTEGAKAKWFRNEEAIFDSSKYIILQKDLVYTLRIRDAHLDDQANYNVSLTNHRGENVKSAANL 
-IVEEEDLRIVEPLKDIETMEKKSVTFWCKVNRLNVTLKWTKNGEEVPFDNRVSYRVDKYKHMLTIKDCGF -PDEGEYIVTAGQDKSVAELLIIEAPTEFVEHLEDQTVTEFDDAVFSCQLSREKANVKWYRNGREIKEGKK -YKFEKDGSIHRLIIKDCRLDDECEYACGVEDRKSRARLFVEEIPVEIIRPPQDILEAPGADVVFLAELNK -DKVEVQWLRNNMVVVQGDKHQMMSEGKIHRLQICDIKPRDQGEYRFIAKDKEARAKLELAAAPKIKTADQ -DLVVDVGKPLTMVVPYDAYPKAEAEWFKENEPLSTKTIDTTAEQTSFRILEAKKGDKGRYKIVLQNKHGK -AEGFINLKVIDVPGPVRNLEVTETFDGEVSLAWEEPLTDGGSKIIGYVVERRDIKRKTWVLATDRAESCE -FTVTGLQKGGVEYLFRVSARNRVGTGEPVETDNPVEARSKYDVPGPPLNVTITDVNRFGVSLTWEPPEYD -GGAEITNYVIELRDKTSIRWDTAMTVRAEDLSATVTDVVEGQEYSFRVRAQNRIGVGKPSAATPFVKVAD -PIERPSPPVNLTSSDQTQSSVQLKWEPPLKDGGSPILGYIIERCEEGKDNWIRCNMKLVPELTYKVTGLE -KGNKYLYRVSAENKAGVSDPSEILGPLTADDAFVEPTMDLSAFKDGLEVIVPNPITILVPSTGYPRPTAT -WCFGDKVLETGDRVKMKTLSAYAELVISPSERSDKGIYTLKLENRVKTISGEIDVNVIARPSAPKELKFG -DITKDSVHLTWEPPDDDGGSPLTGYVVEKREVSRKTWTKVMDFVTDLEFTVPDLVQGKEYLFKVCARNKC -GPGEPAYVDEPVNMSTPATVPDPPENVKWRDRTANSIFLTWDPPKNDGGSRIKGYIVERCPRGSDKWVAC -GEPVAETKMEVTGLEEGKWYAYRVKTLNRQGASKPSRPTEEIQAVDTQEAPEIFLDVKLLAGLTVKAGTK -IELPATVTGKPEPKITWTKADMILKQDKRITIENVPKKSTVTIVDSKRSDTGTYIIEAVNVCGRATAVVE -VNVLDKPGPPAAFDITDVTNESCLLTWNPPRDDGGSKITNYVVERRATDSEVWHKLSSTVKDTNFKATKL -IPNKEYIFRVAAENMYGAGEPVQASPITAKYQFDPPGPPTRLEPSDITKDAVTLTWCEPDDDGGSPITGY -WVERLDPDTDKWVRCNKMPVKDTTYRVKGLTNKKKYRFRVLAENLAGPGKPSKSTEPILIKDPIDPPWPP -GKPTVKDVGKTSVRLNWTKPEHDGGAKIESYVIEMLKTGTDEWVRVAEGVPTTQHLLPGLMEGQEYSFRV -RAVNKAGESEPSEPSDPVLCREKLYPPSPPRWLEVINITKNTADLKWTVPEKDGGSPITNYIVEKRDVRR -KGWQTVDTTVKDTKCTVTPLTEGSLYVFRVAAENAIGQSDYTEIEDSVLAKDTFTTPGPPYALAVVDVTK -RHVDLKWEPPKNDGGRPIQRYVIEKKERLGTRWVKAGKTAGPDCNFRVTDVIEGTEVQFQVRAENEAGVG -HPSEPTEILSIEDPTSPPSPPLDLHVTDAGRKHIAIAWKPPEKNGGSPIIGYHVEMCPVGTEKWMRVNSR -PIKDLKFKVEEGVVPDKEYVLRVRAVNAIGVSEPSEISENVVAKDPDCKPTIDLETHDIIVIEGEKLSIP -VPFRAVPVPTVSWHKDGKEVKASDRLTMKNDHISAHLEVPKSVRADAGIYTITLENKLGSATASINVKVI -GLPGPCKDIKASDITKSSCKLTWEPPEFDGGTPILHYVLERREAGRRTYIPVMSGENKLSWTVKDLIPNG -EYFFRVKAVNKVGGGEYIELKNPVIAQDPKQPPDPPVDVEVHNPTAEAMTITWKPPLYDGGSKIMGYIIE 
-KIAKGEERWKRCNEHLVPILTYTAKGLEEGKEYQFRVRAENAAGISEPSRATPPTKAVDPIDAPKVILRT -SLEVKRGDEIALDASISGSPYPTITWIKDENVIVPEEIKKRAAPLVRRRKGEVQEEEPFVLPLTQRLSID -NSKKGESQLRVRDSLRPDHGLYMIKVENDHGIAKAPCTVSVLDTPGPPINFVFEDIRKTSVLCKWEPPLD -DGGSEIINYTLEKKDKTKPDSEWIVVTSTLRHCKYSVTKLIEGKEYLFRVRAENRFGPGPPCVSKPLVAK -DPFGPPDAPDKPIVEDVTSNSMLVKWNEPKDNGSPILGYWLEKREVNSTHWSRVNKSLLNALKANVDGLL -EGLTYVFRVCAENAAGPGKFSPPSDPKTAHDPISPPGPPIPRVTDTSSTTIELEWEPPAFNGGGEIVGYF -VDKQLVGTNKWSRCTEKMIKVRQYTVKEIREGADYKLRVSAVNAAGEGPPGETQPVTVAEPQEPPAVELD -VSVKGGIQIMAGKTLRIPAVVTGRPVPTKVWTKEEGELDKDRVVIDNVGTKSELIIKDALRKDHGRYVIT -ATNSCGSKFAAARVEVFDVPGPVLDLKPVVTNRKMCLLNWSDPEDDGGSEITGFIIERKDAKMHTWRQPI -ETERSKCDITGLLEGQEYKFRVIAKNKFGCGPPVEIGPILAVDPLGPPTSPERLTYTERQRSTITLDWKE -PRSNGGSPIQGYIIEKRRHDKPDFERVNKRLCPTTSFLVENLDEHQMYEFRVKAVNEIGESEPSLPLNVV -IQDDEVPPTIKLRLSVRGDTIKVKAGEPVHIPADVTGLPMPKIEWSKNETVIEKPTDALQITKEEVSRSE -AKTELSIPKAVREDKGTYTVTASNRLGSVFRNVHVEVYDRPSPPRNLAVTDIKAESCYLTWDAPLDNGGS -EITHYVIDKRDASRKKAEWEEVTNTAVEKRYGIWKLIPNGQYEFRVRAVNKYGISDECKSDKVVIQDPYR -LPGPPGKPKVLARTKGSMLVSWTPPLDNGGSPITGYWLEKREEGSPYWSRVSRAPITKVGLKGVEFNVPR -LLEGVKYQFRAMAINAAGIGPPSEPSDPEVAGDPIFPPGPPSCPEVKDKTKSSISLGWKPPAKDGGSPIK -GYIVEMQEEGTTDWKRVNEPDKLITTCECVVPNLKELRKYRFRVKAVNEAGESEPSDTTGEIPATDIQEE -PEVFIDIGAQDCLVCKAGSQIRIPAVIKGRPTPKSSWEFDGKAKKAMKDGVHDIPEDAQLETAENSSVII -IPECKRSHTGKYSITAKNKAGQKTANCRVKVMDVPGPPKDLKVSDITRGSCRLSWKMPDDDGGDRIKGYV -IEKRTIDGKAWTKVNPDCGSTTFVVPDLLSEQQYFFRVRAENRFGIGPPVETIQRTTARDPIYPPDPPIK -LKIGLITKNTVHLSWKPPKNDGGSPVTHYIVECLAWDPTGTKKEAWRQCNKRDVEELQFTVEDLVEGGEY -EFRVKAVNAAGVSKPSATVGPCDCQRPDMPPSIDLKEFMEVEEGTNVNIVAKIKGVPFPTLTWFKAPPKK -PDNKEPVLYDTHVNKLVVDDTCTLVIPQSRRSDTGLYTITAVNNLGTASKEMRLNVLGRPGPPVGPIKFE -SVSADQMTLSWFPPKDDGGSKITNYVIEKREANRKTWVHVSSEPKECTYTIPKLLEGHEYVFRIMAQNKY -GIGEPLDSEPETARNLFSVPGAPDKPTVSSVTRNSMTVNWEEPEYDGGSPVTGYWLEMKDTTSKRWKRVN -RDPIKAMTLGVSYKVTGLIEGSDYQFRVYAINAAGVGPASLPSDPATARDPIAPPGPPFPKVTDWTKSSA -DLEWSPPLKDGGSKVTGYIVEYKEEGKEEWEKGKDKEVRGTKLVVTGLKEGAFYKFRVSAVNIAGIGEPG 
-EVTDVIEMKDRLVSPDLQLDASVRDRIVVHAGGVIRIIAYVSGKPPPTVTWNMNERTLPQEATIETTAIS -SSMVIKNCQRSHQGVYSLLAKNEAGERKKTIIVDVLDVPGPVGTPFLAHNLTNESCKLTWFSPEDDGGSP -ITNYVIEKRESDRRAWTPVTYTVTRQNATVQGLIQGKAYFFRIAAENSIGMGPFVETSEALVIREPITVP -ERPEDLEVKEVTKNTVTLTWNPPKYDGGSEIINYVLESRLIGTEKFHKVTNDNLLSRKYTVKGLKEGDTY -EYRVSAVNIVGQGKPSFCTKPITCKDELAPPTLHLDFRDKLTIRVGEAFALTGRYSGKPKPKVSWFKDEA -DVLEDDRTHIKTTPATLALEKIKAKRSDSGKYCVVVENSTGSRKGFCQVNVVDHPGPPVGPVSFDEVTKD -YMVISWKPPLDDGGSKITNYIIEKKEVGKDVWMPVTSASAKTTCKVSKLLEGKDYIFRIHAENLYGISDP -LVSDSMKAKDRFRVPDAPDQPIVTEVTKDSALVTWNKPHDGGKPITNYILEKRETMSKRWARVTKDPIHP -YTKFRVPDLLEGCQYEFRVSAENEIGIGDPSPPSKPVFAKDPIAKPSPPVNPEAIDTTCNSVDLTWQPPR -HDGGSKILGYIVEYQKVGDEEWRRANHTPESCPETKYKVTGLRDGQTYKFRVLAVNAAGESDPAHVPEPV -LVKDRLEPPELILDANMAREQHIKVGDTLRLSAIIKGVPFPKVTWKKEDRDAPTKARIDVTPVGSKLEIR -NAAHEDGGIYSLTVENPAGSKTVSVKVLVLDKPGPPRDLEVSEIRKDSCYLTWKEPLDDGGSVITNYVVE -RRDVASAQWSPLSATSKKKSHFAKHLNEGNQYLFRVAAENQYGRGPFVETPKPIKALDPLHPPGPPKDLH -HVDVDKTEVSLVWNKPDRDGGSPITGYLVEYQEEGTQDWIKFKTVTNLECVVTGLQQGKTYRFRVKAENI -VGLGLPDTTIPIECQEKLVPPSVELDVKLIEGLVVKAGTTVRFPAIIRGVPVPTAKWTTDGSEIKTDEHY -TVETDNFSSVLTIKNCLRRDTGEYQITVSNAAGSKTVAVHLTVLDVPGPPTGPINILDVTPEHMTISWQP -PKDDGGSPVINYIVEKQDTRKDTWGVVSSGSSKTKLKIPHLQKGCEYVFRVRAENKIGVGPPLDSTPTVA -KHKFSPPSPPGKPVVTDITENAATVSWTLPKSDGGSPITGYYMERREVTGKWVRVNKTPIADLKFRVTGL -YEGNTYEFRVFAENLAGLSKPSPSSDPIKACRPIKPPGPPINPKLKDKSRETADLVWTKPLSDGGSPILG -YVVECQKPGTAQWNRINKDELIRQCAFRVPGLIEGNEYRFRIKAANIVGEGEPRELAESVIAKDILHPPE -VELDVTCRDVITVRVGQTIRILARVKGRPEPDITWTKEGKVLVREKRVDLIQDLPRVELQIKEAVRADHG -KYIISAKNSSGHAQGSAIVNVLDRPGPCQNLKVTNVTKENCTISWENPLDNGGSEITNFIVEYRKPNQKG -WSIVASDVTKRLIKANLLANNEYYFRVCAENKVGVGPTIETKTPILAINPIDRPGEPENLHIADKGKTFV -YLKWRRPDYDGGSPNLSYHVERRLKGSDDWERVHKGSIKETHYMVDRCVENQIYEFRVQTKNEGGESDWV -KTEEVVVKEDLQKPVLDLKLSGVLTVKAGDTIRLEAGVRGKPFPEVAWTKDKDATDLTRSPRVKIDTRAD -SSKFSLTKAKRSDGGKYVVTATNTAGSFVAYATVNVLDKPGPVRNLKIVDVSSDRCTVCWDPPEDDGGCE -IQNYILEKCETKRMVWSTYSATVLTPGTTVTRLIEGNEYIFRVRAENKIGTGPPTESKPVIAKTKYDKPG 
-RPDPPEVTKVSKEEMTVVWNPPEYDGGKSITGYFLEKKEKHSTRWVPVNKSAIPERRMKVQNLLPDHEYQ -FRVKAENEIGIGEPSLPSRPVVAKDPIEPPGPPTNFRVVDTTKHSITLGWGKPVYDGGAPIIGYVVEMRP -KIADASPDEGWKRCNAAAQLVRKEFTVTSLDENQEYEFRVCAQNQVGIGRPAELKEAIKPKEILEPPEID -LDASMRKLVIVRAGCPIRLFAIVRGRPAPKVTWRKVGIDNVVRKGQVDLVDTMAFLVIPNSTRDDSGKYS -LTLVNPAGEKAVFVNVRVLDTPGPVSDLKVSDVTKTSCHVSWAPPENDGGSQVTHYIVEKREADRKTWST -VTPEVKKTSFHVTNLVPGNEYYFRVTAVNEYGPGVPTDVPKPVLASDPLSEPDPPRKLEATEMTKNSATL -AWLPPLRDGGAKIDGYIISYREEEQPADRWTEYSVVKDLSLVVTGLKEGKKYKFRVAARNAVGVSLPREA -EGVYEAKEQLLPPKILMPEQITIKAGKKLRIEAHVYGKPHPTCKWKKGEDEVVTSSHLAVHKADSSSILI -IKDVTRKDSGYYSLTAENSSGTDTQKIKVVVMDAPGPPQPPFDISDIDADACSLSWHIPLEDGGSNITNY -IVEKCDVSRGDWVTALASVTKTSCRVGKLIPGQEYIFRVRAENRFGISEPLTSPKMVAQFPFGVPSEPKN -ARVTKVNKDCIFVAWDRPDSDGGSPIIGYLIERKERNSLLWVKANDTLVRSTEYPCAGLVEGLEYSFRIY -ALNKAGSSPPSKPTEYVTARMPVDPPGKPEVIDVTKSTVSLIWARPKHDGGSKIIGYFVEACKLPGDKWV -RCNTAPHQIPQEEYTATGLEEKAQYQFRAIARTAVNISPPSEPSDPVTILAENVPPRIDLSVAMKSLLTV -KAGTNVCLDATVFGKPMPTVSWKKDGTLLKPAEGIKMAMQRNLCTLELFSVNRKDSGDYTITAENSSGSK -SATIKLKVLDKPGPPASVKINKMYSDRAMLSWEPPLEDGGSEITNYIVDKRETSRPNWAQVSATVPITSC -SVEKLIEGHEYQFRICAENKYGVGDPVFTEPAIAKNPYDPPGRCDPPVISNITKDHMTVSWKPPADDGGS -PITGYLLEKRETQAVNWTKVNRKPIIERTLKATGLQEGTEYEFRVTAINKAGPGKPSDASKAAYARDPQY -PPAPPAFPKVYDTTRSSVSLSWGKPAYDGGSPIIGYLVEVKRADSDNWVRCNLPQNLQKTRFEVTGLMED -TQYQFRVYAVNKIGYSDPSDVPDKHYPKDILIPPEGEHDADLRKTLILRAGVTMRLYVPVKGRPPPKITW -SKPNVNLRDRIGLDIKSTDFDTFLRCENVNKYDAGKYILTLENSCGKKEYTIVVKVLDTPGPPINVTVKE -ISKDSAYVTWEPPIIDGGSPIINYVVQKRDAERKSWSTVTTECSKTSFRVPNLEEGKSYFFRVFAENEYG -IGDPGETRDAVKASQTPGPVVDLKVRSVSKSSCSIGWKKPHSDGGSRIIGYVVDFLTEENKWQRVMKSLS -LQYSAKDLTEGKEYTFRVSAENENGEGTPSEITVVARDDVVAPDLDLKGLPDLCYLAKENSNFRLKIPIK -GKPAPSVSWKKGEDPLATDTRVSVESSAVNTTLIVYDCQKSDAGKYTITLKNVAGTKEGTISIKVVGKPG -IPTGPIKFDEVTAEAMTLKWAPPKDDGGSEITNYILEKRDSVNNKWVTCASAVQKTTFRVTRLHEGMEYT -FRVSAENKYGVGEGLKSEPIVARHPFDVPDAPPPPNIVDVRHDSVSLTWTDPKKTGGSPITGYHLEFKER -NSLLWKRANKTPIRMRDFKVTGLTEGLEYEFRVMAINLAGVGKPSLPSEPVVALDPIDPPGKPEVINITR 
-NSVTLIWTEPKYDGGHKLTGYIVEKRDLPSKSWMKANHVNVPECAFTVTDLVEGGKYEFRIRAKNTAGAI -SAPSESTETIICKDEYEAPTIVLDPTIKDGLTIKAGDTIVLNAISILGKPLPKSSWSKAGKDIRPSDITQ -ITSTPTSSMLTIKYATRKDAGEYTITATNPFGTKVEHVKVTVLDVPGPPGPVEISNVSAEKATLTWTPPL -EDGGSPIKSYILEKRETSRLLWTVVSEDIQSCRHVATKLIQGNEYIFRVSAVNHYGKGEPVQSEPVKMVD -RFGPPGPPEKPEVSNVTKNTATVSWKRPVDDGGSEITGYHVERREKKSLRWVRAIKTPVSDLRCKVTGLQ -EGSTYEFRVSAENRAGIGPPSEASDSVLMKDAAYPPGPPSNPHVTDTTKKSASLAWGKPHYDGGLEITGY -VVEHQKVGDEAWIKDTTGTALRITQFVVPDLQTKEKYNFRISAINDAGVGEPAVIPDVEIVEREMAPDFE -LDAELRRTLVVRAGLSIRIFVPIKGRPAPEVTWTKDNINLKNRANIENTESFTLLIIPECNRYDTGKFVM -TIENPAGKKSGFVNVRVLDTARPSPQLRPTDITKDSVTLHWDLPLIDGGSRITNYIVEKREATRKSYSTA -TTKCHKCTYKVTGLSEGCEYFFRVMAENEYGIGEPTETTEPVKASEAPSPPDSLNIMDITKSTVSLAWPK -PKHDGGSKITGYVIEAQRKGSDQWTHITTVKGLECVVRNLTEGEEYTFQVMAVNSAGRSAPRESRPVIVK -EQTMLPELDLRGIYQKLVIAKAGDNIKVEIPVLGRPKPTVTWKKGDQILKQTQRVNFETTATSTILNINE -CVRSDSGPYPLTARNIVGEVGDVITIQVHDIPGPPTGPIKFDEVSSDFVTFSWDPPENDGGVPISNYVVE -MRQTDSTTWVELATTVIRTTYKATRLTTGLEYQFRVKAQNRYGVGPGITSAWIVANYPFKVPGPPGTPQV -TAVTKDSMTISWHEPLSDGGSPILGYHVERKERNGILWQTVSKALVPGNIFKSSGLTDGIAYEFRVIAEN -MAGKSKPSKPSEPMLALDPIDPPGKPVPLNITRHTVTLKWAKPEYTGGFKITSYIVEKRDLPNGRWLKAN -FSNILENEFTVSGLTEDAAYEFRVIAKNAAGAISPPSEPSDAITCRDDVEAPKIKVDVKFKDTVILKAGE -AFRLEADVSGRPPPTMEWSKDGKELEGTAKLEIKIADFSTNLVNKDSTRRDSGAYTLTATNPGGFAKHIF -NVKVLDRPGPPEGPLAVTEVTSEKCVLSWFPPLDDGGAKIDHYIVQKRETSRLAWTNVASEVQVTKLKVT -KLLKGNEYIFRVMAVNKYGVGEPLESEPVLAVNPYGPPDPPKNPEVTTITKDSMVVCWGHPDSDGGSEII -NYIVERRDKAGQRWIKCNKKTLTDLRYKVSGLTEGHEYEFRIMAENAAGISAPSPTSPFYKACDTVFKPG -PPGNPRVLDTSRSSISIAWNKPIYDGGSEITGYMVEIALPEEDEWQIVTPPAGLKATSYTITGLTENQEY -KIRIYAMNSEGLGEPALVPGTPKAEDRMLPPEIELDADLRKVVTIRACCTLRLFVPIKGRPDPEVKWARD -HGESLDKASIESASSYTLLIVGNVNRFDSGKYILTVENSSGSKSAFVNVRVLDTPGPPQDLKVKEVTKTS -VTLTWDPPLLDGGSKIKNYIVEKRESTRKAYSTVATNCHKTSWKVDQLQEGCSYYFRVLAENEYGIGLPA -ETAESVKASERPLPPGKITLMDVTRNSVSLSWEKPEHDGGSRILGYIVEMQTKGSDKWATCATVKVTEAT -ITGLIQGEEYSFRVSAQNEKGISDPRQLSVPVIAKDLVIPPAFKLLFNTFTVLAGEDLKVDVPFIGRPTP 
-AVTWHKDNVPLKQTTRVNAESTENNSLLTIKDACREDVGHYVVKLTNSAGEAIETLNVIVLDKPGPPTGP -VKMDEVTADSITLSWGPPKYDGGSSINNYIVEKRDTSTTTWQIVSATVARTTIKACRLKTGCEYQFRIAA -ENRYGKSTYLNSEPTVAQYPFKVPGPPGTPVVTLSSRDSMEVQWNEPISDGGSRVIGYHLERKERNSILW -VKLNKTPIPQTKFKTTGLEEGVEYEFRVSAENIVGIGKPSKVSECYVARDPCDPPGRPEAIIVTRNSVTL -QWKKPTYDGGSKITGYIVEKKELPEGRWMKASFTNIIDTHFEVTGLVEDHRYEFRVIARNAAGVFSEPSE -STGAITARDEVDPPRISMDPKYKDTIVVHAGESFKVDADIYGKPIPTIQWIKGDQELSNTARLEIKSTDF -ATSLSVKDAVRVDSGNYILKAKNVAGERSVTVNVKVLDRPGPPEGPVVISGVTAEKCTLAWKPPLQDGGS -DIINYIVERRETSRLVWTVVDANVQTLSCKVTKLLEGNEYTFRIMAVNKYGVGEPLESEPVVAKNPFVVP -DAPKAPEVTTVTKDSMIVVWERPASDGGSEILGYVLEKRDKEGIRWTRCHKRLIGELRLRVTGLIENHDY -EFRVSAENAAGLSEPSPPSAYQKACDPIYKPGPPNNPKVIDITRSSVFLSWSKPIYDGGCEIQGYIVEKC -DVNVGEWTMCTPPTGINKTNIEVEKLLEKHEYNFRICAINKAGVGEHADVPGPIIVEEKLEAPDIDLDLE -LRKIINIRAGGSLRLFVPIKGRPTPEVKWGKVDGEIRDAAIIDVTSSFTSLVLDNVNRYDSGKYTLTLEN -SSGTKSAFVTVRVLDTPSPPVNLKVTEITKDSVSITWEPPLLDGGSKIKNYIVEKREATRKSYAAVVTNC -HKNSWKIDQLQEGCSYYFRVTAENEYGIGLPAQTADPIKVAEVPQPPGKITVDDVTRNSVSLSWTKPEHD -GGSKIIQYIVEMQAKHSEKWSECARVKSLQAVITNLTQGEEYLFRVVAVNEKGRSDPRSLAVPIVAKDLV -IEPDVKPAFSSYSVQVGQDLKMEVPISGRPKPTITWTKDGLPLKQTTRINVTDSLDLTTLSIKETHKDDG -GQYGITVANVVGQKTASIEIVTLDKPDPPKGPVKFDDVSAESITLSWNPPLYTGGCQITNYIVQKRDTTT -TVWDVVSATVARTTLKVTKLKTGTEYQFRIFAENRYGQSFALESDPIVAQYPYKEPGPPGTPFATAISKD -SMVIQWHEPVNNGGSPVIGYHLERKERNSILWTKVNKTIIHDTQFKAQNLEEGIEYEFRVYAENIVGVGK -ASKNSECYVARDPCDPPGTPEPIMVKRNEITLQWTKPVYDGGSMITGYIVEKRDLPDGRWMKASFTNVIE -TQFTVSGLTEDQRYEFRVIAKNAAGAISKPSDSTGPITAKDEVELPRISMDPKFRDTIVVNAGETFRLEA -DVHGKPLPTIEWLRGDKEIEESARCEIKNTDFKALLIVKDAIRIDGGQYILRASNVAGSKSFPVNVKVLD -RPGPPEGPVQVTGVTSEKCSLTWSPPLQDGGSDISHYVVEKRETSRLAWTVVASEVVTNSLKVTKLLEGN -EYVFRIMAVNKYGVGEPLESAPVLMKNPFVLPGPPKSLEVTNIAKDSMTVCWNRPDSDGGSEIIGYIVEK -RDRSGIRWIKCNKRRITDLRLRVTGLTEDHEYEFRVSAENAAGVGEPSPATVYYKACDPVFKPGPPTNAH -IVDTTKNSITLAWGKPIYDGGSEILGYVVEICKADEEEWQIVTPQTGLRVTRFEISKLTEHQEYKIRVCA -LNKVGLGEATSVPGTVKPEDKLEAPELDLDSELRKGIVVRAGGSARIHIPFKGRPMPEITWSREEGEFTD 
-KVQIEKGVNYTQLSIDNCDRNDAGKYILKLENSSGSKSAFVTVKVLDTPGPPQNLAVKEVRKDSAFLVWE -PPIIDGGAKVKNYVIDKRESTRKAYANVSSKCSKTSFKVENLTEGAIYYFRVMAENEFGVGVPVETVDAV -KAAEPPSPPGKVTLTDVSQTSASLMWEKPEHDGGSRVLGYVVEMQPKGTEKWSIVAESKVCNAVVTGLSS -GQEYQFRVKAYNEKGKSDPRVLGVPVIAKDLTIQPSLKLPFNTYSIQAGEDLKIEIPVIGRPRPNISWVK -DGEPLKQTTRVNVEETATSTVLHIKEGNKDDFGKYTVTATNSAGTATENLSVIVLEKPGPPVGPVRFDEV -SADFVVISWEPPAYTGGCQISNYIVEKRDTTTTTWHMVSATVARTTIKITKLKTGTEYQFRIFAENRYGK -SAPLDSKAVIVQYPFKEPGPPGTPFVTSISKDQMLVQWHEPVNDGGTKIIGYHLEQKEKNSILWVKLNKT -PIQDTKFKTTGLDEGLEYEFKVSAENIVGIGKPSKVSECFVARDPCDPPGRPEAIVITRNNVTLKWKKPA -YDGGSKITGYIVEKKDLPDGRWMKASFTNVLETEFTVSGLVEDQRYEFRVIARNAAGNFSEPSDSSGAIT -ARDEIDAPNASLDPKYKDVIVVHAGETFVLEADIRGKPIPDVVWSKDGKELEETAARMEIKSTIQKTTLV -VKDCIRTDGGQYILKLSNVGGTKSIPITVKVLDRPGSPEGPLKVTGVTAEKCYLAWNPPLQDGGANISHY -IIEKRETSRLSWTQVSTEVQALNYKVTKLLPGNEYIFRVMAVNKYGIGEPLESGPVTACNPYKPPGPPST -PEVSAITKDSMVVTWARPVDDGGTEIEGYILEKRDKEGVRWTKCNKKTLTDLRLRVTGLTEGHSYEFRVA -AENAAGVGEPSEPSVFYRACDALYPPGPPSNPKVTDTSRSSVSLAWSKPIYDGGAPVKGYVVEVKEAAAD -EWTTCTPPTGLQGKQFTVTKLKENTEYNFRICAINSEGVGEPATLPGSVVAQERIEPPEIELDADLRKVV -VLRASATLRLFVTIKGRPEPEVKWEKAEGILTDRAQIEVTSSFTMLVIDNVTRFDSGRYNLTLENNSGSK -TAFVNVRVLDSPSAPVNLTIREVKKDSVTLSWEPPLIDGGAKITNYIVEKRETTRKAYATITNNCTKTTF -RIENLQEGCSYYFRVLASNEYGIGLPAETTEPVKVSEPPLPPGRVTLVDVTRNTATIKWEKPESDGGSKI -TGYVVEMQTKGSEKWSTCTQVKTLEATISGLTAGEEYVFRVAAVNEKGRSDPRQLGVPVIARDIEIKPSV -ELPFHTFNVKAREQLKIDVPFKGRPQATVNWRKDGQTLKETTRVNVSSSKTVTSLSIKEASKEDVGTYEL -CVSNSAGSITVPITIIVLDRPGPPGPIRIDEVSCDSITISWNPPEYDGGCQISNYIVEKKETTSTTWHIV -SQAVARTSIKIVRLTTGSEYQFRVCAENRYGKSSYSESSAVVAEYPFSPPGPPGTPKVVHATKSTMLVTW -QVPVNDGGSRVIGYHLEYKERSSILWSKANKILIADTQVKVSGLDEGLMYEYRVYAENIAGIGKCSKSCE -PVPARDPCDPPGQPEVTNITRKSVSLKWSKPHYDGGAKITGYIVERRELPDGRWLKCNYTNIQETYFEVT -ELTEDQRYEFRVFARNAADSVSEPSESTGPIIVKDDVEPPRVMMDVKFRDVIVVKAGEVLKINADIAGRP -LPVISWAKDGIEIEERARTEIISTDNHTLLTVKDCIRRDTGQYVLTLKNVAGTRSVAVNCKVLDKPGPPA -GPLEINGLTAEKCSLSWGRPQEDGGADIDYYHRKKRETSHLAWTICEGELQMTSCKVTKLLKGNEYIFRV 
-TGVNKYGVGEPLESVAIKALDPFTVPSPPTSLEITSVTKESMTLCWSRPESDGGSEISGYIIERREKNSL -RWVRVNKKPVYDLRVKSTGLREGCEYEYRVYAENAAGLSLPSETSPLIRAEDPVFLPSPPSKPKIVDSGK -TTITIAWVKPLFDGGAPITGYTVEYKKSDDTDWKTSIQSLRGTEYTISGLTTGAEYVFRVKSVNKVGASD -PSDSSDPQIAKEREEEPLFDIDSEMRKTLIVKAGASFTMTVPFRGRPVPNVLWSKPDTDLRTRAYVDTTD -SRTSLTIENANRNDSGKYTLTIQNVLSAASLTLVVKVLDTPGPPTNITVQDVTKESAVLSWDVPENDGGA -PVKNYHIEKREASKKAWVSVTNNCNRLSYKVTNLQEGAIYYFRVSGENEFGVGIPAETKEGVKITEKPSP -PEKLGVTSISKDSVSLTWLKPEHDGGSRIVHYVVEALEKGQKNWVKCAVAKSTHHVVSGLRENSEYFFRV -FAENQAGLSDPRELLLPVLIKEQLEPPEIDMKNFPSHTVYVRAGSNLKVDIPISGKPLPKVTLSRDGVPL -KATMRFNTEITAENLTINLKESVTADAGRYEITAANSSGTTKAFINIVVLDRPGPPTGPVVISDITEESV -TLKWEPPKYDGGSQVTNYILLKRETSTAVWTEVSATVARTMMKVMKLTTGEEYQFRIKAENRFGISDHID -SACVTVKLPYTTPGPPSTPWVTNVTRESITVGWHEPVSNGGSAVVGYHLEMKDRNSILWQKANKLVIRTT -HFKVTTISAGLIYEFRVYAENAAGVGKPSHPSEPVLAIDACEPPRNVRITDISKNSVSLSWQQPAFDGGS -KITGYIVERRDLPDGRWTKASFTNVTETQFTISGLTQNSQYEFRVFARNAVGSISNPSEVVGPITCIDSY -GGPVIDLPLEYTEVVKYRAGTSVKLRAGISGKPAPTIEWYKDDKELQTNALVCVENTTDLASILIKDADR -LNSGCYELKLRNAMASASATIRVQILDKPGPPGGPIEFKTVTAEKITLLWRPPADDGGAKITHYIVEKRE -TSRVVWSMVSEHLEECIITTTKIIKGNEYIFRVRAVNKYGIGEPLESDSVVAKNAFVTPGPPGIPEVTKI -TKNSMTVVWSRPIADGGSDISGYFLEKRDKKSLGWFKVLKETIRDTRQKVTGLTENSDYQYRVCAVNAAG -QGPFSEPSEFYKAADPIDPPGPPAKIRIADSTKSSITLGWSKPVYDGGSAVTGYVVEIRQGEEEEWTTVS -TKGEVRTTEYVVSNLKPGVNYYFRVSAVNCAGQGEPIEMNEPVQAKDILEAPEIDLDVALRTSVIAKAGE -DVQVLIPFKGRPPPTVTWRKDEKNLGSDARYSIENTDSSSLLTIPQVTRNDTGKYILTIENGVGEPKSST -VSVKVLDTPAACQKLQVKHVSRGTVTLLWDPPLIDGGSPIINYVIEKRDATKRTWSVVSHKCSSTSFKLI -DLSEKTPFFFRVLAENEIGIGEPCETTEPVKAAEVPAPIRDLSMKDSTKTSVILSWTKPDFDGGSVITEY -VVERKGKGEQTWSHAGISKTCEIEVSQLKEQSVLEFRVFAKNEKGLSDPVTIGPITVKELIITPEVDLSD -IPGAQVTVRIGHNVHLELPYKGKPKPSISWLKDGLPLKESEFVRFSKTENKITLSIKNAKKEHGGKYTVI -LDNAVCRIAVPITVITLGPPSKPKGPIRFDEIKADSVILSWDVPEDNGGGEITCYSIEKRETSQTNWKMV -CSSVARTTFKVPNLVKDAEYQFRVRAENRYGVSQPLVSSIIVAKHQFRIPGPPGKPVIYNVTSDGMSLTW -DAPVYDGGSEVTGFHVEKKERNSILWQKVNTSPISGREYRATGLVEGLDYQFRVYAENSAGLSSPSDPSK 
-FTLAVSPVDPPGTPDYIDVTRETITLKWNPPLRDGGSKIVGYSIEKRQGNERWVRCNFTDVSECQYTVTG -LSPGDRYEFRIIARNAVGTISPPSQSSGIIMTRDENVPPIVEFGPEYFDGLIIKSGESLRIKALVQGRPV -PRVTWFKDGVEIEKRMNMEITNVLGSTSLFVRDATRDHRGVYTVEAKNASGSAKAEIKVKVQDTPGKVVG -PIRFTNITGEKMTLWWDAPLNDGCAPITHYIIEKRETSRLAWALIEDKCEAQSYTAIKLINGNEYQFRVS -AVNKFGVGRPLDSDPVVAQIQYTVPDAPGIPEPSNITGNSITLTWARPESDGGSEIQQYILERREKKSTR -WVKVISKRPISETRFKVTGLTEGNEYEFHVMAENAAGVGPASGISRLIKCREPVNPPGPPTVVKVTDTSK -TTVSLEWSKPVFDGGMEIIGYIIEMCKTDLGDWHKVNAEACVKTRYTVTDLQAGEEYKFRVSAINGAGKG -DSCEVTGTIKAVDRLTAPELDIDANFKQTHVVRAGASIRLFIAYQGRPTPTAVWSKPDSNLSLRADIHTT -DSFSTLTVENCNRNDAGKYTLTVENNSGSKSITFTVKVLDTPGPPGPITFKDVTRGSATLMWDAPLLDGG -ARIHHYVVEKREASRRSWQVISEKCTRQIFKVNDLAEGVPYYFRVSAVNEYGVGEPYEMPEPIVATEQPA -PPRRLDVVDTSKSSAVLAWLKPDHDGGSRITGYLLEMRQKGSDLWVEAGHTKQLTFTVERLVEKTEYEFR -VKAKNDAGYSEPREAFSSVIIKEPQIEPTADLTGITNQLITCKAGSPFTIDVPISGRPAPKVTWKLEEMR -LKETDRVSITTTKDRTTLTVKDSMRGDSGRYFLTLENTAGVKTFSVTVVVIGRPGPVTGPIEVSSVSAES -CVLSWGEPKDGGGTEITNYIVEKRESGTTAWQLVNSSVKRTQIKVTHLTKYMEYSFRVSSENRFGVSKPL -ESAPIIAEHPFVPPSAPTRPEVYHVSANAMSIRWEEPYHDGGSKIIGYWVEKKERNTILWVKENKVPCLE -CNYKVTGLVEGLEYQFRTYALNAAGVSKASEASRPIMAQNPVDAPGRPEVTDVTRSTVSLIWSAPAYDGG -SKVVGYIIERKPVSEVGDGRWLKCNYTIVSDNFFTVTALSEGDTYEFRVLAKNAAGVISKGSESTGPVTC -RDEYAPPKAELDARLHGDLVTIRAGSDLVLDAAVGGKPEPKIIWTKGDKELDLCEKVSLQYTGKRATAVI -KFCDRSDSGKYTLTVKNASGTKAVSVMVKVLDSPGPCGKLTVSRVTQEKCTLAWSLPQEDGGAEITHYIV -ERRETSRLNWVIVEGECPTLSYVVTRLIKNNEYIFRVRAVNKYGPGVPVESEPIVARNSFTIPSPPGIPE -EVGTGKEHIIIQWTKPESDGGNEISNYLVDKREKESLRWTRVNKDYVVYDTRLKVTSLMEGCDYQFRVTA -VNAAGNSEPSERSNFISCREPSYTPGPPSAPRVVDTTKHSISLAWTKPMYDGGTDIVGYVLEMQEKDTDQ -WYRVHTNATIRNTEFTVPDLKMGQKYSFRVAAVNVKGMSEYSESIAEIEPVERIEIPDLELADDLKKTVT -IRAGASLRLMVSVSGRPPPVITWSKQGIDLASRAIIDTTESYSLLIVDKVNRYDAGKYTIEAENQSGKKS -ATVLVKVYDTPGPCPSVKVKEVSRDSVTITWEIPTIDGGAPINNYIVEKREAAMRAFKTVTTKCSKTLYR -ISGLVEGTMHYFRVLPENIYGIGEPCETSDAVLVSEVPLVPAKLEVVDVTKSTVTLAWEKPLYDGGSRLT -GYVLEACKAGTERWMKVVTLKPTVLEHTVTSLNEGEQYLFRIRAQNEKGVSEPRETVTAVTVQDLRVLPT 
-IDLSTMPQKTIHVPAGRPVELVIPIAGRPPPAASWFFAGSKLRESERVTVETHTKVAKLTIRETTIRDTG -EYTLELKNVTGTTSETIKVIILDKPGPPTGPIKIDEIDATSITISWEPPELDGGAPLSGYVVEQRDAHRP -GWLPVSESVTRSTFKFTRLTEGNEYVFRVAATNRFGIGSYLQSEVIECRSSIRIPGPPETLQIFDVSRDG -MTLTWYPPEDDGGSQVTGYIVERKEVRADRWVRVNKVPVTMTRYRSTGLTEGLEYEHRVTAINARGSGKP -SRPSKPIVAMDPIAPPGKPQNPRVTDTTRTSVSLAWSVPEDEGGSKVTGYLIEMQKVDQHEWTKCNTTPT -KIREYTLTHLPQGAEYRFRVLACNAGGPGEPAEVPGTVKVTEMLEYPDYELDERYQEGIFVRQGGVIRLT -IPIKGKPFPICKWTKEGQDISKRAMIATSETHTELVIKEADRGDSGTYDLVLENKCGKKAVYIKVRVIGS -PNSPEGPLEYDDIQVRSVRVSWRPPADDGGADILGYILERREVPKAAWYTIDSRVRGTSLVVKGLKENVE -YHFRVSAENQFGISKPLKSEEPVTPKTPLNPPEPPSNPPEVLDVTKSSVSLSWSRPKDDGGSRVTGYYIE -RKETSTDKVVRHNKTQITTTMYTVTGLVPDAEYQFRIIAQNDVGLSETSPASEPVVCKDPFDKPSQPGEL -EILSISKDSVTLQWEKPECDGGKEILGYWVEYRQSGDSAWKKSNKERIKDKQFTIGGLLEATEYEFRVFA -ENETGLSRPRRTAMSIKTKLTSGEAPGIRKEMKDVTTKLGEAAQLSCQIVGRPLPDIKWYRFGKELIQSR -KYKMSSDGRTHTLTVMTEEQEDEGVYTCIATNEVGEVETSSKLLLQATPQFHPGYPLKEKYYGAVGSTLR -LHVMYIGRPVPAMTWFHGQKLLQNSENITIENTEHYTHLVMKNVQRKTHAGKYKVQLSNVFGTVDAILDV -EIQDKPDKPTGPIVIEALLKNSAVISWKPPADDGGSWITNYVVEKCEAKEGAEWQLVSSAISVTTCRIVN -LTENAGYYFRVSAQNTFGISDPLEVSSVVIIKSPFEKPGAPGKPTITAVTKDSCVVAWKPPASDGGAKIR -NYYLEKREKKQNKWISVTTEEIRETVFSVKNLIEGLEYEFRVKCENLGGESEWSEISEPITPKSDVPIQA -PHFKEELRNLNVRYQSNATLVCKVTGHPKPIVKWYRQGKEIIADGLKYRIQEFKGGYHQLIIASVTDDDA -TVYQVRATNQGGSVSGTASLEVEVPAKIHLPKTLEGMGAVHALRGEVVSIKIPFSGKPDPVITWQKGQDL -IDNNGHYQVIVTRSFTSLVFPNGVERKDAGFYVVCAKNRFGIDQKTVELDVADVPDPPRGVKVSDASRDS -VNLTWTEPASDGGSKITNYIVEKCATTAERWLRVGQARETRYTVINLFGKTSYQFRVIAENKFGLSKPSE -PSEPTITKEDKTRAMNYDEEVDETREVSMTKASHSSTKELYEKYMIAEDLGRGEFGIVHRCVETSSKKTY -MAKFVKVKGTDQVLVKKEISILNIARHRNILHLHESFESMEELVMIFEFISGLDIFERINTSAFELNERE -IVSYVHQVCEALQFLHSHNIGHFDIRPENIIYQTRRSSTIKIIEFGQARQLKPGDNFRLLFTAPEYYAPE -VHQHDVVSTATDMWSLGTLVYVLLSGINPFLAETNQQIIENIMNAEYTFDEEAFKEISIEAMDFVDRLLV -KERKSRMTASEALQHPWLKQKIERVSTKVIRTLKHRRYYHTLIKKDLNMVVSAARISCGGAIRSQKGVSV -AKVKVASIEIGPVSGQIMHAVGEEGGHVKYVCKIENYDQSTQVTWYFGVRQLENSEKYEITYEDGVAILY 
-VKDITKLDDGTYRCKVVNDYGEDSSYAELFVKGVREVYDYYCRRTMKKIKRRTDTMRLLERPPEFTLPLY -NKTAYVGENVRFGVTITVHPEPHVTWYKSGQKIKPGDNDKKYTFESDKGLYQLTINSVTTDDDAEYTVVA -RNKYGEDSCKAKLTVTLHPPPTDSTLRPMFKRLLANAECQEGQSVCFEIRVSGIPPPTLKWEKDGQPLSL -GPNIEIIHEGLDYYALHIRDTLPEDTGYYRVTATNTAGSTSCQAHLQVERLRYKKQEFKSKEEHERHVQK -QIDKTLRMAEILSGTESVPLTQVAKEALREAAVLYKPAVSTKTVKGEFRLEIEEKKEERKLRMPYDVPEP -RKYKQTTIEEDQRIKQFVPMSDMKWYKKIRDQYEMPGKLDRVVQKRPKRIRLSRWEQFYVMPLPRITDQY -RPKWRIPKLSQDDLEIVRPARRRTPSPDYDFYYRPRRRSLGDISDEELLLPIDDYLAMKRTEEERLRLEE -ELELGFSASPPSRSPPHFELSSLRYSSPQAHVKVEETRKNFRYSTYHIPTKAEASTSYAELRERHAQAAY -RQPKQRQRIMAEREDEELLRPVTTTQHLSEYKSELDFMSKEEKSRKKSRRQREVTEITEIEEEYEISKHA -QRESSSSASRLLRRRRSLSPTYIELMRPVSELIRSRPQPAEEYEDDTERRSPTPERTRPRSPSPVSSERS -LSRFERSARFDIFSRYESMKAALKTQKTSERKYEVLSQQPFTLDHAPRITLRMRSHRVPCGQNTRFILNV -QSKPTAEVKWYHNGVELQESSKIHYTNTSGVLTLEILDCHTDDSGTYRAVCTNYKGEASDYATLDVTGGD -YTTYASQRRDEEVPRSVFPELTRTEAYAVPSFKKTSEMEASSSVREVKSQMTETRESLSSYEHSASAEMK -SAALEEKSLEEKSTTRKIKTTLAARILTKPRSMTVYEGESARFSCDTDGEPVPTVTWLRKGQVLSTSARH -QVTTTKYKSTFEISSVQASDEGNYSVVVENSEGKQEAEFTLTIQKARVTEKAVTSPPRVKSPEPRVKSPE -AVKSPKRVKSPEPSHPKAVSPTETKPTPIEKVQHLPVSAPPKITQFLKAEASKEIAKLTCVVESSVLRAK -EVTWYKDGKKLKENGHFQFHYSADGTYELKINNLTESDQGEYVCEISGEGGTSKTNLQFMGQAFKSIHEK -VSKISETKKSDQKTTESTVTRKTEPKAPEPISSKPVIVTGLQDTTVSSDSVAKFAVKATGEPRPTAIWTK -DGKAITQGGKYKLSEDKGGFFLEIHKTDTSDSGLYTCTVKNSAGSVSSSCKLTIKAIKDTEAQKVSTQKT -SEITPQKKAVVQEEISQKALRSEEIKMSEAKSQEKLALKEEASKVLISEEVKKSAATSLEKSIVHEEITK -TSQASEEVRTHAEIKAFSTQMSINEGQRLVLKANIAGATDVKWVLNGVELTNSEEYRYGVSGSDQTLTIK -QASHRDEGILTCISKTKEGIVKCQYDLTLSKELSDAPAFISQPRSQNINEGQNVLFTREISGEPSPEIEW -FKNNLPISISSNVSISRSRNVYSLEIRNASVSDSGKYTIKAKNFRGQCSATASLMVLPLVEEPSREVVLR -TSGDTSLQGSFSSQSVQMSASKQEASFSSFSSSSASSMTEMKFASMSAQSMSSMQESFVEMSSSSFMGIS -NMTQLESSTSKMLKAGIRGIPPKIEALPSDISIDEGKVLTVACAFTGEPTPEVTWSCGGRKIHSQEQGRF -HIENTDDLTTLIIMDVQKQDGGLYTLSLGNEFGSDSATVNIHIRSI diff --git a/forester/archive/RIO/others/hmmer/testsuite/tophits_test.c b/forester/archive/RIO/others/hmmer/testsuite/tophits_test.c deleted file mode 
100644 index cdf1cfa..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/tophits_test.c +++ /dev/null @@ -1,170 +0,0 @@ -/* tophits_test.c - * SRE, Tue Oct 28 08:03:10 1997 [Newton Institute, Cambridge UK] - * - * Test driver for tophits.c. Returns 0 if everything is OK. - * - * Options: - * -v Verbose; print stuff. - */ - -#include -#include - -#include "structs.h" -#include "funcs.h" -#include "globals.h" -#include "squid.h" - -static char banner[] = "\ -tophits_test : internal verification of tophits.c"; - -static char usage[] = "\ -Usage: tophits_test [-options]\n\ - Available options are:\n\ - -h : help; display this usage info\n\ - -s : set random seed to \n\ - -v : be verbose (default is to simply exit with status 1 or 0)\n\ -"; - -static char experts[] = "\ -\n"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-s", TRUE, sqdARG_INT }, - { "-v", TRUE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - struct tophit_s *hit; /* hit list */ - int i,j; /* counters */ - int nsamples; /* option: # of random "scores" */ - int be_verbose; /* option: TRUE to show output */ - int seed; /* option: random number seed */ - int paramH; /* option: H parameter */ - int paramA; /* option: A parameter */ - double *list; /* list of "scores" */ - double tmp; /* used for swapping */ - float score, score2; - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - /*********************************************** - * Parse command line - ***********************************************/ - be_verbose = FALSE; - seed = (int) time ((time_t *) NULL); - paramH = 100; - paramA = 10; - nsamples = 1000; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-s") == 0) { seed = atoi(optarg); } - else if (strcmp(optname, "-v") == 0) { be_verbose = 
TRUE; } - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 0) - Die("Incorrect number of arguments.\n%s\n", usage); - - sre_srandom(seed); - if (be_verbose) - printf("%d\tSEED\n", seed); - - /*********************************************** - * Generate three tiers of numbers: - * paramA - really good scores, 1000-2000 - * paramH - good scores, 100-200 - * nsamples - paramH - paramA: bad scores, 10-20 - * then shuffle. - ***********************************************/ - - list = MallocOrDie (sizeof(double) * nsamples); - for (i = 0; i < paramA; i++) - list[i] = 1000. + 1000. * sre_random(); - for (; i < paramA + paramH; i++) - list[i] = 100. + 100. * sre_random(); - for (; i < nsamples; i++) - list[i] = 10. + 10. * sre_random(); - - for (i = 0; i < nsamples; i++) - { - j = CHOOSE(nsamples); - tmp = list[j]; - list[j] = list[i]; - list[i] = tmp; - } - - if (be_verbose) - for (i = 0; i < nsamples; i++) - printf("%8.2f\tTest set\n", list[i]); - - /*********************************************** - * Test of FullSortTophits(). - * Fill up a hit list with random numbers; - * FullSort it; - * check that all top H are >= 100 and sorted. 
- ***********************************************/ - - hit = AllocTophits(100); - for (i = 0; i < nsamples; i++) - RegisterHit(hit, list[i], 0., (float) list[i], 0., 0., - NULL, NULL, NULL, /* name, acc, desc */ - 0,0,0, - 0,0,0, - 0,0, - NULL); - FullSortTophits(hit); - - if (be_verbose) - { - for (i = 0; i < hit->num; i++) - { - GetRankedHit(hit, i, NULL, &score, NULL, NULL, - NULL, NULL, NULL, /* name, acc, desc */ - NULL, NULL, NULL, - NULL, NULL, NULL, - NULL, NULL, - NULL); - printf("%8.2f FullSort()\n", score); - } - } - - for (i = 0; i < hit->num-1; i++) - { - GetRankedHit(hit, i, NULL, &score, NULL, NULL, - NULL, NULL, NULL, /* name, acc, desc */ - NULL, NULL, NULL, - NULL, NULL, NULL, - NULL, NULL, - NULL); - GetRankedHit(hit, i+1,NULL, &score2,NULL, NULL, - NULL, NULL, NULL, /* name, acc, desc */ - NULL, NULL, NULL, - NULL, NULL, NULL, - NULL, NULL, - NULL); - if (score < score2) - Die("FullSortTophits() fails test: order wrong"); - if (i < paramA && score < 1000.) - Die("FullSortTophits() fails test: lost a number"); - if (i < paramA + paramH && score < 100.) - Die("FullSortTophits() fails test: lost a number"); - } - - FreeTophits(hit); - free(list); - - if (be_verbose) printf("tophits_test is OK\n"); - return 0; -} diff --git a/forester/archive/RIO/others/hmmer/testsuite/trace_test.c b/forester/archive/RIO/others/hmmer/testsuite/trace_test.c deleted file mode 100644 index 98e5e97..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/trace_test.c +++ /dev/null @@ -1,146 +0,0 @@ -/* trace_test.c - * Mon Feb 2 07:57:47 1998 - * cp trace_test.c ../src/testdriver.c; cd ../src; make testdriver - * - * Test driver for Viterbi tracebacks. 
- * - * RCS $Id: trace_test.c,v 1.1.1.1 2005/03/22 08:34:47 cmzmasek Exp $ - */ - - -#include -#include -#include - -#include "structs.h" -#include "funcs.h" -#include "globals.h" -#include "squid.h" - -static char banner[] = "\ -trace_test : testing of Plan7 Viterbi traceback code"; - -static char usage[] = "\ -Usage: testdriver [-options]\n\ - Available options are:\n\ - -h : help; display this usage info\n\ - -v : be verbose\n\ -"; - -static char experts[] = "\ - --hmm : use HMM in file \n\ - --seq : use seq(s) in file \n\ - --small : run P7SmallViterbi()\n\ -\n"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-v", TRUE, sqdARG_NONE }, - { "--hmm", FALSE, sqdARG_STRING }, - { "--seq", FALSE, sqdARG_STRING }, - { "--small", FALSE, sqdARG_NONE }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *hmmfile; /* file to read HMM(s) from */ - HMMFILE *hmmfp; /* opened hmmfile for reading */ - char *seqfile; /* file to read target sequence(s) from */ - SQFILE *sqfp; /* opened seqfile for reading */ - char *seq; /* target sequence */ - SQINFO sqinfo; /* optional info for seq */ - char *dsq; /* digitized target sequence */ - struct plan7_s *hmm; /* HMM to search with */ - struct p7trace_s *tr; /* traceback */ - int nseq; - float sc; - - int be_verbose; - int do_small; /* TRUE to invoke P7SmallViterbi */ - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - /*********************************************** - * Parse command line - ***********************************************/ - - be_verbose = FALSE; - hmmfile = "trace_test.hmm"; - seqfile = "trace_test.seq"; - do_small = FALSE; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-v") == 0) be_verbose = TRUE; - else if (strcmp(optname, "--hmm") == 0) hmmfile = optarg; - else if 
(strcmp(optname, "--seq") == 0) seqfile = optarg; - else if (strcmp(optname, "--small") == 0) do_small = TRUE; - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 0) - Die("Incorrect number of arguments.\n%s\n", usage); - - /*********************************************** - * Open test sequence file - ***********************************************/ - - if ((sqfp = SeqfileOpen(seqfile, SQFILE_UNKNOWN, "BLASTDB")) == NULL) - Die("Failed to open sequence database file %s\n%s\n", seqfile, usage); - - /*********************************************** - * Open HMM file - * Read a single HMM from it. (Config HMM, if necessary). - ***********************************************/ - - if ((hmmfp = HMMFileOpen(hmmfile, NULL)) == NULL) - Die("Failed to open HMM file %s\n%s", hmmfile, usage); - if (!HMMFileRead(hmmfp, &hmm)) - Die("Failed to read any HMMs from %s\n", hmmfile); - if (hmm == NULL) - Die("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile); - P7Logoddsify(hmm, TRUE); - - /*********************************************** - * Search HMM against each sequence - ***********************************************/ - - nseq = 0; - while (ReadSeq(sqfp, sqfp->format, &seq, &sqinfo)) - { - nseq++; - dsq = DigitizeSequence(seq, sqinfo.len); - - if (do_small) sc = P7SmallViterbi(dsq, sqinfo.len, hmm, &tr); - else sc = P7Viterbi(dsq, sqinfo.len, hmm, &tr); - - if (be_verbose) - { - printf("test sequence %d: score %.1f : %s %s\n", - nseq, sc, sqinfo.name, - sqinfo.flags & SQINFO_DESC ? sqinfo.desc : ""); - P7PrintTrace(stdout, tr, hmm, dsq); - } - - if (! 
TraceVerify(tr, hmm->M, sqinfo.len)) - Die("Trace verify failed on seq #%d, %s\n", nseq, sqinfo.name); - - FreeSequence(seq, &sqinfo); - P7FreeTrace(tr); - free(dsq); - } - - FreePlan7(hmm); - HMMFileClose(hmmfp); - SeqfileClose(sqfp); - - return EXIT_SUCCESS; -} diff --git a/forester/archive/RIO/others/hmmer/testsuite/trace_test.hmm b/forester/archive/RIO/others/hmmer/testsuite/trace_test.hmm deleted file mode 100644 index f52309a..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/trace_test.hmm +++ /dev/null @@ -1,47 +0,0 @@ -HMMER2.0 -NAME trace_test -DESC -LENG 10 -ALPH Amino -RF no -CS no -COM ./hmmbuild -F trace_test.hmm trace_test.slx -NSEQ 7 -DATE Mon Feb 2 09:14:31 1998 -XT -8455 -4 -1000 -1000 -8455 -4 -8455 -4 -NULT -4 -8455 -NULE 595 -1558 85 338 -294 453 -1158 197 249 902 -1085 -142 -21 -313 45 531 201 384 -1998 -644 -HMM A C D E F G H I K L M N P Q R S T V W Y - m->m m->i m->d i->m i->i d->m d->d b->m m->e - -585 * -1585 - 1 2806 -444 -1270 -1265 -1732 -744 -1182 -1034 -1212 -1545 -1010 -869 -1380 -1102 -1325 -156 -259 -640 -2039 -1721 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -111 -6123 -7165 -894 -1115 -701 -1378 -585 * - 2 -608 5196 -2368 -2362 -1635 -1253 -1702 -987 -2034 -1540 -1191 -1745 -1831 -1953 -1922 -934 -927 -770 -1855 -1692 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -115 -6127 -7169 -894 -1115 -701 -1378 * * - 3 -1130 -2063 3441 243 -2502 -1157 -662 -2514 -843 -2578 -2029 -96 -1658 -421 -1434 -970 -1239 -2138 -2388 -1915 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -120 -6132 -7174 -894 -1115 -701 -1378 * * - 4 -988 -1921 379 3052 -2309 -1196 -495 -2108 -350 -2194 -1612 -103 -1621 -210 -770 -862 -1042 -1799 -2207 -1742 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -126 -6138 -7180 -894 -1115 -701 -1378 * * - 5 -1602 -1340 -2512 -2455 
3740 -2315 -379 -478 -2225 -175 -190 -1874 -2503 -1781 -2079 -1892 -1647 -683 210 1257 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -132 -6144 -7186 -894 -1115 -701 -1378 * * - 6 -759 -1104 -1314 -1488 -2461 3330 -1563 -2503 -1721 -2645 -2082 -1325 -1776 -1591 -1791 -959 -1094 -1922 -2199 -2280 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -140 -6152 -7194 -894 -1115 -701 -1378 * * - 7 -1192 -1547 -812 -781 -520 -1557 4586 -1881 -457 -1789 -1357 -866 -1913 -669 -600 -1231 -1258 -1678 -918 -90 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -148 -6160 -7202 -894 -1115 -701 -1378 * * - 8 -1016 -805 -2637 -2367 -591 -2470 -1853 3039 -2040 329 346 -2180 -2589 -1943 -2070 -1879 -1051 1184 -1698 -1261 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -158 -6170 -7212 -894 -1115 -701 -1378 * * - 9 -1028 -1686 -800 -442 -2171 -1521 -263 -1873 3103 -1859 -1251 -547 -1750 74 599 -1004 -972 -1620 -1812 -1528 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -170 -6182 -7224 -894 -1115 -701 -1378 * * - 10 -1369 -1199 -2542 -2295 -129 -2453 -1603 488 -1867 2607 742 -2168 -2539 -1724 -1835 -1992 -1384 130 -1284 -897 - - * * * * * * * * * * * * * * * * * * * * - - * * * * * * * * 0 -// diff --git a/forester/archive/RIO/others/hmmer/testsuite/trace_test.seq b/forester/archive/RIO/others/hmmer/testsuite/trace_test.seq deleted file mode 100644 index 5f5eba4..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/trace_test.seq +++ /dev/null @@ -1,25 +0,0 @@ ->seq1 Basic traceback. 
SNB M ECT -ACDEFGHIKL ->seq2 C-terminal tail; one CC transition -ACDEFGHIKLY ->seq3 C-terminal tail; three CC transitions -ACDEFGHIKLYYY ->seq4 N-terminal tail; one NN transition -YACDEFGHIKL ->seq5 N-terminal tail; three NN transitions -YYYACDEFGHIKL ->seq6 one JJ -ACDEFGHIKLYACDEFGHIKL ->seq7 three JJ -ACDEFGHIKLYYYACDEFGHIKL ->seq8 D->E wing unfolding -ACDEFGHIK ->seq9 B->D wing unfolding -CDEFGHIKL ->seq10 MD, DD, DM transitions -ACDEHIKL ->seq11 MI, II, IM transitions -ACDEFYYYGHIKL ->seq12 bogosity -Y - diff --git a/forester/archive/RIO/others/hmmer/testsuite/trace_test.slx b/forester/archive/RIO/others/hmmer/testsuite/trace_test.slx deleted file mode 100644 index 3e71587..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/trace_test.slx +++ /dev/null @@ -1,7 +0,0 @@ -seq1 ACDEFGHIKL -seq2 ACDEFGHIKL -seq3 ACDEFGHIKL -seq4 ACDEFGHIKL -seq5 ACDEFGHIKL -seq6 ACDEFGHIKL -seq7 ACDEFGHIKL diff --git a/forester/archive/RIO/others/hmmer/testsuite/viterbi_exercise.c b/forester/archive/RIO/others/hmmer/testsuite/viterbi_exercise.c deleted file mode 100644 index 01a9c1f..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/viterbi_exercise.c +++ /dev/null @@ -1,166 +0,0 @@ -/* viterbi_exercise.c - * SRE, Mon Mar 9 07:55:47 1998 [St. Louis] - * - * Exercise the various Viterbi algorithms, big and small. 
- * - * RCS $Id: viterbi_exercise.c,v 1.1.1.1 2005/03/22 08:34:50 cmzmasek Exp $ - */ - - -#include -#include -#include -#include - -#include "structs.h" -#include "funcs.h" -#include "globals.h" -#include "squid.h" - -static char banner[] = "\ -viterbi_exercise : testing of Plan7 Viterbi code"; - -static char usage[] = "\ -Usage: testdriver [-options]\n\ - Available options are:\n\ - -h : help; display this usage info\n\ - -v : be verbose\n\ -"; - -static char experts[] = "\ - --hmm : use HMM in file \n\ -\n"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-v", TRUE, sqdARG_NONE }, - { "--hmm", FALSE, sqdARG_STRING }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *hmmfile; /* file to read HMM(s) from */ - HMMFILE *hmmfp; /* opened hmmfile for reading */ - struct plan7_s *hmm; /* the HMM to search with */ - char *dsq; /* digitized target sequence */ - char *seq; - SQINFO sqinfo; - int L; /* length of dsq */ - struct p7trace_s *tr1; /* traceback */ - struct p7trace_s *tr2; /* another traceback */ - int nseq; - float sc1, sc2; /* scores */ - int config; - int i; - - int be_verbose; - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - - /*********************************************** - * Parse command line - ***********************************************/ - - be_verbose = FALSE; - hmmfile = "fn3.hmm"; - nseq = 100; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-v") == 0) be_verbose = TRUE; - else if (strcmp(optname, "--hmm") == 0) hmmfile = optarg; - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 0) - Die("Incorrect number of arguments.\n%s\n", usage); - - /*********************************************** - * Open HMM file - * Read 
a single HMM from it. - ***********************************************/ - - if ((hmmfp = HMMFileOpen(hmmfile, NULL)) == NULL) - Die("Failed to open HMM file %s\n%s", hmmfile, usage); - if (!HMMFileRead(hmmfp, &hmm)) - Die("Failed to read any HMMs from %s\n", hmmfile); - if (hmm == NULL) - Die("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile); - Plan7Renormalize(hmm); - - /*********************************************** - * We cycle through different model configurations. - * For each configuration, we repeat 100 times: - * - generate a sequence - * - score it by Viterbi and by SmallViterbi - * - make sure they give OK and identical results - ***********************************************/ - - for (config = 1; config <= 5; config++) - { - switch (config) { - case 1: Plan7NakedConfig(hmm); break; - case 2: Plan7GlobalConfig(hmm); break; - case 3: Plan7LSConfig(hmm); break; - case 4: Plan7FSConfig(hmm, 0.5, 0.5); break; - case 5: Plan7SWConfig(hmm, 0.5, 0.5); break; - default: Die("never happens"); - } - P7Logoddsify(hmm, TRUE); - - for (i = 0; i < nseq; i++) - { - EmitSequence(hmm, &dsq, &L, NULL); - sprintf(sqinfo.name, "seq%d", i+1); - sqinfo.len = L; - sqinfo.flags = SQINFO_NAME | SQINFO_LEN; - - sc1 = P7Viterbi(dsq, L, hmm, &tr1); - sc2 = P7SmallViterbi(dsq, L, hmm, &tr2); - - if (be_verbose) - { - printf("Viterbi score: %.1f SmallViterbi: %.1f\n", sc1, sc2); - P7PrintTrace(stdout, tr1, hmm, dsq); - P7PrintTrace(stdout, tr2, hmm, dsq); - - seq = DedigitizeSequence(dsq, L); - WriteSeq(stdout, SQFILE_FASTA, seq, &sqinfo); - free(seq); - } - - if (sc1 != sc2) - Die("Different scores from normal/small Viterbi"); - - if (fabs(sc1 - P7TraceScore(hmm, dsq, tr1)) > 0.1) - Die("P7Viterbi score doesn't match its TraceScore"); - if (fabs(sc2 - P7TraceScore(hmm, dsq, tr2)) > 0.1) - Die("P7SmallViterbi score doesn't match its TraceScore"); - - if (! TraceVerify(tr1, hmm->M, L)) - Die("TraceVerify() failed for a P7Viterbi trace"); - if (! 
TraceVerify(tr2, hmm->M, L)) - Die("TraceVerify() failed for a P7SmallViterbi trace"); - - if (tr1->tlen != tr2->tlen) - Die("Trace lengths differ for normal/small Viterbi"); - if (! TraceCompare(tr1, tr2)) - Die("Different traces from normal/small Viterbi"); - - P7FreeTrace(tr1); - P7FreeTrace(tr2); - free(dsq); - } - } - - return EXIT_SUCCESS; -} diff --git a/forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.c b/forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.c deleted file mode 100644 index 232ca9b..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.c +++ /dev/null @@ -1,150 +0,0 @@ -/* weeviterbi_test.c - * Wed Mar 4 17:30:39 1998 - * - * Test driver for Myers/Miller/Hirschberg linear memory Viterbi tracebacks. - * - * RCS $Id: weeviterbi_test.c,v 1.1.1.1 2005/03/22 08:34:47 cmzmasek Exp $ - */ - -#include -#include -#include - -#include "structs.h" -#include "funcs.h" -#include "globals.h" -#include "squid.h" - -static char banner[] = "\ -weeviterbi_test : testing of Plan7 Myers/Miller/Hirschberg Viterbi traceback code"; - -static char usage[] = "\ -Usage: testdriver [-options]\n\ - Available options are:\n\ - -h : help; display this usage info\n\ - -v : be verbose\n\ -"; - -static char experts[] = "\ - --hmm : use HMM in file \n\ - --seq : use seq(s) in file \n\ -\n"; - -static struct opt_s OPTIONS[] = { - { "-h", TRUE, sqdARG_NONE }, - { "-v", TRUE, sqdARG_NONE }, - { "--hmm", FALSE, sqdARG_STRING }, - { "--seq", FALSE, sqdARG_STRING }, -}; -#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) - -int -main(int argc, char **argv) -{ - char *hmmfile; /* file to read HMM(s) from */ - HMMFILE *hmmfp; /* opened hmmfile for reading */ - char *seqfile; /* file to read target sequence(s) from */ - SQFILE *sqfp; /* opened seqfile for reading */ - char *seq; /* target sequence */ - SQINFO sqinfo; /* optional info for seq */ - char *dsq; /* digitized target sequence */ - struct plan7_s *hmm; /* HMM to search with */ - 
struct p7trace_s *t1; /* standard Viterbi traceback */ - struct p7trace_s *t2; /* WeeViterbi traceback */ - int nseq; - float sc1,sc2; /* scores from Viterbi, WeeViterbi */ - - int be_verbose; - - char *optname; /* name of option found by Getopt() */ - char *optarg; /* argument found by Getopt() */ - int optind; /* index in argv[] */ - - /*********************************************** - * Parse command line - ***********************************************/ - - be_verbose = FALSE; - hmmfile = "weeviterbi_test.hmm"; - seqfile = "weeviterbi_test.seq"; - - while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, - &optind, &optname, &optarg)) { - if (strcmp(optname, "-v") == 0) be_verbose = TRUE; - else if (strcmp(optname, "--hmm") == 0) hmmfile = optarg; - else if (strcmp(optname, "--seq") == 0) seqfile = optarg; - else if (strcmp(optname, "-h") == 0) { - Banner(stdout, banner); - puts(usage); - puts(experts); - exit(0); - } - } - if (argc - optind != 0) - Die("Incorrect number of arguments.\n%s\n", usage); - - /*********************************************** - * Open test sequence file - ***********************************************/ - - if ((sqfp = SeqfileOpen(seqfile, SQFILE_UNKNOWN, "BLASTDB")) == NULL) - Die("Failed to open sequence database file %s\n%s\n", seqfile, usage); - - /*********************************************** - * Open HMM file - * Read a single HMM from it. (Config HMM, if necessary). - ***********************************************/ - - if ((hmmfp = HMMFileOpen(hmmfile, NULL)) == NULL) - Die("Failed to open HMM file %s\n%s", hmmfile, usage); - if (!HMMFileRead(hmmfp, &hmm)) - Die("Failed to read any HMMs from %s\n", hmmfile); - if (hmm == NULL) - Die("HMM file %s corrupt or in incorrect format? 
Parse failed", hmmfile); - P7Logoddsify(hmm, TRUE); - - /*********************************************** - * Search HMM against each sequence - ***********************************************/ - - nseq = 0; - while (ReadSeq(sqfp, sqfp->format, &seq, &sqinfo)) - { - nseq++; - dsq = DigitizeSequence(seq, sqinfo.len); - - sc1 = P7Viterbi(dsq, sqinfo.len, hmm, &t1); - sc2 = P7WeeViterbi(dsq, sqinfo.len, hmm, &t2); - - if (be_verbose) - { - printf("test sequence %d: %s %s\n", - nseq, sqinfo.name, - sqinfo.flags & SQINFO_DESC ? sqinfo.desc : ""); - printf("** P7Viterbi trace:\n"); - P7PrintTrace(stdout, t1, hmm, dsq); - printf("** P7WeeViterbi trace:\n"); - P7PrintTrace(stdout, t2, hmm, dsq); - } - - if (! TraceVerify(t1, hmm->M, sqinfo.len)) - Die("Trace verify failed on Viterbi for seq #%d, %s\n", nseq, sqinfo.name); - if (! TraceVerify(t2, hmm->M, sqinfo.len)) - Die("Trace verify failed on WeeViterbi for seq #%d, %s\n", nseq, sqinfo.name); - if (sc1 != sc2) - Die("Scores for the two Viterbi implementations are unequal (%.1f,%.1f)", sc1, sc2); - if (! 
TraceCompare(t1, t2)) - Die("WeeViterbi() trace is not identical to Viterbi() trace"); - - FreeSequence(seq, &sqinfo); - P7FreeTrace(t1); - P7FreeTrace(t2); - free(dsq); - } - - FreePlan7(hmm); - SeqfileClose(sqfp); - HMMFileClose(hmmfp); - - return EXIT_SUCCESS; -} diff --git a/forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.hmm b/forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.hmm deleted file mode 100644 index ed56c05..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.hmm +++ /dev/null @@ -1,233 +0,0 @@ -HMMER2.0 -NAME rrm -DESC -LENG 72 -ALPH Amino -RF no -CS no -COM hmmbuild weeviterbi_test.hmm /nfs/w4/Pfam/Seed/rrm.seed -NSEQ 70 -DATE Wed Mar 4 17:40:23 1998 -XT -8455 -4 -1000 -1000 -8455 -4 -8455 -4 -NULT -4 -8455 -NULE 595 -1558 85 338 -294 453 -1158 197 249 902 -1085 -142 -21 -313 45 531 201 384 -1998 -644 -HMM A C D E F G H I K L M N P Q R S T V W Y - m->m m->i m->d i->m i->i d->m d->d b->m m->e - -21 * -6129 - 1 -1234 -371 -8214 -7849 -5304 -8003 -7706 2384 -7769 2261 -681 -7660 -7694 -7521 -7816 -7346 -5543 1527 -6974 -6639 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12326 -894 -1115 -701 -1378 -21 * - 2 -3634 -3460 -5973 -5340 3521 -2129 -4036 -831 -2054 -1257 -2663 -4822 -5229 -4557 -4735 -1979 -1569 -1476 -3893 3439 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12326 -894 -1115 -701 -1378 * * - 3 -5570 838 -8268 -7958 -5637 -8152 -8243 2427 -7947 -461 -539 -7805 -7843 -7878 -8124 -7550 -5559 3130 -7481 -7000 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12326 -894 -1115 -701 -1378 * * - 4 -1146 -4797 -1564 -2630 -1480 2769 -2963 -1850 992 -4812 -3887 737 -4397 -120 793 -205 -1019 -4418 -4981 -1059 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12326 -894 -1115 -701 
-1378 * * - 5 -5242 -7035 445 -3538 -7284 1773 -4583 -7166 -4676 -7046 -6312 3633 -1651 -1262 -849 -1278 -5287 -6650 -7228 -291 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12326 -894 -1115 -701 -1378 * * - 6 -6898 -6238 -9292 -8703 -410 -9176 -7772 820 -8535 3071 -753 -8917 -8033 -7171 -7955 -8614 -6722 5 -6136 -6414 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 278 394 45 96 359 117 -369 -294 -249 - - -33 -6025 -12326 -153 -3315 -701 -1378 * * - 7 -5 -5297 178 -2982 -5685 -2278 -528 -5452 -1615 -5394 -4488 1396 3136 -3022 -3659 780 976 -4981 -5565 -4854 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12327 -894 -1115 -701 -1378 * * - 8 -3329 -4799 -805 543 789 -4303 572 -4868 140 -1087 -3888 -603 1691 530 183 -162 293 -2124 2317 2037 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11284 -12327 -894 -1115 -701 -1378 * * - 9 -373 -4801 2182 1353 -1426 44 -407 -1928 -366 -4817 -3891 1263 -4395 -1080 -666 295 50 -1947 -4985 397 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 10 450 1883 -5953 -5317 -1256 -1301 -4027 1322 -1847 -283 1542 -4802 -5206 -1502 -4713 -4241 2143 1615 -3893 -3551 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 11 -1786 -4835 1027 -807 -5155 -1278 -2989 -4907 -410 -4850 -3924 957 -4421 -943 -250 670 3048 -4456 -5017 -4333 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 12 -3329 -4802 1324 2670 -5123 -4302 -2961 -4874 732 -2424 -3891 -457 -262 553 250 -694 -989 -4424 1772 -1014 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 
-701 -1378 * * - 13 -325 -4802 1515 2286 -5123 -2017 868 -4874 260 -2865 -1087 -2938 -4395 2006 -810 492 -1754 -4424 -4985 -4302 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 14 -337 -4801 2075 1854 -5121 -723 -567 -1924 73 -634 -194 -1227 -4396 1588 -3049 -212 -414 -4422 -5 -4302 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 15 -6843 -6192 -9252 -8675 -481 -9132 -7773 1557 -8511 2856 467 -8869 -8024 -7180 -7953 -8566 -6676 459 -6154 -6421 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 16 5 -4654 -1525 936 444 -4347 -3013 -1809 2193 -441 -3760 -441 -4438 -2577 1775 -91 -3285 -1104 180 -259 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 17 -97 -4802 2341 1548 -5123 -2042 -2961 -4874 -347 -2479 -194 -5 -726 1566 807 -1858 42 -4424 -4985 -4302 - - -146 -501 232 42 -381 398 105 -627 210 -463 -721 275 393 44 95 361 116 -370 -295 -242 - - -45 -5457 -12327 -1928 -440 -701 -1378 * * - 18 358 -3435 -5945 -1175 1490 -5154 1309 1157 -1944 1759 -387 -4797 -5204 -4530 -1684 -4238 -376 166 -3893 1330 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 19 -2191 733 -7910 -7364 4360 -7323 -5649 -1557 -7016 -750 -407 -6877 -7039 -6263 -6681 -6482 -5572 -4211 -4950 -1019 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12328 -894 -1115 -701 -1378 * * - 20 -83 -4801 -3176 698 -5121 1566 -2961 -1977 942 -4817 -3890 -239 -4396 582 256 1807 -874 -1745 -4984 -1334 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 
-1115 -701 -1378 * * - 21 -1216 -4802 -289 1083 -1452 -655 -584 -4874 1345 -4818 -3891 964 1488 2130 -3049 -310 107 -2012 -4985 -1334 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 22 -45 1344 -1667 -843 2933 -2146 400 582 -4479 -1948 -2709 -506 -5117 -436 -1764 -4119 -3523 -96 215 2616 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 23 -556 -4294 -4426 -1796 -273 3377 -4149 -4100 -4273 -2279 -3695 -562 298 -4067 -4575 -1940 -3954 -3921 -4866 -77 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 24 -376 -4801 -143 1004 -1426 805 279 -1771 821 -1486 -3890 -527 2002 126 45 -287 -1679 -617 -4985 -4302 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 25 -3608 -178 -1585 -1970 660 -5154 -4024 2773 -894 -985 -386 -4796 -1707 -4528 -4707 -609 -1823 2145 -3893 -1100 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 26 -673 -173 -3429 1042 -4598 -2161 -3110 535 1570 9 283 -508 -4517 -255 382 -1924 313 1407 -4706 -4127 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 27 -1211 -4799 1518 768 -5119 -1218 -441 -945 -1312 -2414 -587 909 -4396 -1010 534 1815 78 -487 -4983 -128 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 28 1271 2236 -5933 -5299 810 -2278 -651 1901 -1970 -221 -2639 -1497 -5203 -4524 -629 -638 -1577 1521 -3894 -1008 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 
-1115 -701 -1378 * * - 29 -1909 -4796 153 441 -1513 -4304 -599 -1894 1709 25 -3886 689 -1498 243 1438 -189 -879 380 -126 -255 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12329 -894 -1115 -701 -1378 * * - 30 -1277 -3441 -5893 -1776 -1155 -5147 -513 1829 -1993 1189 1888 -1484 -703 -4503 -1652 -1974 -3546 2209 -3898 -3554 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -14 -11287 -12329 -894 -1115 -701 -1378 * * - 31 -1299 746 -5893 -1992 -1190 -5147 -524 1691 424 -60 2330 -4774 111 -4503 -132 248 -1571 1419 -3898 -19 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -14 -11287 -12329 -894 -1115 -701 -1378 * * - 32 -3370 -4477 -3387 50 -560 -1979 -449 -51 1375 -681 233 1068 701 -1040 1343 -1845 543 -480 -10 1246 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -240 -11287 -2797 -894 -1115 -701 -1378 * * - 33 -3122 -4595 3395 -593 -4916 -1399 589 -1433 360 -4611 -290 780 -1313 35 -1369 -1782 -3061 -1712 -4778 -4095 - - -151 -504 236 42 -380 396 122 -618 211 -468 -714 274 392 45 98 355 123 -373 -299 -248 - - -841 -2976 -1709 -1966 -426 -3668 -118 * * - 34 -452 -4116 -568 -735 -4435 -1350 -2280 -1270 1458 -4131 792 -2257 1620 415 1996 479 -765 -1327 -4300 -538 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -10529 -11571 -894 -1115 -1180 -840 * * - 35 272 -4448 -1054 1495 -1086 -283 -2616 -726 380 -1231 -3538 1286 -4050 1395 -988 154 68 50 -4633 -876 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -14 -10906 -11948 -894 -1115 -2229 -346 * * - 36 -3050 -4521 457 -2349 -4841 -1681 65 -1545 404 -2305 -3610 996 -1241 -714 -1055 -351 3167 -4143 -4705 -4022 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -14 -10981 -12024 -894 -1115 -2036 -403 
* * - 37 -943 -4583 277 -486 -4904 2690 -181 -1421 829 -2551 -758 866 -4177 -751 11 -804 -1361 -4205 -4766 -4084 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11049 -12091 -894 -1115 -2632 -254 * * - 38 -1544 -4606 -1206 -627 -1238 -1111 -220 -4677 1841 -1463 -537 -311 146 1310 2236 252 -1424 -1820 -4789 -1025 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11074 -12116 -894 -1115 -1795 -490 * * - 39 -871 902 -3255 -2704 -1212 -2110 605 -4156 -647 -1293 101 192 1442 -2552 91 2587 -171 -3858 -4584 -3996 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11128 -12170 -894 -1115 -1064 -938 * * - 40 -3251 -4717 -597 -2552 -1539 -1882 45 -4784 2499 -1083 -3807 -1125 -312 -892 2672 -1497 -649 -1932 -4902 -1040 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11202 -12244 -894 -1115 -158 -3269 * * - 41 -4425 -5751 -1160 -3492 -6118 3496 -552 -1896 -1318 -2596 -4883 -434 -258 -3375 -548 -4283 -4348 -5409 -5833 -5262 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11288 -12330 -894 -1115 -701 -1378 * * - 42 -3608 -96 -1795 -5308 3204 -5154 498 -1086 -989 -1857 1406 -4797 -5204 -807 -4709 -4238 -268 -366 187 3035 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11288 -12330 -894 -1115 -701 -1378 * * - 43 2573 2359 -7700 -8052 -7623 2634 -6965 -7447 -7655 -7712 -6731 -6019 -5985 -7072 -7238 -2014 -4755 -2203 -7845 -7842 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11288 -12330 -894 -1115 -701 -1378 * * - 44 -1896 -3552 -6072 -5447 4093 -5277 -4115 -1389 -5044 -1849 -2748 -4920 -5327 -4660 -4842 -2020 -787 -772 -3948 1996 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11288 
-12330 -894 -1115 -701 -1378 * * - 45 -2123 1258 -8228 -7927 -5768 -8106 -8270 1951 -7921 -982 -4434 -7761 -7830 -7926 -8131 -7503 -5516 3355 -7605 -7039 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11288 -12331 -894 -1115 -701 -1378 * * - 46 -1158 -4801 136 2359 -5122 -4302 -508 -644 437 -2559 -3890 628 -4395 -213 172 18 1464 -2067 -4985 -1086 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11289 -12331 -894 -1115 -701 -1378 * * - 47 -7925 -6836 -8294 -8655 4067 -8176 -4357 -6786 -8211 -6080 795 -6785 -8028 -6925 -7569 -7427 -7774 -6956 -3603 3066 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11289 -12331 -894 -1115 -701 -1378 * * - 48 -633 -4801 851 2019 -1639 -2148 879 -1118 1178 -2414 -3891 -481 -71 241 -1485 -232 744 -569 -4985 -4302 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11289 -12331 -894 -1115 -701 -1378 * * - 49 -3331 -4805 2054 434 -5126 -1882 -432 -4877 377 -4821 -3894 2009 -4398 -269 -1336 1291 1198 -1970 -4988 -4305 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11289 -12331 -894 -1115 -701 -1378 * * - 50 -638 -4800 -1786 1796 -5120 -1884 1628 -1952 812 -444 -621 -1191 1228 530 -672 8 -873 45 -4983 -276 - - -149 -500 232 43 -381 398 105 -627 210 -466 -721 277 393 45 95 359 119 -370 -295 -239 - - -38 -6076 -12331 -1893 -453 -701 -1378 * * - 51 243 -4801 1218 2315 -5122 -1551 -485 -1640 -795 -2479 -783 -420 -685 -1027 1035 415 -3268 -631 -23 -4302 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11289 -12332 -894 -1115 -701 -1378 * * - 52 415 694 2467 1155 -1401 -4334 -490 -1800 -2599 -4689 -637 -384 -1759 -12 -3098 1144 -834 -569 -4907 -271 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -17 
-11290 -12332 -894 -1115 -701 -1378 * * - 53 2846 -3442 -1698 -5254 -979 -5146 -4014 -750 -4864 -773 1875 -4771 -5197 -1456 -1779 -127 -329 428 -3898 -3555 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -17 -11290 -12332 -894 -1115 -701 -1378 * * - 54 581 -4801 1239 1462 -5122 -1606 -432 -367 1251 -1623 -3891 335 -4395 1283 -110 -3209 753 -1920 -4985 -4302 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -17 -11290 -12332 -894 -1115 -701 -1378 * * - 55 686 -4798 937 304 -1378 -4303 -437 -1924 2219 -1669 -621 828 -4396 -1012 742 0 -1608 -1126 -4982 -1015 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -17 -11290 -12332 -894 -1115 -701 -1378 * * - 56 3420 863 -7680 -7410 -5526 -6323 -6681 -57 -7168 -2455 -4425 -6591 -6708 -6875 -7058 -2256 -4981 -4 -6573 -6193 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -17 -11290 -12332 -894 -1115 -701 -1378 * * - 57 -2038 -3436 -5943 -5308 -1145 -5154 -4025 2255 423 1498 1203 -4797 -1707 -478 -1267 -2117 -3548 1450 -3893 -931 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -18 -11291 -12333 -894 -1115 -701 -1378 * * - 58 622 -4802 1764 1486 -5123 -4302 -2961 -1060 334 -4818 -3891 -420 -4396 1293 1148 487 -3268 -1087 -4985 -429 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -102 -11291 -4156 -894 -1115 -701 -1378 * * - 59 1265 -231 -1498 1351 -5045 -262 -355 -4796 922 -1073 -3813 778 -4318 877 -34 53 386 -2030 289 -4225 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -18 -11207 -12249 -894 -1115 -160 -3250 * * - 60 -684 813 -5723 -473 532 -2124 -3981 -2958 -121 2114 2840 -1421 -5174 -4409 -926 -4196 -1685 -376 -3915 497 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - 
-18 -11291 -12333 -894 -1115 -701 -1378 * * - 61 -1812 -4803 1626 -749 -515 -1133 -415 -4875 -1294 -4819 -3892 3181 -793 1470 -1377 -246 -3268 -4425 -4986 -193 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -18 -11291 -12333 -894 -1115 -701 -1378 * * - 62 -1812 -4808 -1465 33 -1509 2998 1583 -4879 122 -4823 -3897 972 -4400 -1078 -3055 -1613 -682 -4429 -4991 -1114 - - -149 -500 232 43 -378 398 105 -627 212 -466 -721 275 393 45 98 359 117 -367 -295 -250 - - -98 -4229 -12334 -49 -4901 -701 -1378 * * - 63 -676 -4701 -742 -1422 825 -589 -545 255 1702 -2571 812 -2986 -4424 796 418 -221 1302 -1179 -4912 1028 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -19 -11292 -12334 -894 -1115 -701 -1378 * * - 64 -3341 -4695 350 1378 -1551 -1973 -2998 477 1265 78 273 -1163 21 504 -1507 -1108 282 114 -19 473 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -19 -11292 -12334 -894 -1115 -701 -1378 * * - 65 -3605 -3444 -949 -2090 2356 -1177 -4010 1410 -1703 1341 -404 -1673 -747 -4487 -4679 -2139 -1048 1197 -3900 411 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -19 -11292 -12334 -894 -1115 -701 -1378 * * - 66 -655 -539 1179 279 -1324 1202 -2962 -1895 147 -682 1298 1427 -2056 608 756 -1119 -1893 -4419 -4982 140 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -19 -11292 -12335 -894 -1115 -701 -1378 * * - 67 -1814 -4814 166 -2636 -5135 2921 -568 -4885 -1333 -2415 -3903 1495 -4406 -312 -619 602 -1672 -4436 -4997 -4314 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -20 -11293 -12335 -894 -1115 -701 -1378 * * - 68 -3329 1217 -624 -797 -1594 -4303 1580 -4872 2069 -2414 -3890 617 -4396 283 2449 -560 -267 -2067 -4984 -1334 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -20 
-11293 -12335 -894 -1115 -701 -1378 * * - 69 108 566 -1460 747 -1608 -4306 -2965 -30 1407 -2607 -3878 346 1033 -336 863 -1038 745 617 -4975 -4296 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -20 -11293 -12335 -894 -1115 -701 -1378 * * - 70 -1318 -3465 -283 -172 -3423 -2053 -3974 1957 -4721 1761 1425 -4678 -1762 -4391 -1578 -1974 -1561 1341 -3918 -3570 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -20 -11293 -12336 -894 -1115 -701 -1378 * * - 71 -1165 -4790 -240 -275 -5105 -4306 1035 -2009 1665 -395 707 -1334 -218 -188 1891 -1077 -383 404 110 348 - - -149 -500 233 43 -381 398 106 -626 210 -464 -720 275 394 45 96 359 117 -369 -294 -249 - - -43 -6001 -12336 -150 -3342 -701 -1378 * * - 72 -1929 1218 -1535 -1647 -3990 -4677 -3410 1725 207 -1481 -3117 -3608 -810 -1118 -743 -1942 428 2687 -4325 -3869 - - * * * * * * * * * * * * * * * * * * * * - - * * * * * * * * 0 -// diff --git a/forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.seq b/forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.seq deleted file mode 100644 index 3a8750f..0000000 --- a/forester/archive/RIO/others/hmmer/testsuite/weeviterbi_test.seq +++ /dev/null @@ -1,10 +0,0 @@ ->RU1A_HUMAN U1 SMALL NUCLEAR RIBONUCLEOPROTEIN A (U1 SNRNP A PROTEIN). -MAVPETRPNHTIYINNLNEKIKKDELKKSLYAIFSQFGQILDILVSRSLK -MRGQAFVIFKEVSSATNALRSMQGFPFYDKPMRIQYAKTDSDIIAKMKGT -FVERDRKREKRKPKSQETPATKKAVQGGGATPVVGAVQGPVPGMPPMTQA -PRIMHHMPGQPPYMPPPGMIPPPGLAPGQIPPGAMPPQQLMPGQMPPAQP ->RU1A_HUMAN U1 SMALL NUCLEAR RIBONUCLEOPROTEIN A (U1 SNRNP A PROTEIN). 
-TFVERDRKREKRKPKSQETPATKKAVQGGGATPVVGAVQGPVPGMPPMTQ -APRIMHHMPGQPPYMPPPGMIPPPGLAPGQIPPGAMPPQQLMPGQMPPAQ -PLSENPPNHILFLTNLPEETNELMLSMLFNQFPGFKEVRLVPGRHDIAFV -EFDNEVQAGAARDALQGFKITQNNAMKISFAKK diff --git a/forester/archive/RIO/others/hmmer/tutorial/7LES_DROME b/forester/archive/RIO/others/hmmer/tutorial/7LES_DROME deleted file mode 100644 index 885cccf..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/7LES_DROME +++ /dev/null @@ -1,138 +0,0 @@ -ID 7LES_DROME STANDARD; PRT; 2554 AA. -AC P13368; -DT 01-JAN-1990 (REL. 13, CREATED) -DT 01-JAN-1990 (REL. 13, LAST SEQUENCE UPDATE) -DT 01-NOV-1995 (REL. 32, LAST ANNOTATION UPDATE) -DE SEVENLESS PROTEIN (EC 2.7.1.112). -GN SEV. -OS DROSOPHILA MELANOGASTER (FRUIT FLY). -OC EUKARYOTA; METAZOA; ARTHROPODA; INSECTA; DIPTERA. -RN [1] -RP SEQUENCE FROM N.A. -RC STRAIN=CANTON-S; -RX MEDLINE; 88282538. -RA BASLER K., HAFEN E.; -RL CELL 54:299-311(1988). -RN [2] -RP SEQUENCE FROM N.A. -RC STRAIN=OREGON-R; -RX MEDLINE; 88329706. -RA BOWTELL D.L.L., SIMON M.A., RUBIN G.M.; -RL GENES DEV. 2:620-634(1988). -RN [3] -RP IDENTIFICATION OF FN-III REPEATS. -RX MEDLINE; 90199889. -RA NORTON P.A., HYNES R.O., RESS D.J.G.; -RL CELL 61:15-16(1990). -CC -!- FUNCTION: RECEPTOR FOR AN EXTRACELLULAR SIGNAL REQUIRED TO -CC INSTRUCT A CELL TO DIFFERENTIATE INTO A R7 PHOTORECEPTOR. THE -CC LIGAND FOR SEV IS THE BOSS (BRIDE OF SEVENLESS) PROTEIN ON THE -CC SURFACE OF THE NEIGHBORING R8 CELL. -CC -!- CATALYTIC ACTIVITY: ATP + A PROTEIN TYROSINE = ADP + -CC PROTEIN TYROSINE PHOSPHATE. -CC -!- SUBUNIT: MAY FORM A COMPLEX WITH DRK AND SOS. -CC -!- SIMILARITY: BELONGS TO THE INSULIN RECEPTOR FAMILY OF TYROSINE- -CC PROTEIN KINASES. -CC -!- SIMILARITY: CONTAINS SEVEN FIBRONECTIN TYPE III-LIKE DOMAINS. -CC -!- CAUTION: UNCLEAR WHETHER THE POTENTIAL MEMBRANE SPANNING REGION -CC NEAR THE N-TERMINUS IS PRESENT AS A TRANSMEMBRANE DOMAIN IN THE -CC NATIVE PROTEIN OR SERVES AS A CLEAVED SIGNAL SEQUENCE. -DR EMBL; X13666; G8579; ALT_INIT. 
-DR EMBL; J03158; G158419; -. -DR PIR; A28912; TVFF7L. -DR FLYBASE; FBGN0003366; SEV. -DR PROSITE; PS00107; PROTEIN_KINASE_ATP. -DR PROSITE; PS00109; PROTEIN_KINASE_TYR. -DR PROSITE; PS00239; RECEPTOR_TYR_KIN_II. -DR PROSITE; PS50011; PROTEIN_KINASE_DOM. -KW TRANSFERASE; TYROSINE-PROTEIN KINASE; TRANSMEMBRANE; ATP-BINDING; -KW PHOSPHORYLATION; RECEPTOR; VISION; REPEAT. -FT DOMAIN 1 2123 EXTRACELLULAR (POTENTIAL). -FT TRANSMEM 102 122 POTENTIAL. -FT TRANSMEM 2124 2147 POTENTIAL. -FT DOMAIN 2148 2554 CYTOPLASMIC (POTENTIAL). -FT DOMAIN 311 431 FIBRONECTIN TYPE-III. -FT DOMAIN 436 528 FIBRONECTIN TYPE-III. -FT DOMAIN 822 921 FIBRONECTIN TYPE-III. -FT DOMAIN 1298 1392 FIBRONECTIN TYPE-III. -FT DOMAIN 1680 1794 FIBRONECTIN TYPE-III. -FT DOMAIN 1797 1897 FIBRONECTIN TYPE-III. -FT DOMAIN 1898 1988 FIBRONECTIN TYPE-III. -FT DOMAIN 2038 2046 POLY-ARG. -FT DOMAIN 2209 2485 PROTEIN KINASE. -FT NP_BIND 2215 2223 ATP (BY SIMILARITY). -FT BINDING 2242 2242 ATP (BY SIMILARITY). -FT MUTAGEN 2242 2242 K->M: INACTIVATES THE PROTEIN. -FT MOD_RES 2380 2380 PHOSPHORYLATION (AUTO-) (BY SIMILARITY). -FT CARBOHYD 30 30 POTENTIAL. -FT CARBOHYD 129 129 POTENTIAL. -FT CARBOHYD 481 481 POTENTIAL. -FT CARBOHYD 505 505 POTENTIAL. -FT CARBOHYD 617 617 POTENTIAL. -FT CARBOHYD 647 647 POTENTIAL. -FT CARBOHYD 966 966 POTENTIAL. -FT CARBOHYD 1228 1228 POTENTIAL. -FT CARBOHYD 1313 1313 POTENTIAL. -FT CARBOHYD 1353 1353 POTENTIAL. -FT CARBOHYD 1550 1550 POTENTIAL. -FT CARBOHYD 1557 1557 POTENTIAL. -FT CARBOHYD 1639 1639 POTENTIAL. -FT CARBOHYD 1725 1725 POTENTIAL. -FT CARBOHYD 1756 1756 POTENTIAL. -FT CARBOHYD 1804 1804 POTENTIAL. -FT CARBOHYD 1889 1889 POTENTIAL. -FT CARBOHYD 1947 1947 POTENTIAL. -FT CARBOHYD 2073 2073 POTENTIAL. -FT VARIANT 392 392 M -> V. -FT VARIANT 1668 1668 A -> V. -FT VARIANT 1703 1703 N -> H. -FT VARIANT 1730 1730 R -> K. -FT VARIANT 1731 1731 G -> E. -FT VARIANT 1741 1741 V -> M. -FT VARIANT 2271 2271 R -> C. -FT CONFLICT 1823 1823 E -> Q (IN REF. 2). 
-SQ SEQUENCE 2554 AA; 287107 MW; 1143D891 CRC32; - MTMFWQQNVD HQSDEQDKQA KGAAPTKRLN ISFNVKIAVN VNTKMTTTHI NQQAPGTSSS - SSNSQNASPS KIVVRQQSSS FDLRQQLARL GRQLASGQDG HGGISTILII NLLLLILLSI - CCDVCRSHNY TVHQSPEPVS KDQMRLLRPK LDSDVVEKVA IWHKHAAAAP PSIVEGIAIS - SRPQSTMAHH PDDRDRDRDP SEEQHGVDER MVLERVTRDC VQRCIVEEDL FLDEFGIQCE - KADNGEKCYK TRCTKGCAQW YRALKELESC QEACLSLQFY PYDMPCIGAC EMAQRDYWHL - QRLAISHLVE RTQPQLERAP RADGQSTPLT IRWAMHFPEH YLASRPFNIQ YQFVDHHGEE - LDLEQEDQDA SGETGSSAWF NLADYDCDEY YMCEILEALI PYTQYRFRFE LPFGENRDEV - LYSPATPAYQ TPPEGAPISA PVIEHLMGLD DSHLAVHWHP GRFTNGPIEG YRLRLSSSEG - NATSEQLVPA GRGSYIFSQL QAGTNYTLAL SMINKQGEGP VAKGFVQTHS ARNEKPAKDL - TESVLLVGRR AVMWQSLEPA GENSMIYQSQ EELADIAWSK REQQLWLLNV HGELRSLKFE - SGQMVSPAQQ LKLDLGNISS GRWVPRRLSF DWLHHRLYFA MESPERNQSS FQIISTDLLG - ESAQKVGESF DLPVEQLEVD ALNGWIFWRN EESLWRQDLH GRMIHRLLRI RQPGWFLVQP - QHFIIHLMLP QEGKFLEISY DGGFKHPLPL PPPSNGAGNG PASSHWQSFA LLGRSLLLPD - SGQLILVEQQ GQAASPSASW PLKNLPDCWA VILLVPESQP LTSAGGKPHS LKALLGAQAA - KISWKEPERN PYQSADAARS WSYELEVLDV ASQSAFSIRN IRGPIFGLQR LQPDNLYQLR - VRAINVDGEP GEWTEPLAAR TWPLGPHRLR WASRQGSVIH TNELGEGLEV QQEQLERLPG - PMTMVNESVG YYVTGDGLLH CINLVHSQWG CPISEPLQHV GSVTYDWRGG RVYWTDLARN - CVVRMDPWSG SRELLPVFEA NFLALDPRQG HLYYATSSQL SRHGSTPDEA VTYYRVNGLE - GSIASFVLDT QQDQLFWLVK GSGALRLYRA PLTAGGDSLQ MIQQIKGVFQ AVPDSLQLLR - PLGALLWLER SGRRARLVRL AAPLDVMELP TPDQASPASA LQLLDPQPLP PRDEGVIPMT - VLPDSVRLDD GHWDDFHVRW QPSTSGGNHS VSYRLLLEFG QRLQTLDLST PFARLTQLPQ - AQLQLKISIT PRTAWRSGDT TRVQLTTPPV APSQPRRLRV FVERLATALQ EANVSAVLRW - DAPEQGQEAP MQALEYHISC WVGSELHEEL RLNQSALEAR VEHLQPDQTY HFQVEARVAA - TGAAAGAASH ALHVAPEVQA VPRVLYANAE FIGELDLDTR NRRRLVHTAS PVEHLVGIEG - EQRLLWVNEH VELLTHVPGS APAKLARMRA EVLALAVDWI QRIVYWAELD ATAPQAAIIY - RLDLCNFEGK ILQGERVWST PRGRLLKDLV ALPQAQSLIW LEYEQGSPRN GSLRGRNLTD - GSELEWATVQ PLIRLHAGSL EPGSETLNLV DNQGKLCVYD VARQLCTASA LRAQLNLLGE - DSIAGQLAQD SGYLYAVKNW SIRAYGRRRQ QLEYTVELEP EEVRLLQAHN YQAYPPKNCL - LLPSSGGSLL KATDCEEQRC LLNLPMITAS EDCPLPIPGV 
RYQLNLTLAR GPGSEEHDHG - VEPLGQWLLG AGESLNLTDL LPFTRYRVSG ILSSFYQKKL ALPTLVLAPL ELLTASATPS - PPRNFSVRVL SPRELEVSWL PPEQLRSESV YYTLHWQQEL DGENVQDRRE WEAHERRLET - AGTHRLTGIK PGSGYSLWVQ AHATPTKSNS SERLHVRSFA ELPELQLLEL GPYSLSLTWA - GTPDPLGSLQ LECRSSAEQL RRNVAGNHTK MVVEPLQPRT RYQCRLLLGY AATPGAPLYH - GTAEVYETLG DAPSQPGKPQ LEHIAEEVFR VTWTAARGNG APIALYNLEA LQARSDIRRR - RRRRRRNSGG SLEQLPWAEE PVVVEDQWLD FCNTTELSCI VKSLHSSRLL LFRVRARSLE - HGWGPYSEES ERVAEPFVSP EKRGSLVLAI IAPAAIVSSC VLALVLVRKV QKRRLRAKKL - LQQSRPSIWS NLSTLQTQQQ LMAVRNRAFS TTLSDADIAL LPQINWSQLK LLRFLGSGAF - GEVYEGQLKT EDSEEPQRVA IKSLRKGASE FAELLQEAQL MSNFKHENIV RLVGICFDTE - SISLIMEHME AGDLLSYLRA ARATSTQEPQ PTAGLSLSEL LAMCIDVANG CSYLEDMHFV - HRDLACRNCL VTESTGSTDR RRTVKIGDFG LARDIYKSDY YRKEGEGLLP VRWMSPESLV - DGLFTTQSDV WAFGVLCWEI LTLGQQPYAA RNNFEVLAHV KEGGRLQQPP MCTEKLYSLL - LLCWRTDPWE RPSFRRCYNT LHAISTDLRR TQMASATADT VVSCSRPEFK VRFDGQPLEE - HREHNERPED ENLTLREVPL KDKQLYANEG VSRL -// diff --git a/forester/archive/RIO/others/hmmer/tutorial/Artemia.fa b/forester/archive/RIO/others/hmmer/tutorial/Artemia.fa deleted file mode 100644 index 339a71b..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/Artemia.fa +++ /dev/null @@ -1,48 +0,0 @@ ->S13421 S13421 GLOBIN - BRINE SHRIMP -DKATIKRTWATVTDLPSFGRNVFLSVFAAK -PEYKNLFVEFRNIPASELASSERLLYHGGR -VLSSIDEAIAGIDTPDRAVKTLLALGERHI -SRGTVRRHFEAFSYAFIDELKQRGVESADL -AAWRRGWDNIVNVLEAGLLRRQIDLEVTGL -SCVDVANIQESWSKVSGDLKTTGSVVFQRM -INGHPEYQQLFRQFRDVDLDKLGESNSFVA -HVFRVVAAFDGIIHELDNNQFIVSTLKKLG -EQHIARGTDISHFQNFRVTLLEYLKENGMN -GAQKASWNKAFDAFEKYISMGLSSLKRVDP -ITGLSGLEKNAILSTWGKVRGNLQEVGKAT -FGKLFTAHPEYQQMFRFSQGMPLASLVESP -KFAAHTQRVVSALDQTLLALNRPSDFVYMI -KELGLDHINRGTDRSHFENYQVVFIEYLKE -TLGDSLDEFTVKSFNHVFEVIISFLNEGLR -QADIVDPVTHLTGRQKEMIKASWSKARTDL -RSLGQELFMRMFKAHPEYQTLFVNKGFADV -PLVSLREDERFISHMANVLGGFDTLLQNLD -ESSYFIYSLRNLGDAHIQRKAGTQHFRSFE -AILIPILQESQGLDAASVEAWKKFFDVSIG -VIAQGLKVATSEEADPVTGLYGKEIVALRQ -AFAAVTPRNVEIGKRVFAKLFAAHPEYKNL -FKKFEQYSVEELPSTDAFHYHISLVMNRFS 
-SIGKVIDDNVSFVYLLKKLGREHIKRGLSR -KQFDQFVELYIAEISSELSDTGRNGLEKVL -TFATGVIEQGLFQLGQVDSNTLTALEKQSI -QDIWSNLRSTGLQDLAVKIFTRLFSAHPEY -KLLFTGRFGNVDNINENAPFKAHLHRVLSA -FDIVISTLDDSEHLIRQLKDLGLFHTRLGM -TRSHFDNFATAFLSVAQDIAPNQLTVLGRE -SLNKGFKLMHGVIEEGLLQLERINPITGLS -AREVAVVKQTWNLVKPDLMGVGMRIFKSLF -EAFPAYQAVFPKFSDVPLDKLEDTPAVGKH -SISVTTKLDELIQTLDEPANLALLARQLGE -DHIVLRVNKPMFKSFGKVLVRLLENDLGQR -FSSFASRSWHKAYDVIVEYIEEGLQQSYKQ -DPVTGITDAEKALVQESWDLLKPDLLGLGR -KIFTKVFTKHPDYQILFTRTGFGDTPLTKL -DDNPAFGTHIIKVMRAFDHVIQILGKPKTL -MAYLRSVGADHIATNVERRHFQAFSNALIP -VMQHDLKAQLRPDAVAAWRKGLDRIIGIID -QGLIGLKEVNPQNAFSAYDIQAVQRTWALA -KPDLMGKGAMVFKQLFTDHGYQPLFSNLAQ -YEITGLEGSPELNTHARNVMAQLDTLVGSL -QNSIELGQSLAQLGKDHVPRKVNRVHFKDF -AEHFIPLMKADLGDEFTPLAESAWKRAFDV -MIATIEQGQEGSSHALSSFLTNPVA diff --git a/forester/archive/RIO/others/hmmer/tutorial/RU1A_HUMAN b/forester/archive/RIO/others/hmmer/tutorial/RU1A_HUMAN deleted file mode 100644 index 412e284..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/RU1A_HUMAN +++ /dev/null @@ -1,98 +0,0 @@ -ID RU1A_HUMAN STANDARD; PRT; 282 AA. -AC P09012; -DT 01-NOV-1988 (REL. 09, CREATED) -DT 01-NOV-1988 (REL. 09, LAST SEQUENCE UPDATE) -DT 01-OCT-1996 (REL. 34, LAST ANNOTATION UPDATE) -DE U1 SMALL NUCLEAR RIBONUCLEOPROTEIN A (U1 SNRNP A PROTEIN). -GN SNRPA. -OS HOMO SAPIENS (HUMAN). -OC EUKARYOTA; METAZOA; CHORDATA; VERTEBRATA; TETRAPODA; MAMMALIA; -OC EUTHERIA; PRIMATES. -RN [1] -RP SEQUENCE FROM N.A. -RC TISSUE=LIVER; -RX MEDLINE; 91340152. -RA NELISSEN R.L.H., SILLEKENS P.T.G., BEIJER R.P., -RA GEURTS VAN KESSEL A.H.M., VAN VENROOIJ W.J.; -RL GENE 102:189-196(1991). -RN [2] -RP SEQUENCE FROM N.A. -RX MEDLINE; 88111575. -RA SILLEKENS P.T.G., HABETS W.J., BEIJER R.P., VAN VENROOIJ W.J.; -RL EMBO J. 6:3841-3848(1987). -RN [3] -RP X-RAY CRYSTALLOGRAPHY (2.8 ANGSTROMS) OF 1-95. -RX MEDLINE; 91061907. -RA NAGAI K., OUBRIDGE C., JESSEN T.-H., LI J., EVANS P.R.; -RL NATURE 348:515-520(1990). -RN [4] -RP X-RAY CRYSTALLOGRAPHY (1.92 ANGSTROMS). -RX MEDLINE; 95075454. 
-RA OUBRIDGE C., ITO N., EVANS P.R., TEO C.-H., NAGAI K.; -RL NATURE 372:432-438(1994). -RN [5] -RP STRUCTURE BY NMR OF 11-94. -RX MEDLINE; 91172834. -RA HOFFMAN D.W., QUERY C.C., GOLDEN B.L., WHITE S.W., KEENE J.D.; -RL PROC. NATL. ACAD. SCI. U.S.A. 88:2495-2499(1991). -RN [6] -RP STRUCTURE BY NMR OF 1-102. -RX MEDLINE; 94349935. -RA HOWE P.W.A., NAGAI K., NEUHAUS D., VARANI G.; -RL EMBO J. 13:3873-3881(1994). -RN [7] -RP STRUCTURE BY NMR OF 2-102. -RX MEDLINE; 96186818. -RA ALLAIN F.H.-T., GUBSER C.C., HOWE P.W.A., NAGAI K., NEUHAUS D., -RA VARANI G.; -RL NATURE 380:646-650(1996). -RN [8] -RP STRUCTURE BY NMR OF 1-117. -RX MEDLINE; 96180024. -RA AVIS J.M., ALLAIN F.H.-T., HOWE P.W.A., VARANI G., NAGAI K., -RA NEUHAUS D.; -RL J. MOL. BIOL. 257:398-411(1996). -RN [9] -RP MUTAGENESIS, AND DETAILED STUDIES OF RNA-BINDING. -RX MEDLINE; 92007796. -RA JESSEN T.-H., OUBRIDGE C., TEO C.H., PRITCHARD C., NAGAI K.; -RL EMBO J. 10:3447-3456(1991). -CC -!- FUNCTION: BINDS STEM LOOP II OF U1 SNRNA. IT IS THE FIRST SN-RNP -CC TO INTERACT WITH PRE-MRNA. THIS INTERACTION IS REQUIRED FOR THE -CC SUBSEQUENT BINDING OF U2 SN-RNP AND THE U4/U6/U5 TRI-SN-RNP. -CC -!- SUBUNIT: BELONGS TO THE SPLICEOSOME WHERE IT IS ASSOCIATED WITH -CC SN-RNP U1. -CC -!- SUBCELLULAR LOCATION: NUCLEAR. -CC -!- SIMILARITY: BELONGS TO THE U1 A/B" FAMILY. -CC -!- SIMILARITY: CONTAINS 2 RNA RECOGNITION MOTIFS (RNP). -DR EMBL; M60784; G340052; -. -DR EMBL; M60779; G340052; JOINED. -DR EMBL; M60780; G340052; JOINED. -DR EMBL; M60781; G340052; JOINED. -DR EMBL; M60782; G340052; JOINED. -DR EMBL; M60783; G340052; JOINED. -DR EMBL; X06347; G37541; -. -DR PIR; JQ1528; JQ1528. -DR PDB; 1NRC; 31-JAN-94. -DR MIM; 182285; -. -DR PROSITE; PS00030; RNP_1. -KW NUCLEAR PROTEIN; RNA-BINDING; RIBONUCLEOPROTEIN; REPEAT; -KW SPLICEOSOME; 3D-STRUCTURE. -FT DOMAIN 12 17 RNA-BINDING (RNP2) (BY SIMILARITY). -FT DOMAIN 52 59 RNA-BINDING (RNP1) (BY SIMILARITY). -FT DOMAIN 210 215 RNA-BINDING (RNP2) (BY SIMILARITY). 
-FT DOMAIN 244 251 RNA-BINDING (RNP1) (BY SIMILARITY). -FT REPEAT 1 89 -FT REPEAT 199 282 -FT MUTAGEN 11 11 T->V: ABOLISHES RNA-BINDING. -FT MUTAGEN 13 13 Y->F: SUBSTANTIALLY REDUCES RNA-BINDING. -FT MUTAGEN 15 15 N->V: ABOLISHES RNA-BINDING. -FT MUTAGEN 16 16 N->V: SUBSTANTIALLY REDUCES RNA-BINDING. -FT MUTAGEN 52 52 R->Q: ABOLISHES RNA-BINDING. -SQ SEQUENCE 282 AA; 31279 MW; 22427816 CRC32; - MAVPETRPNH TIYINNLNEK IKKDELKKSL YAIFSQFGQI LDILVSRSLK MRGQAFVIFK - EVSSATNALR SMQGFPFYDK PMRIQYAKTD SDIIAKMKGT FVERDRKREK RKPKSQETPA - TKKAVQGGGA TPVVGAVQGP VPGMPPMTQA PRIMHHMPGQ PPYMPPPGMI PPPGLAPGQI - PPGAMPPQQL MPGQMPPAQP LSENPPNHIL FLTNLPEETN ELMLSMLFNQ FPGFKEVRLV - PGRHDIAFVE FDNEVQAGAA RDALQGFKIT QNNAMKISFA KK -// diff --git a/forester/archive/RIO/others/hmmer/tutorial/amino.null b/forester/archive/RIO/others/hmmer/tutorial/amino.null deleted file mode 100644 index f882173..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/amino.null +++ /dev/null @@ -1,30 +0,0 @@ -# amino.null -# -# Example of a null model file for protein sequences. -# The values in this file are the HMMER 2 default -# settings. - -Amino - -0.075520 # A -0.016973 # C -0.053029 # D -0.063204 # E -0.040762 # F -0.068448 # G -0.022406 # H -0.057284 # I -0.059398 # K -0.093399 # L -0.023569 # M -0.045293 # N -0.049262 # P -0.040231 # Q -0.051573 # R -0.072214 # S -0.057454 # T -0.065252 # V -0.012513 # W -0.031985 # Y - -0.997151 # p1 diff --git a/forester/archive/RIO/others/hmmer/tutorial/amino.pri b/forester/archive/RIO/others/hmmer/tutorial/amino.pri deleted file mode 100644 index 77c1d8c..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/amino.pri +++ /dev/null @@ -1,70 +0,0 @@ -# amino.pri -# -# This file incorporates Blocks9.plib, the UCSC mixture -# Dirichlet prior created by Kimmen Sjolander. -# The values in this file are the HMMER 2 default settings. 
- -Dirichlet # Strategy (mixture Dirichlet) -Amino # type of prior (Amino or Nucleic) - -# Transitions -1 # Single component -1.0 # with probability = 1.0 -0.7939 0.0278 0.0135 # m->m, m->i, m->d alpha's -0.1551 0.1331 # i->m, i->i alpha's -0.9002 0.5630 # d->m, d->d alpha's - -# Match emissions -# -9 # 9 components - -# Component 1 -0.178091 -0.270671 0.039848 0.017576 0.016415 0.014268 0.131916 0.012391 0.022599 0.020358 0.030727 0.015315 0.048298 0.053803 0.020662 0.023612 0.216147 0.147226 0.065438 0.003758 0.009621 -# S A T , C G P >< N V M , Q H R I K F L D W , E Y - -# Component 2 -0.056591 -0.021465 0.0103 0.011741 0.010883 0.385651 0.016416 0.076196 0.035329 0.013921 0.093517 0.022034 0.028593 0.013086 0.023011 0.018866 0.029156 0.018153 0.0361 0.07177 0.419641 -# Y , F W , H ,>< L M , N Q I C V S R , T P A K D G E - -# Component 3 -0.0960191 -0.561459 0.045448 0.438366 0.764167 0.087364 0.259114 0.21494 0.145928 0.762204 0.24732 0.118662 0.441564 0.174822 0.53084 0.465529 0.583402 0.445586 0.22705 0.02951 0.12109 -# Q E , K N R S H D T A >< M P Y G , V L I W C F - -# Component 4 -0.0781233 -0.070143 0.01114 0.019479 0.094657 0.013162 0.048038 0.077 0.032939 0.576639 0.072293 0.02824 0.080372 0.037661 0.185037 0.506783 0.073732 0.071587 0.042532 0.011254 0.028723 -# K R , Q , H >< N E T M S , P W Y A L G V C I , D F - -# Component 5 -0.0834977 -0.041103 0.014794 0.00561 0.010216 0.153602 0.007797 0.007175 0.299635 0.010849 0.999446 0.210189 0.006127 0.013021 0.019798 0.014509 0.012049 0.035799 0.180085 0.012744 0.026466 -# L M , I , F V ><, W Y C T Q , A P H R , K S E N , D G - -# Component 6 -0.0904123 -0.115607 0.037381 0.012414 0.018179 0.051778 0.017255 0.004911 0.796882 0.017074 0.285858 0.075811 0.014548 0.015092 0.011382 0.012696 0.027535 0.088333 0.94434 0.004373 0.016741 -# I V ,, L M >< C T A , F , Y S P W N , E Q K R D G H - -# Component 7 -0.114468 -0.093461 0.004737 0.387252 0.347841 0.010822 0.105877 0.049776 0.014963 0.094276 0.027761 
0.01004 0.187869 0.050018 0.110039 0.038668 0.119471 0.065802 0.02543 0.003215 0.018742 -# D , E N , Q H S >< K G P T A , R Y , M V L F W I C - -# Component 8 -0.0682132 -0.452171 0.114613 0.06246 0.115702 0.284246 0.140204 0.100358 0.55023 0.143995 0.700649 0.27658 0.118569 0.09747 0.126673 0.143634 0.278983 0.358482 0.66175 0.061533 0.199373 -# M , V I L F T Y C A >< W S H Q R N K , P E G , D - -# Component 9 -0.234585 -0.005193 0.004039 0.006722 0.006121 0.003468 0.016931 0.003647 0.002184 0.005019 0.00599 0.001473 0.004158 0.009055 0.00363 0.006583 0.003172 0.00369 0.002967 0.002772 0.002686 -# P G W , C H R D E >< N Q K F Y T L A M , S V I - - -## Insert emissions -1 # Single component -1.0 # with probability 1.0 -681 120 623 651 313 902 241 371 687 676 143 548 647 415 551 926 623 505 102 269 diff --git a/forester/archive/RIO/others/hmmer/tutorial/fn3.slx b/forester/archive/RIO/others/hmmer/tutorial/fn3.slx deleted file mode 100644 index 631b108..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/fn3.slx +++ /dev/null @@ -1,351 +0,0 @@ -# ID fn3 -# AC PF00041 -# DE Fibronectin type III domain -# AU Sonnhammer ELL -# AL HMM_simulated_annealing -# AM hmma -qR -# SE Swissprot_feature_table -# GA Bic_raw 18 hmmls 20 -# CC There is no clear separation between signal and noise. -# DR PROSITE; PDOC00214; -# DR SCOP; 1ttf; sf; -# RN [1] -# RA Bazan J.F. -# RL PNAS USA 87:6934-6938(1990). -# RN [2] -# RA Little E., Bork P., Doolittle R. -# RL J. Mol. Evol. 39:631-643(1994). -# RN [3] -# RA Kornblihtt A.R., et al. -# RL EMBO J. 4:1755-1759(1985). 
-# SQ 109 -7LES_DROME/1799-1891 P.SPP.RNFSVRVL..SPRELEVSWLPPEq...LRSESVYYTLHW...QQ -7LES_DROVI/1917-1997 S.YAPlPPLQLIEL..NAYGMTLAWPGT......PDALSSLTLEC...QS -APU_THETY/928-1009 A.PQPiTDLKAVS...GNGKVDLSWSVV.......DKAVSYNI.YR...S -APU_THETY/1165-1248 P.TAP.V.LQQPGI..ESSRVTLNWSPSA....DDVAIFGYEIYK...SS -AXO1_CHICK/602-692 PpGPP.GGVVVRDI..GDTTVQLSWSRGFd...NHSPIARYSIEAR...T -AXO1_CHICK/807-896 PkVAP.FRVTAKAV..LSSEMDVSWEPVEqg.dMTGVLLGYEIRY...WK -CAML_HUMAN/812-907 P.QAI.PELEGIEIl.NSSAVLVKWRPVDla.qVKGHLRGYNVTY...WR -CHI1_BACCI/465-542 P.SVP.GNARSTGV..TANSVTLAWNAST....DNVGVTGYNV.YN.... -CHIT_STRLI/142-219 P.SAP.GTPTASNI..TDTSVKLSWSAAT....DDKGVKNYDV.LR.... -CHIX_STROI/169-240 P.PAPpTGLRTGSV..TATSVALSWSPV.......TGATGYAV.YR.... -CONT_CHICK/799-884 PtEVP.TDVSVKVL..SSSEISVSWHHVT.....EKSVEGYQIRY...WA -CPSF_CHICK/630-716 P.DPP.QSVRVTSV..GEDWAVLSWEAPPf..dGGMPITGYLMER...KK -CPSF_CHICK/923-1008 P.GPP.QAVRVMEV..WGSNALLQWEPPKd..dGNAEISGYTVQK...AD -ECK_HUMAN/329-420 P.SAP.HYLTAVGM..GA.KVELRWTPPQd..sGGREDIVYSVTCEqcWP -ECK_HUMAN/436-519 Q.TEP.PKVRLEGR..STTSLSVSWSIPPp...QQSRVWKYEVTYR...K -EPH1_HUMAN/333-435 P.SAP.RNLSFSA...SGTQLSLRWEPPAd..tGGRQDVRYSVRCS..QC -EPH3_CHICK/333-429 P.SAP.QAV.ISSV..NETSLMLEWTPPRd..sGGREDLVYNIIC...KS -EPH3_CHICK/444-528 P.SAV.SIMHQVSR..TVDSITLSWSQPDq...PNGVILDYELQY...YE -ETK1_CHICK/325-421 P.SAP.RNV.ISNI..NETSVILDWSWPLd..tGGRKDVTFNIIC...KK -FAS2_SCHAM/530-616 P.SAV.LQVKMDVM..TATTVTFKFFGPGn..dGGLPTKNYAVQY...KQ -FAS2_SCHAM/642-735 T.SGT.ENEVVVSP..YPNRYELRWQVPAd...NGEPITHYSVKS...CP -FINC_BOVIN/577-660 T.SGP.VQVIITETpsQPNSHPIQWSAPE.....SSHISKYILRW...KP -FINC_BOVIN/689-768 P.VVA.TSESVTEI..TASSFVVSWVSA......SDTVSGFRVEY...EL -FINC_BOVIN/780-858 P.DAP.PDPTVDQV..DDTSIVVRWSRP......RAPITGYRIVY...SP -FINC_BOVIN/875-955 KvPPP.RDLQFVEV..TDVKITIMWTPP......ESPVTGYRVDV...IP -FINC_BOVIN/1142-1225 PlSPP.TNLHLEANp.DTGVLTVSWERST.....TPDITGYRITT...TP -FINC_BOVIN/1236-1316 V.PPP.TDLRFTNV..GPDTMRVTWAPPS.....SIELTNLLVRY...SP -FINC_BOVIN/1327-1406 
L.DSP.SGIDFSDI..TANSFTVHWIAP......RATITGYRIRHH...P -FINC_BOVIN/1417-1499 S.DVP.RDLEVIAA..TPTSLLISWDAP......AVTVRYYRITY...GE -FINC_BOVIN/1511-1590 I.DKP.SQMQVTDV..QDNSISVRWLPS......SSPVTGYRVTT...AP -FINC_BOVIN/1601-1680 I.PAP.TNLKFTQV..TPTSLTAQWTAP......NVQLTGYRVRV...TP -FINC_BOVIN/1693-1771 V.SPP.RRARVTDA..TETTITISWRTK......TETITGFQVDA...IP -FINC_BOVIN/1782-1861 I.DAP.SNLRFLAT..TPNSLLVSWQPP......RARITGYIIKY...EK -FINC_CHICK/551-630 I.DRP.KGLTFTEV..DVDSIKIAWESP......QGQVTRYRVTY...SS -FINC_RAT/1266-1346 V.PQL.TDLSFVDI..TDSSIGLRWTPLN.....SSTIIGYRITV...VA -GUNB_CELFI/651-733 P.TTP.GTPVATGV..TTVGASLSWAASTd...AGSGVAGYEL.YR...V -IL7R_HUMAN/129-221 P.EAP.FDLSVIYRe.GANDFVVTFNTSHlq.kKYVKVLMHDVAYR..QE -ITB4_HUMAN/1127-1208 L.GAP.QNPNAKAA..GSRKIHFNWLPP......SGKPMGYRVKY...WI -ITB4_HUMAN/1220-1310 P.SEP.GRLAFNVV..SSTVTQLSWAEPAe...TNGEITAYEVCY...GL -ITB4_HUMAN/1581-1665 P.DTP.TRLVFSAL..GPTSLRVSWQEPR....CERPLQGYSVEY...QL -ITB4_HUMAN/1694-1781 P.SAP.GPLVFTAL..SPDSLQLSWERPRr...PNGDIVGYLVTC...EM -KALM_CHICK/178-271 P.LKPrKELKFIEL..QSGDLEVKWSSKFn...ISIEPVIYVVQRR..WN -KALM_CHICK/544-642 L.AKP.ENLSASFIv.QEGNITGHFSWKIskavLHQPMTGFQVTW...AE -KMLC_CHICK/60-145 P.DPPaGTPCASDI..RSSSLTLSWYGSSy..dGGSAVQSYTVEI...WN -LAR_DROME/322-404 P.TAP.TDVQISEV..TATSVRLEWSYK.....GPEDLQYYVIQY...KP -LAR_DROME/417-503 E.SAP.RNVQVRTL..SSSTMVITWEPPEt...PNGQVTGYKV.Y...YT -LAR_DROME/515-598 P.SQP.SNFRATDI..GETAVTLQWTKPTh...SSENIVHYELYW...ND -LAR_DROME/709-800 P.GDP.QDVKATPL..NSTSIHVSWKPPLek.dRNGIIRGYHIHA...QE -LAR_DROME/909-995 PgGPP.SNITIRFQ..TPDVLCVTWDPPTre.hRNGIITRYDVQFH..KK -MPSF_CHICK/371-457 P.GAP.MDVKCHDA..NRDYVIVTWKPPNt..tSQNPVIGYFVDK...CE -MPSF_CHICK/499-585 P.GPP.TNVHASEI..SKTYVVLSWDPPVp...RGREPLTYFIEK...SM -MPSF_CHICK/600-684 P.SAP.GRVVATRN..TKTSVVVQWDKPK....HEENLYGYYIDY...SV -MPSF_CHICK/699-785 P.SYP.HGITLLNC..DGHSMTLGWKAPKy..sGGSPILGYYIDKR...E -MPSF_CHICK/801-887 P.GPA.YDLTVCEV..RNTSLVLLWKAPVy..eGKSPITGYLVDY...KE -NCA1_BOVIN/509-597 P.SSP.SIDQVEP...YSSTAQVQFDEPEa..tGGVPILKYKAEWR...A -NCA1_BOVIN/610-691 
P.SAP.KLEGQMGE..DGNSIKVKLIKQDd...GGSPIRHYLVKYR...A -NGCA_CHICK/700-794 PeRNP.GGVHGEGN..ETGNLVITWEPLPpq.aWNAPWARYRVQWR...P -NRCA_CHICK/623-709 P.NPP.LDLELTGQ..LERSIELSWVPGEe...NNSPITNFVIEY...ED -NRCA_CHICK/726-810 ....P.SNVQGIGS..EPDNLVITWESLKgf.qSNGPGLQYKVSWR..QK -NRCA_CHICK/928-1014 P.SPP.SFLKITNP..TLDSLTLEWGSPTh...PNGVLTSYILKF...QP -NRG_DROME/717-799 ....P.DNVVGQGT..EPNNLVISWTPMPei.eHNAPNFHYYVSW...K. -NRG_DROME/815-905 PlDAP.TNFTMRQIt.SSTSGYMAWTPVSee.sVRGHFKGYKIQT...WT -NRG_DROME/917-1007 P.SPV.QGLDAYPL..GSSAFMLHWKKPLy...PNGKLTGYKIYY...EE -PHB_ALCFA/344-418 G.SAP.TGLAVTAT..TSTSVSLSWNAV.......ANASSYGV.YR.... -PTP1_DROME/123-205 P.DPP.SNLSVQVR..SGKNAIILWSPPT.....QGSYTAFKIKV...LG -PTP1_DROME/217-301 P.NTP.GKFIVWFR..NETTLLVLWQPPY....PAGIYTHYKVSI...EP -PTP1_DROME/312-394 P.LRP.LNVTFDRDfiTSNSFRVLWEAPK....GISEFDKYQVSV...AT -PTP1_DROME/405-485 P.LPV.RNLRSINDd.KTNTMIITWEADP.....ASTQDEYRIVYHe.LE -PTP1_DROME/583-661 P.NPP.RNMTIETV..RSNSVLVHWSPPE.....SGEFTEYSIRYR...T -PTP1_DROME/864-944 P.EPI.TQLHATNI..TDTEISLRWDLP......KGEYNDFDIAY...LT -PTP1_DROME/958-1044 P.GRV.ERFHPTDV..QPSEINFEWSLPSs..eANGVIRQFSIAY...TN -PTP6_DROME/236-321 V.PQV.SIDFAKAV..GANKIYLNWTVND....GNDPIQKFFITL...QE -PTP6_DROME/332-425 Y.DPI.FIPKVETTgsTASTITIGWNPPPp..dLIDYIQYYELIV...SE -PTP9_DROME/171-259 P.SKP.QNLTILDV..SANSITMSWHPPKn...QNGAIAGYHVFH...IH -PTPB_HUMAN/22-103 AePER.CNFTLAESkaSSHSVSIQWRIL.......GSPCNFSLIY...SS -PTPB_HUMAN/112-192 P.PAR.FGVSKEKT..TSTGLHVWWTPS......SGKVTSYEVQL...FD -PTPB_HUMAN/467-543 P.LAV.LQLRVKHA..NETSLSIMWQTP......VAEWEKYIISL...AD -PTPB_HUMAN/554-632 P.AQV.TDLHVANQg.MTSSLFTNWTQA......QGDVEFYQVLL...IH -PTPB_HUMAN/643-725 P.SSV.SGVTVNNSg.RNDYLSVSWLVA......PGDVDNYEVTL...SH -PTPB_HUMAN/731-808 P.DKV.QGVSVSNSa.RSDYLRVSWVHA......TGDFDHYEVTI...KN -PTPB_HUMAN/907-984 P.SAV.KNIHISPNg.ATDSLTVNWTPG......GGDVDSYTVSA...FR -PTPB_HUMAN/995-1074 P.ASV.QGVIADNAy.SSYSLIVSWQKA......AGVAERYDILL...LT -PTPB_HUMAN/1085-1162 P.AAV.TDLRITEN..STRHLSFRWTAS......EGELSWYNIFL...YN -PTPB_HUMAN/1173-1250 
P.ASV.SHLRGSNRn.TTDSLWFNWSPA......SGDFDFYELIL...YN -PTPB_HUMAN/1261-1344 P.SPP.SLMSFADI..ANTSLAITWKGPP....DWTDYNDFELQW...LP -PTPB_HUMAN/1355-1434 P.DKI.QNLHCRPQ..NSTAIACSWIPP......DSDFDGYSIECR...K -PTPK_MOUSE/290-376 P.PRPiAPPQLLGV..GPTYLLIQLNANSi..iGDGPIILKEVEYR...M -PTPZ_HUMAN/312-401 S.SEP.ENVQADPE..NYTSLLVTWERPRv..vYDTMIEKFAVLY...QQ -SEK_MOUSE/441-525 P.SSI.ALVQAKEV..TRYSVALAWLEPDr...PNGVILEYEVKY...YE -TENA_CHICK/593-671 V.SPP.TELTVTNV..TDKTVNLEWKHE.......NLVNEYLVTY...VP -TENA_CHICK/682-767 L.PAP.EGLKFKSV..RETSVQVEWDPL......SISFDGWELVFRnmQK -TENA_CHICK/774-853 L.DAP.SQIEAKDV..TDTTALITWSKP......LAEIEGIELTY...GP -TENA_CHICK/864-945 L.DAP.RNLKRVSQ..TDNSITLEWKNS......HANIDNYRIKF...AP -TENA_CHICK/956-1033 L.DNP.KDLEVSDP..TETTLSLRWRRP......VAKFDRYRLTY...VS -TENA_CHICK/1045-1124 E.PEL.GNLSVSET..GWDGFQLTWTAA......DGAYENFVIQV...QQ -TENA_CHICK/1136-1215 H.PEV.GELTVSDI..TPESFNLSWTTT......NGDFDAFTIEI...ID -TENA_CHICK/1227-1306 E.PEV.DNLLVSDA..TPDGFRLSWTAD......DGVFDSFVLKIR..DT -TENA_CHICK/1317-1395 V.GSP.KGISFSDI..TENSATVSWTPP......RSRVDSYRVSY...VP -TENA_CHICK/1406-1483 L.DSP.SGLVVMNI..TDSEALATWQPA......IAAVDNYIVSY...SS -TENA_CHICK/1494-1571 L.DAP.KDLSATEV..QSETAVITWRPP......RAPVTDYLLTY...ES -TENA_HUMAN/1254-1334 E.VPDmGNLTVTEV..SWDALRLNWTTP......DGTYDQFTIQV...QE -TENA_HUMAN/1528-1607 L.PLL.ENLTISDI..NPYGFTVSWMAS......ENAFDSFLVTV...VD -TIE1_HUMAN/446-533 P.PVPlAAPRLLTK..QSRQLVVSPLVSFs...GDGPISTVRLHYR..PQ -TIE1_HUMAN/545-632 PlLQP.WLEGWHVE..GTDRLRVSWSLPLv..pGPLVGDGFLLRL...WD -TIE1_HUMAN/644-729 P.PAP.RHLHAQAL..SDSEIQLTWKHPEa...LPGPISKYVVEV...QV -TIE2_HUMAN/444-529 L.PKPlNAPNVIDT..GHNFAVINISSEPy..fGDGPIKSKKLLY...KP -TIE2_HUMAN/543-626 L.PPP.RGLNLLPK..SQTTLNLTWQPIFp...SSEDDFYVEVERR...S -TIE2_HUMAN/639-724 P.PQP.ENIKISNI..THSSAVISWTILD.....GYSISSITIRY...KV -UFO_HUMAN/327-411 L.GPP.ENISATR...NGSQAFVHWQEPRa..pLQGTLLGYRLAY...QG - -7LES_DROME/1799-1891 ELDGEnvqd..rrewEAHER...RLET....AG..THRLTGIKPGSGYSL -7LES_DROVI/1917-1997 LREQ............LQFN...VAGN....HT..QMRLAPLQPKTRYSC 
-APU_THETY/928-1009 TVKGG..........LYEKI...ASNV....TQi.TYTDTEVTNGLKYVY -APU_THETY/1165-1248 SETGPf.........IKIAT...VSDS....VY..NYVDTDVVNGNVYYY -AXO1_CHICK/602-692 LLSNKwkq.....mrTNPVN...IEGN....AE..TAQVVNLIPWMDYEF -AXO1_CHICK/807-896 DGDKEea.......aDRVRT...AGLV....T...SAHVTGLNPNTKYHV -CAML_HUMAN/812-907 EGSQRkhsk..rhihKDHVV...VPAN....TT..SVILSGLRPYSSYHL -CHI1_BACCI/465-542 .GAN............LATS...VTGT....T....ATISGLTAGTSYTF -CHIT_STRLI/142-219 .DGA............KVAT...VTGT....T....YTDNGLTKGTAYSY -CHIX_STROI/169-240 .DGV............KVAT...ASGT....S....ATVTGLTPDTAYAF -CONT_CHICK/799-884 AHDKEa........aAQRVQ...VSNQ....EY..STKLENLKPNTRYHI -CPSF_CHICK/630-716 KGSMRw........mKLNFE...VFPD....T...TYESTKMIEGVFYEM -CPSF_CHICK/923-1008 TRTME..........WFTVL...EHSR....PT..RCTVSELVMGNEYRF -ECK_HUMAN/329-420 E.SGEcgp....ceaSVRYS...EPPHgl.tRT..SVTVSDLEPHMNYTF -ECK_HUMAN/436-519 KGDS............NSYN...VRRT....EGf.SVTLDDLAPDTTYLV -EPH1_HUMAN/333-435 QGTAQdggpcqpcgvGVHFSpgaRGLT....TP..AVHVNGLEPYANYTF -EPH3_CHICK/333-429 CGSGRgact...rcgDNVQF...APRQlgltEP..RIYISDLLAHTQYTF -EPH3_CHICK/444-528 KNLSE..........LNSTA...VKSP....TN..TVTVQNLKAGTIYVF -ETK1_CHICK/325-421 CGGSSkice...pcsDNVRF...LPRQtg.lTNt.TVTVVDLLAHTNYTF -FAS2_SCHAM/530-616 DSQGW..........EDALN...RTWP....VDs.PYILENLKPQTRYNF -FAS2_SCHAM/642-735 VEKYDtewrl.lpypCQEHK...LEGQ....AT..TFQLESLQPDTHYKV -FINC_BOVIN/577-660 KNSPDr.........WKEAT...IPGH....LN..SYTIKGLRPGVVYEG -FINC_BOVIN/689-768 SEEGDe.........PQYLD...LPST....AT..SVNIPDLLPGRKYTV -FINC_BOVIN/780-858 SVEGS..........STELN...LPET....AN..SVTLSDLQPGVQYNI -FINC_BOVIN/875-955 VNLPGe........hGQRLP...VSRN....T...FAEVTGLSPGVTYHF -FINC_BOVIN/1142-1225 TNGQQg........ySLEEV...VHAD....QS..SCTFENLSPGLEYNV -FINC_BOVIN/1236-1316 VKNEEd.........VAELS...ISPS....DN..AVVLTNLLPGTEYLV -FINC_BOVIN/1327-1406 ENMGGr.........PREDR...VPPS....RN..SITLTNLNPGTEYVV -FINC_BOVIN/1417-1499 TGGSSp.........VQEFT...VPGS....KS..TATISGLKPGVDYTI -FINC_BOVIN/1511-1590 KNGPGp.........SKTKT...VGPD....QT..EMTIEGLQPTVEYVV 
-FINC_BOVIN/1601-1680 KEKTGp.........MKEIN...LAPD....SS..SVVVSGLMVATKYEV -FINC_BOVIN/1693-1771 ANGQT..........PIQRT...IRPD....VR..SYTITGLQPGTDYKI -FINC_BOVIN/1782-1861 PGSPPr........eVVPRP...RPGV....T...EATITGLEPGTEYTI -FINC_CHICK/551-630 PEDG............IHEL...LPAPgg.eED..TAELHGLRPGSEYTI -FINC_RAT/1266-1346 AGEGIp.........IFEDF...VDSS....VG..YYTVTGLEPGIDYDI -GUNB_CELFI/651-733 QGTTQ..........TLVGT...TTAA....A....YILRDLTPGTAYSY -IL7R_HUMAN/129-221 KDENK..........WTHVN...LSST....KL..TLLQRKLQPAAMYEI -ITB4_HUMAN/1127-1208 QGDSEs.........EAHLL...DSKV....P...SVELTNLYPYCDYEM -ITB4_HUMAN/1220-1310 VNDDNrpi.....gpMKKVL...VDNP....KNr.MLLIENLRESQPYRY -ITB4_HUMAN/1581-1665 LNGGE..........LHRLN...IPNP....AQt.SVVVEDLLPNHSYVF -ITB4_HUMAN/1694-1781 AQGGGpa.......tAFRVD...GDSP....ES..RLTVPGLSENVPYKF -KALM_CHICK/178-271 QGIHPsed.....daTNWQT...VAQT....TDe.RVQLSDIRASRWYQF -KALM_CHICK/544-642 VTTESrqnslpnsiiSQSQI...LPAD....HY..VLTVPNLRPSMLYRL -KMLC_CHICK/60-145 SVDNK..........WTDLT...TCRS....T...SFNVQDLQADREYKF -LAR_DROME/322-404 KNANQ..........AFSEI...SGII....TM..YYVVRALSPYTEYEF -LAR_DROME/417-503 TNSNQpe......asWNSQM...VDNS....E...LTTVSDVTPHAIYTV -LAR_DROME/515-598 TYANQ..........AHHKR...ISNS....E...AYTLDGLYPDTLYYI -LAR_DROME/709-800 LRDEGkgf....lnePFKFD...VVDT....L...EFNVTGLQPDTKYSI -LAR_DROME/909-995 IDHGL..........GSERN...MTLR....K....AVFTNLEENTEYIF -MPSF_CHICK/371-457 VGLEN..........WVQCN...DAPV....KIc.KYPVTGLYEGRSYIF -MPSF_CHICK/499-585 VGSGS..........WQRVNaqvAVKS....P...RYAVFDLAEGKPYVF -MPSF_CHICK/600-684 VGSNQwe.......pANHKP...INYN....R....FVVHGLETGEQYIF -MPSF_CHICK/699-785 ANHKN..........WHEVNssvISRT....I....YTVEDLTEDAFYEF -MPSF_CHICK/801-887 VDTED..........WITAN...EKPT....SHr.YFKVTDLHQGHTYVF -NCA1_BOVIN/509-597 MGEEVw........hSKWYD...AKEA....SMegIVTIVGLKPETTYAV -NCA1_BOVIN/610-691 LSSEW..........KPEIR...LPSG....SD..HVMLKSLDWNAEYEV -NGCA_CHICK/700-794 LEEPGgggps.ggfpWAEST...VDAP....P....VVVGGLPPFSPFQI -NRCA_CHICK/623-709 GLHEPg........vWHYQT...EVPG....SH..TTVQLKLSPYVNYSF 
-NRCA_CHICK/726-810 DVDDE..........WTSVV...VANV....S...KYIVSGTPTFVPYEI -NRCA_CHICK/928-1014 INNTHel......gpLVEIR...IPAN....ES..SLILKNLNYSTRYKF -NRG_DROME/717-799 .RDIPaa......awENNNI...FDWR....QN..NIVIADQPTFVKYLI -NRG_DROME/815-905 ENEGEe........gLREIH...VKGD....TH..NALVTQFKPDSKNYA -NRG_DROME/917-1007 V.KESyvge..rreyDPHIT...DPRV....T...RMKMAGLKPNSKYRI -PHB_ALCFA/344-418 .NGS............KVGS...ATAT....A....YTDSGLIAGTTYSY -PTP1_DROME/123-205 LSEASss.......yNRTFQ...VNDN....TF..QHSVKELTPGATYQV -PTP1_DROME/217-301 PDANDsvl.....yvEKEGE...PPGP....A...QAAFKGLVPGRAYNI -PTP1_DROME/312-394 TRRQS..........TVPRS...NEPV....AF..SDFRDIAEPGKTFNV -PTP1_DROME/405-485 TFNGD..........TSTLT...TDRT....R....FTLESLLPGRNYSL -PTP1_DROME/583-661 DSEQQ..........WVRLP...SVRS....T...EADITDMTKGEKYTI -PTP1_DROME/864-944 A.DNL..........LAQNM...TTRN....E....ITISDLRPHRNYTF -PTP1_DROME/958-1044 INNLT..........DAGMQ...DFES....EEa.FGVIKNLKPGETYVF -PTP6_DROME/236-321 AGTPTft.......yHKDFI...NGSH....T...SYILDHFKPNTTYFL -PTP6_DROME/332-425 SGEVPkvi.....eeAIYQQ...NSRN....L...PYMFDKLKTATDYEF -PTP9_DROME/171-259 DNQTGve......ivKNSRN...SVET....LI..HFELQNLRPYTDYRV -PTPB_HUMAN/22-103 DTLGAa........lCPTFR...IDNT....TY..GCNLQDLQAGTIYNF -PTPB_HUMAN/112-192 ENNQKiq......gvQIQES...TSWN....E....YTFFNLTAGSKYNI -PTPB_HUMAN/467-543 R.DLL..........LIHKS...LSKD....AK..EFTFTDLVPGRKYMA -PTPB_HUMAN/554-632 ENVV...........IKNES...ISSE....TS..RYSFHSLKSGSLYSV -PTPB_HUMAN/643-725 DGKV...........VQSLV...IAKS....VR..ECSFSSLTPGRLYTV -PTPB_HUMAN/731-808 KNNF...........IQTKS...IPKS....EN..ECVFVQLVPGRLYSV -PTPB_HUMAN/907-984 H.SQK..........VDSQT...IPKH....VF..EHTFHRLEAGEQYQI -PTPB_HUMAN/995-1074 ENGIL..........LRNTS...EPAT....TK..QHKFEDLTPGKKYKI -PTPB_HUMAN/1085-1162 PDGNLq.........ERAQV...DPLV....Q...SFSFQNLLQGRMYKM -PTPB_HUMAN/1173-1250 PNGTKk.........ENWKD...KDLT....E....WRFQGLVPGRKYVL -PTPB_HUMAN/1261-1344 RDALTv.........FNPYN...NRKS....E...GRIVYGLRPGRSYQF -PTPB_HUMAN/1355-1434 MDTQEv.........EFSRK...LEKE....KS..LLNIMMLVPHKRYLV 
-PTPK_MOUSE/290-376 T.SGS..........WTETH...AVNA....P...TYKLWHLDPDTEYEI -PTPZ_HUMAN/312-401 LDGEDq........tKHEFL...TDGY....QDl.GAILNNLLPNMSYVL -SEK_MOUSE/441-525 KDQN...........ERSYR...IVRT....AAr.NTDIKGLNPLTSYVF -TENA_CHICK/593-671 TSSGGl.........DLQFT...VPGN....QT..SATIHELEPGVEYFI -TENA_CHICK/682-767 KDDNG..........DITSS...LKRP....ET..SYMQPGLAPGQQYNV -TENA_CHICK/774-853 KDVPGd.........RTTID...LSED....EN..QYSIGNLRPHTEYEV -TENA_CHICK/864-945 ISGGD..........HTELT...VPKGnq.aTT..RATLTGLRPGTEYGI -TENA_CHICK/956-1033 P.SGK..........KNEME...IPVD....ST..SFILRGLDAGTEYTI -TENA_CHICK/1045-1124 SDNPEe.........TWNIT...VPGG....QH..SVNVTGLKANTPYNV -TENA_CHICK/1136-1215 SNRLLe.........PMEFN...ISGN....SR..TAHISGLSPSTDFIV -TENA_CHICK/1227-1306 KRKSD..........PLELI...VPGH....ER..THDITGLKEGTEYEI -TENA_CHICK/1317-1395 ITGGT..........PNVVT...VDGS....KT..RTKLVKLVPGVDYNV -TENA_CHICK/1406-1483 EDEP...........EVTQM...VSGN....TV..EYDLNGLRPATEYTL -TENA_CHICK/1494-1571 I.DGR..........VKEVI...LDPE....TT..SYTLTELSPSTQYTV -TENA_HUMAN/1254-1334 ADQVEe.........AHNLT...VPGS....LR..SMEIPGLRAGTPYTV -TENA_HUMAN/1528-1607 SGKLLd.........PQEFT...LSGT....QR..KLELRGLITGIGYEV -TIE1_HUMAN/446-533 DSTMD..........WSTIV...VDPS....E...NVTLMNLRPKTGYSV -TIE1_HUMAN/545-632 GTRGQ..........ERREN...VSSP....QAr.TALLTGLTPGTHYQL -TIE1_HUMAN/644-729 AGGAGd.........PLWID...VDRP....EEt.STIIRGLNASTRYLF -TIE2_HUMAN/444-529 VNHYEa.........WQHIQ...VTNE....I....VTLNYLEPRTEYEL -TIE2_HUMAN/543-626 VQKSD..........QQNIK...VPGN....LT..SVLLNNLHPREQYVV -TIE2_HUMAN/639-724 QGKNE..........DQHVDv.kIKNA....TIi.QYQLKGLEPETAYQV -UFO_HUMAN/327-411 QDTPE..........VLMDI...GLRQ....EV..TLELQGDGSVSNLTV - -7LES_DROME/1799-1891 WVQ.AHATPTk....SNSS -7LES_DROVI/1917-1997 RLA.LAYAATp....GAPI -APU_THETY/928-1009 AVT.AVDNDGn...eSALS -APU_THETY/1165-1248 KVV.AVDTSYn....RTAS -AXO1_CHICK/602-692 RVL.ASNILGv....GEPS -AXO1_CHICK/807-896 SVR.AYNRAGa....GPPS -CAML_HUMAN/812-907 EVQ.AFNGRGs....GPAS -CHI1_BACCI/465-542 TIK.AKDAAGn...lSAAS -CHIT_STRLI/142-219 
SVK.ARDTADq...tGPAS -CHIX_STROI/169-240 QVA.AVNGA.......GES -CONT_CHICK/799-884 DVS.AFNSAGy....GPPS -CPSF_CHICK/630-716 RVF.AVNAIGv....SQPS -CPSF_CHICK/923-1008 RVY.SENVCGt....SQEP -ECK_HUMAN/329-420 TVE.ARNGV........SG -ECK_HUMAN/436-519 QVQ.ALTQEGq....GAGS -EPH1_HUMAN/333-435 NVE.AQNGVSglgssGHAS -EPH3_CHICK/333-429 EIQ.AVNGVTd...qSPFS -EPH3_CHICK/444-528 QVR.ARTVAGy....GRYS -ETK1_CHICK/325-421 EID.AVNGVSd...lSTLS -FAS2_SCHAM/530-616 RFA.AQNEVGf....GPWS -FAS2_SCHAM/642-735 EVR.ATNAIGn....SVPG -FINC_BOVIN/577-660 QLI.SVQHY......GQRE -FINC_BOVIN/689-768 NVY.EISEE.......GEQ -FINC_BOVIN/780-858 TIY.AVEEN.......QES -FINC_BOVIN/875-955 KVF.AVNQG.......RES -FINC_BOVIN/1142-1225 SVY.TVKDD.......KES -FINC_BOVIN/1236-1316 SVS.SVYEQ.......HES -FINC_BOVIN/1327-1406 SIV.ALNSK.......EES -FINC_BOVIN/1417-1499 TVY.AVTGRGd....SPAS -FINC_BOVIN/1511-1590 SVY.AQNQN.......GES -FINC_BOVIN/1601-1680 SVY.ALKDT.......LTS -FINC_BOVIN/1693-1771 HLY.TLNDN.......ARS -FINC_BOVIN/1782-1861 QVI.ALKNN.......QKS -FINC_CHICK/551-630 NIV.AIYDD.......MES -FINC_RAT/1266-1346 SVI.TLING.......GES -GUNB_CELFI/651-733 VVK.AKDVAGn...vSAAS -IL7R_HUMAN/129-221 KVR.SIPDHYfkgfwSEWS -ITB4_HUMAN/1127-1208 KVC.AYGAQGe....GPYS -ITB4_HUMAN/1220-1310 TVK.ARNGAGw....GPER -ITB4_HUMAN/1581-1665 RVR.AQSQEGw....GRER -ITB4_HUMAN/1694-1781 KVQ.ARTTEGf....GPER -KALM_CHICK/178-271 RVA.AVNVHGt...rGFTA -KALM_CHICK/544-642 EVQ.VLTTGGe....GPAT -KMLC_CHICK/60-145 RVR.AANVYGi....SEPS -LAR_DROME/322-404 YVI.AVNNIGr....GPPS -LAR_DROME/417-503 RVQ.AYTSMGa....GPMS -LAR_DROME/515-598 WLA.ARSQRGe....GATT -LAR_DROME/709-800 QVA.ALTRKGd....GDRS -LAR_DROME/909-995 RVR.AYTKQGa....GPFS -MPSF_CHICK/371-457 RVR.AVNSAGi....SRPS -MPSF_CHICK/499-585 RVL.SANKHGi....SDPS -MPSF_CHICK/600-684 RVK.AVNAVGf....SENS -MPSF_CHICK/699-785 KIA.AANVVGi....GHPS -MPSF_CHICK/801-887 KVR.AVNDAGv....GKSS -NCA1_BOVIN/509-597 RLA.ALNGKGl....GEIS -NCA1_BOVIN/610-691 YVV.AENQQ.......GKS -NGCA_CHICK/700-794 RVQ.AVNGAGk....GPEA -NRCA_CHICK/623-709 RVI.AVNEIGr....SQPS 
-NRCA_CHICK/726-810 KVQ.ALNDLGy...aPEPS -NRCA_CHICK/928-1014 YFN.AQTSV......GSGS -NRG_DROME/717-799 KVV.AINDR.......GES -NRG_DROME/815-905 RIL.AYNGRFn....GPPS -NRG_DROME/917-1007 SIT.ATTKMGe....GSEH -PHB_ALCFA/344-418 TVT.AVDPTAg...eSQPS -PTP1_DROME/123-205 QAY.TIYDG.......KES -PTP1_DROME/217-301 SVQ.TMSED.......EIS -PTP1_DROME/312-394 IVK.TVSGK.......VTS -PTP1_DROME/405-485 SVQ.AVSKK.......MES -PTP1_DROME/583-661 QVN.TVSFG.......VES -PTP1_DROME/864-944 TVV.VRSGTEss..vLRSS -PTP1_DROME/958-1044 KIQ.AKTAIGf....GPER -PTP6_DROME/236-321 RIV.GKNSIGn....GQPT -PTP6_DROME/332-425 RVR.ACSDLTkt..cGPWS -PTP9_DROME/171-259 IVK.AFTTKNe....GEPS -PTPB_HUMAN/22-103 KII.SLDEE........RT -PTPB_HUMAN/112-192 AIT.AVSGG.......KRS -PTPB_HUMAN/467-543 TVT.SISGD........LK -PTPB_HUMAN/554-632 VVT.TVSGG.......ISS -PTPB_HUMAN/643-725 TIT.TRSGKYe...nHSFS -PTPB_HUMAN/731-808 TVT.TKSGQ........YE -PTPB_HUMAN/907-984 MIA.SVSGS........LK -PTPB_HUMAN/995-1074 QIL.TVSGG.......LFS -PTPB_HUMAN/1085-1162 VIV.THSGE........LS -PTPB_HUMAN/1173-1250 WVV.THSGD........LS -PTPB_HUMAN/1261-1344 NVK.TVSGDSw....KTYS -PTPB_HUMAN/1355-1434 SIK.VQSAG.......MTS -PTPK_MOUSE/290-376 RVLlTRPGEGg...tGLPG -PTPZ_HUMAN/312-401 QIV.AICTNGl...yGKYS -SEK_MOUSE/441-525 HVR.ARTAAGy....GDFS -TENA_CHICK/593-671 RVF.AILKN.......KKS -TENA_CHICK/682-767 SLH.IVKNNTr...gPGLS -TENA_CHICK/774-853 TLI.SRRGD.......MES -TENA_CHICK/864-945 GVT.AVRQD.......RES -TENA_CHICK/956-1033 SLV.AEKGR.......HKS -TENA_CHICK/1045-1124 TLY.GVIRG.......YRT -TENA_CHICK/1136-1215 YLY.GISHG.......FRT -TENA_CHICK/1227-1306 ELY.GVSSG.......RRS -TENA_CHICK/1317-1395 NII.SVKGF.......EES -TENA_CHICK/1406-1483 RVH.AVKDA.......QKS -TENA_CHICK/1494-1571 KLQ.ALSRS.......MRS -TENA_HUMAN/1254-1334 TLH.GEVRG.......HST -TENA_HUMAN/1528-1607 MVS.GFTQG.......HQT -TIE1_HUMAN/446-533 RVQlSRPGEGg...eGAWG -TIE1_HUMAN/545-632 DVQ.LYHCTLl....GPAS -TIE1_HUMAN/644-729 RMR.ASI.QGl....GDWS -TIE2_HUMAN/444-529 CVQ.LVRRGEg....GEGH -TIE2_HUMAN/543-626 RAR..VNTKAq....GEWS 
-TIE2_HUMAN/639-724 DIF.AENNIGs....SNPA -UFO_HUMAN/327-411 CVA.AYTAAGd....GPWS - diff --git a/forester/archive/RIO/others/hmmer/tutorial/globins50.msf b/forester/archive/RIO/others/hmmer/tutorial/globins50.msf deleted file mode 100644 index 2f04100..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/globins50.msf +++ /dev/null @@ -1,427 +0,0 @@ -!!AA_MULTIPLE_ALIGNMENT 1.0 -PileUp of: *.pep - - Symbol comparison table: GenRunData:blosum62.cmp CompCheck: 6430 - - GapWeight: 12 - GapLengthWeight: 4 - - pileup.msf MSF: 308 Type: P August 16, 1999 09:09 Check: 9858 .. - - Name: lgb1_pea Len: 308 Check: 2200 Weight: 1.00 - Name: lgb1_vicfa Len: 308 Check: 214 Weight: 1.00 - Name: myg_escgi Len: 308 Check: 3961 Weight: 1.00 - Name: myg_horse Len: 308 Check: 5619 Weight: 1.00 - Name: myg_progu Len: 308 Check: 6401 Weight: 1.00 - Name: myg_saisc Len: 308 Check: 6606 Weight: 1.00 - Name: myg_lycpi Len: 308 Check: 6090 Weight: 1.00 - Name: myg_mouse Len: 308 Check: 6613 Weight: 1.00 - Name: myg_musan Len: 308 Check: 3942 Weight: 1.00 - Name: hba_ailme Len: 308 Check: 4558 Weight: 1.00 - Name: hba_prolo Len: 308 Check: 5054 Weight: 1.00 - Name: hba_pagla Len: 308 Check: 5383 Weight: 1.00 - Name: hba_macfa Len: 308 Check: 5135 Weight: 1.00 - Name: hba_macsi Len: 308 Check: 5198 Weight: 1.00 - Name: hba_ponpy Len: 308 Check: 5050 Weight: 1.00 - Name: hba2_galcr Len: 308 Check: 5609 Weight: 1.00 - Name: hba_mesau Len: 308 Check: 4702 Weight: 1.00 - Name: hba2_bosmu Len: 308 Check: 4241 Weight: 1.00 - Name: hba_erieu Len: 308 Check: 4680 Weight: 1.00 - Name: hba_frapo Len: 308 Check: 3549 Weight: 1.00 - Name: hba_phaco Len: 308 Check: 4440 Weight: 1.00 - Name: hba_trioc Len: 308 Check: 5465 Weight: 1.00 - Name: hba_ansse Len: 308 Check: 3300 Weight: 1.00 - Name: hba_colli Len: 308 Check: 3816 Weight: 1.00 - Name: hbad_chlme Len: 308 Check: 4571 Weight: 1.00 - Name: hbad_pasmo Len: 308 Check: 6777 Weight: 1.00 - Name: hbaz_horse Len: 308 Check: 7187 Weight: 1.00 - Name: 
hba4_salir Len: 308 Check: 7329 Weight: 1.00 - Name: hbb_ornan Len: 308 Check: 2667 Weight: 1.00 - Name: hbb_tacac Len: 308 Check: 4356 Weight: 1.00 - Name: hbe_ponpy Len: 308 Check: 3827 Weight: 1.00 - Name: hbb_speci Len: 308 Check: 1556 Weight: 1.00 - Name: hbb_speto Len: 308 Check: 2051 Weight: 1.00 - Name: hbb_equhe Len: 308 Check: 3414 Weight: 1.00 - Name: hbb_sunmu Len: 308 Check: 2927 Weight: 1.00 - Name: hbb_calar Len: 308 Check: 3836 Weight: 1.00 - Name: hbb_mansp Len: 308 Check: 4322 Weight: 1.00 - Name: hbb_ursma Len: 308 Check: 4428 Weight: 1.00 - Name: hbb_rabit Len: 308 Check: 4190 Weight: 1.00 - Name: hbb_tupgl Len: 308 Check: 4185 Weight: 1.00 - Name: hbb_triin Len: 308 Check: 1163 Weight: 1.00 - Name: hbb_colli Len: 308 Check: 3958 Weight: 1.00 - Name: hbb_larri Len: 308 Check: 3517 Weight: 1.00 - Name: hbb1_varex Len: 308 Check: 6009 Weight: 1.00 - Name: hbb2_xentr Len: 308 Check: 7617 Weight: 1.00 - Name: hbbl_ranca Len: 308 Check: 5606 Weight: 1.00 - Name: hbb2_tricr Len: 308 Check: 8767 Weight: 1.00 - Name: glb2_mormr Len: 308 Check: 6103 Weight: 1.00 - Name: glbz_chith Len: 308 Check: 8634 Weight: 1.00 - Name: hbf1_ureca Len: 308 Check: 9035 Weight: 1.00 - -// - - 1 50 - lgb1_pea ~~~~~~~~~G FTDKQEALVN SSSE.FKQNL PGYSILFYTI VLEKAPAAKG -lgb1_vicfa ~~~~~~~~~G FTEKQEALVN SSSQLFKQNP SNYSVLFYTI ILQKAPTAKA - myg_escgi ~~~~~~~~~V LSDAEWQLVL NIWAKVEADV AGHGQDILIR LFKGHPETLE - myg_horse ~~~~~~~~~G LSDGEWQQVL NVWGKVEADI AGHGQEVLIR LFTGHPETLE - myg_progu ~~~~~~~~~G LSDGEWQLVL NVWGKVEGDL SGHGQEVLIR LFKGHPETLE - myg_saisc ~~~~~~~~~G LSDGEWQLVL NIWGKVEADI PSHGQEVLIS LFKGHPETLE - myg_lycpi ~~~~~~~~~G LSDGEWQIVL NIWGKVETDL AGHGQEVLIR LFKNHPETLD - myg_mouse ~~~~~~~~~G LSDGEWQLVL NVWGKVEADL AGHGQEVLIG LFKTHPETLD - myg_musan ~~~~~~~~~~ ~~~VDWEKVN SVWSAVESDL TAIGQNILLR LFEQYPESQN - hba_ailme ~~~~~~~~~V LSPADKTNVK ATWDKIGGHA GEYGGEALER TFASFPTTKT - hba_prolo ~~~~~~~~~V LSPADKANIK ATWDKIGGHA GEYGGEALER TFASFPTTKT - hba_pagla ~~~~~~~~~V LSSADKNNIK ATWDKIGSHA 
GEYGAEALER TFISFPTTKT - hba_macfa ~~~~~~~~~V LSPADKTNVK AAWGKVGGHA GEYGAEALER MFLSFPTTKT - hba_macsi ~~~~~~~~~V LSPADKTNVK DAWGKVGGHA GEYGAEALER MFLSFPTTKT - hba_ponpy ~~~~~~~~~V LSPADKTNVK TAWGKVGAHA GDYGAEALER MFLSFPTTKT -hba2_galcr ~~~~~~~~~V LSPTDKSNVK AAWEKVGAHA GDYGAEALER MFLSFPTTKT - hba_mesau ~~~~~~~~~V LSAKDKTNIS EAWGKIGGHA GEYGAEALER MFFVYPTTKT -hba2_bosmu ~~~~~~~~~V LSAADKGNVK AAWGKVGGHA AEYGAEALER MFLSFPTTKT - hba_erieu ~~~~~~~~~V LSATDKANVK TFWGKLGGHG GEYGGEALDR MFQAHPTTKT - hba_frapo ~~~~~~~~~V LSAADKNNVK GIFGKISSHA EDYGAEALER MFITYPSTKT - hba_phaco ~~~~~~~~~V LSAADKNNVK GIFTKIAGHA EEYGAEALER MFITYPSTKT - hba_trioc ~~~~~~~~~V LSANDKTNVK TVFTKITGHA EDYGAETLER MFITYPPTKT - hba_ansse ~~~~~~~~~V LSAADKGNVK TVFGKIGGHA EEYGAETLQR MFQTFPQTKT - hba_colli ~~~~~~~~~V LSANDKSNVK AVFAKIGGQA GDLGGEALER LFITYPQTKT -hbad_chlme ~~~~~~~~~M LTADDKKLLT QLWEKVAGHQ EEFGSEALQR MFLTYPQTKT -hbad_pasmo ~~~~~~~~~M LTAEDKKLIQ QIWGKLGGAE EEIGADALWR MFHSYPSTKT -hbaz_horse ~~~~~~~~~S LTKAERTMVV SIWGKISMQA DAVGTEALQR LFSSYPQTKT -hba4_salir ~~~~~~~~~S LSAKDKANVK AIWGKILPKS DEIGEQALSR MLVVYPQTKA - hbb_ornan ~~~~~~~~VH LSGGEKSAVT NLWGKV..NI NELGGEALGR LLVVYPWTQR - hbb_tacac ~~~~~~~~VH LSGSEKTAVT NLWGHV..NV NELGGEALGR LLVVYPWTQR - hbe_ponpy ~~~~~~~~VH FTAEEKAAVT SLWSKM..NV EEAGGEALGR LLVVYPWTQR - hbb_speci ~~~~~~~~VH LSDGEKNAIS TAWGKV..HA AEVGAEALGR LLVVYPWTQR - hbb_speto ~~~~~~~~VH LTDGEKNAIS TAWGKV..NA AEIGAEALGR LLVVYPWTQR - hbb_equhe ~~~~~~~~VQ LSGEEKAAVL ALWDKV..NE EEVGGEALGR LLVVYPWTQR - hbb_sunmu ~~~~~~~~VH LSGEEKACVT GLWGKV..NE DEVGAEALGR LLVVYPWTQR - hbb_calar ~~~~~~~~VH LTGEEKSAVT ALWGKV..NV DEVGGEALGR LLVVYPWTQR - hbb_mansp ~~~~~~~~VH LTPEEKTAVT TLWGKV..NV DEVGGEALGR LLVVYPWTQR - hbb_ursma ~~~~~~~~VH LTGEEKSLVT GLWGKV..NV DEVGGEALGR LLVVYPWTQR - hbb_rabit ~~~~~~~~VH LSSEEKSAVT ALWGKV..NV EEVGGEALGR LLVVYPWTQR - hbb_tupgl ~~~~~~~~VH LSGEEKAAVT GLWGKV..DL EKVGGQSLGS LLIVYPWTQR - hbb_triin ~~~~~~~~VH LTPEEKALVI GLWAKV..NV KEYGGEALGR LLVVYPWTQR - hbb_colli ~~~~~~~~VH WSAEEKQLIT 
SIWGKV..NV ADCGAEALAR LLIVYPWTQR - hbb_larri ~~~~~~~~VH WSAEEKQLIT GLWGKV..NV ADCGAEALAR LLIVYPWTQR -hbb1_varex ~~~~~~~~VH WTAEEKQLIC SLWGKI..DV GLIGGETLAG LLVIYPWTQR -hbb2_xentr ~~~~~~~~VH WTAEEKATIA SVWGKV..DI EQDGHDALSR LLVVYPWTQR -hbbl_ranca ~~~~~~~~VH WTAEEKAVIN SVWQKV..DV EQDGHEALTR LFIVYPWTQR -hbb2_tricr ~~~~~~~~VH LTAEDRKEIA AILGKV..NV DSLGGQCLAR LIVVNPWSRR -glb2_mormr PIVDSGSVSP LSDAEKNKIR AAWDIVYKNY EKNGVDILVK FFTGTPAAQA -glbz_chith ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbf1_ureca ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - - 51 100 - lgb1_pea LFSFLKD... TAGVEDSPKL QAHAEQVFGL VRDSAAQLRT KGEVVLGNAT -lgb1_vicfa MFSFLKD... SAGVVDSPKL GAHAEKVFGM VRDSAVQLRA TGEVVLDGKD - myg_escgi KFDKFKHLKT EAEMKASEDL KKHGNTVLTA LGGILKKKGH ...HEAELKP - myg_horse KFDKFKHLKT EAEMKASEDL KKHGTVVLTA LGGILKKKGH ...HEAELKP - myg_progu KFDKFKHLKA EDEMRASEEL KKHGTTVLTA LGGILKKKGQ ...HAAELAP - myg_saisc KFDKFKHLKS EDEMKASEEL KKHGTTVLTA LGGILKKKGQ ...HEAELKP - myg_lycpi KFDKFKHLKT EDEMKGSEDL KKHGNTVLTA LGGILKKKGH ...HEAELKP - myg_mouse KFDKFKNLKS EEDMKGSEDL KKHGCTVLTA LGTILKKKGQ ...HAAEIQP - myg_musan HFPKFKN.KS LGELKDTADI KAQADTVLSA LGNIVKKKGS ...HSQPVKA - hba_ailme YFPHF.DLSP .....GSAQV KAHGKKVADA LTTAVGHLDD ...LPGALSA - hba_prolo YFPHF.DLSP .....GSAQV KAHGKKVADA LTLAVGHLDD ...LPGALSA - hba_pagla YFPHF.DLSH .....GSAQV KAHGKKVADA LTLAVGHLED ...LPNALSA - hba_macfa YFPHF.DLSH .....GSAQV KGHGKKVADA LTLAVGHVDD ...MPQALSA - hba_macsi YFPHF.DLSH .....GSAQV KGHGKKVADA LTLAVGHVDD ...MPQALSA - hba_ponpy YFPHF.DLSH .....GSAQV KDHGKKVADA LTNAVAHVDD ...MPNALSA -hba2_galcr YFPHF.DLSH .....GSTQV KGHGKKVADA LTNAVLHVDD ...MPSALSA - hba_mesau YFPHF.DVSH .....GSAQV KGHGKKVADA LTNAVGHLDD ...LPGALSA -hba2_bosmu YFPHF.DLSH .....GSAQV KGHGAKVAAA LTKAVGHLDD ...LPGALSE - hba_erieu YFPHF.DLNP .....GSAQV KGHGKKVADA LTTAVNNLDD ...VPGALSA - hba_frapo YFPHF.DLSH .....GSAQV KGHGKKVVAA LIEAANHIDD ...IAGTLSK - hba_phaco YFPHF.DLSH .....GSAQI KGHGKKVVAA LIEAVNHIDD ...ITGTLSK - hba_trioc 
YFPHF.DLHH .....GSAQI KAHGKKVVGA LIEAVNHIDD ...IAGALSK - hba_ansse YFPHF.DLQP .....GSAQI KAHGKKVAAA LVEAANHIDD ...IAGALSK - hba_colli YFPHF.DLSH .....GSAQI KGHGKKVAEA LVEAANHIDD ...IAGALSK -hbad_chlme YFPHF.DLHP .....GSEQV RGHGKKVAAA LGNAVKSLDN ...LSQALSE -hbad_pasmo YFPHF.DLSQ .....GSDQI RGHGKKVVAA LSNAIKNLDN ...LSQALSE -hbaz_horse YFPHF.DLHE .....GSPQL RAHGSKVAAA VGDAVKSIDN ...VAGALAK -hba4_salir YFSHWASVAP .....GSAPV KKHGITIMNQ IDDCVGHMDD ...LFGFLTK - hbb_ornan FFEAFGDLSS AGAVMGNPKV KAHGAKVLTS FGDALKNLDD ...LKGTFAK - hbb_tacac FFESFGDLSS ADAVMGNAKV KAHGAKVLTS FGDALKNLDN ...LKGTFAK - hbe_ponpy FFDSFGNLSS PSAILGNPKV KAHGKKVLTS FGDAIKNMDN ...LKTTFAK - hbb_speci FFDSFGDLSS ASAVMGNAKV KAHGKKVIDS FSNGLKHLDN ...LKGTFAS - hbb_speto FFDSFGDLSS ASAVMGNAKV KAHGKKVIDS FSNGLKHLDN ...LKGTFAS - hbb_equhe FFDSFGDLSN PAAVMGNPKV KAHGKKVLHS FGEGVHHLDN ...LKGTFAQ - hbb_sunmu FFDSFGDLSS ASAVMGNPKV KAHGKKVLHS LGEGVANLDN ...LKGTFAK - hbb_calar FFESFGDLST PDAVMNNPKV KAHGKKVLGA FSDGLTHLDN ...LKGTFAH - hbb_mansp FFDSFGDLSS PDAVMGNPKV KAHGKKVLGA FSDGLNHLDN ...LKGTFAQ - hbb_ursma FFDSFGDLSS ADAIMNNPKV KAHGKKVLNS FSDGLKNLDN ...LKGTFAK - hbb_rabit FFESFGDLSS ANAVMNNPKV KAHGKKVLAA FSEGLSHLDN ...LKGTFAK - hbb_tupgl FFDSFGDLSS PSAVMSNPKV KAHGKKVLTS FSDGLNHLDN ...LKGTFAK - hbb_triin FFEHFGDLSS ASAIMNNPKV KAHGEKVFTS FGDGLKHLED ...LKGAFAE - hbb_colli FFSSFGNLSS ATAISGNPNV KAHGKKVLTS FGDAVKNLDN ...IKGTFAQ - hbb_larri FFASFGNLSS PTAINGNPMV RAHGKKVLTS FGEAVKNLDN ...IKNTFAQ -hbb1_varex QFSHFGNLSS PTAIAGNPRV KAHGKKVLTS FGDAIKNLDN ...IKDTFAK -hbb2_xentr YFSSFGNLSN VSAVSGNVKV KAHGNKVLSA VGSAIQHLDD ...VKSHLKG -hbbl_ranca YFSTFGDLSS PAAIAGNPKV HAHGKKILGA IDNAIHNLDD ...VKGTLHD -hbb2_tricr YFHDFGDLSS CDAICRNPKV LAHGAKVMRS IVEATKHLDN ...LREYYAD -glb2_mormr FFPKFKGLTT ADALKKSSDV RWHAERIINA VNDAVKSMDD TEKMSMKLQE -glbz_chith ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbf1_ureca ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - - 101 150 - lgb1_pea LGAIHVQKGV TNP.HFVVVK EALLQTIKKA SGNNWSEELN 
TAWEVAYDGL -lgb1_vicfa .GSIHIQKGV LDP.HFVVVK EALLKTIKEA SGDKWSEELS AAWEVAYDGL - myg_escgi LAQSHATKHK IPIKYLEFIS DAIIHVLHSR HPGDFGADAQ AAMNKALELF - myg_horse LAQSHATKHK IPIKYLEFIS DAIIHVLHSK HPGNFGADAQ GAMTKALELF - myg_progu LAQSHATKHK IPVKYLEFIS EAIIQVLQSK HPGDFGADAQ GAMSKALELF - myg_saisc LAQSHATKHK IPVKYLELIS DAIVHVLQKK HPGDFGADAQ GAMKKALELF - myg_lycpi LAQSHATKHK IPVKYLEFIS DAIIQVLQNK HSGDFHADTE AAMKKALELF - myg_mouse LAQSHATKHK IPVKYLEFIS EIIIEVLKKR HSGDFGADAQ GAMSKALELF - myg_musan LAATHITTHK IPPHYFTKIT TIAVDVLSEM YPSEMNAQVQ AAFSGAFKII - hba_ailme LSDLHAHKLR VDPVNFKLLS HCLLVTLASH HPAEFTPAVH ASLDKFFSAV - hba_prolo LSDLHAYKLR VDPVNFKLLS HCLLVTLACH HPAEFTPAVH ASLDKFFTSV - hba_pagla LSDLHAYKLR VDPVNFKLLS HCLLVTLACH HPAEFTPAVH SALDKFFSAV - hba_macfa LSDLHAHKLR VDPVNFKLLS HCLLVTLAAH LPAEFTPAVH ASLDKFLASV - hba_macsi LSDLHAHKLR VDPVNFKLLS HCLLVTLAAH LPAEFTPAVH ASLDKFLASV - hba_ponpy LSDLHAHKLR VDPVNFKLLS HCLLVTLAAH LPAEFTPAVH ASLDKFLASV -hba2_galcr LSDLHAHKLR VDPVNFKLLR HCLLVTLACH HPAEFTPAVH ASLDKFMASV - hba_mesau LSDLHAHKLR VDPVNFKLLS HCLLVTLANH HPADFTPAVH ASLDKFFASV -hba2_bosmu LSDLHAHKLR VDPVNFKLLS HSLLVTLASH LPSDFTPAVH ASLDKFLANV - hba_erieu LSDLHAHKLR VDPVNFKLLS HCLLVTLALH HPADFTPAVH ASLDKFLATV - hba_frapo LSDLHAHKLR VDPVNFKLLG QCFLVVVAIH HPSALTPEVH ASLDKFLCAV - hba_phaco LSDLHAHKLR VDPVNFKLLG QCFLVVVAIH HPSALTPEVH ASLDKFLCAV - hba_trioc LSDLHAQKLR VDPVNFKLLG QCFLVVVAIH HPSVLTPEVH ASLDKFLCAV - hba_ansse LSDLHAQKLR VDPVNFKFLG HCFLVVLAIH HPSLLTPEVH ASMDKFLCAV - hba_colli LSDLHAQKLR VDPVNFKLLG HCFLVVVAVH FPSLLTPEVH ASLDKFVLAV -hbad_chlme LSNLHAYNLR VDPANFKLLA QCFQVVLATH LGKDYSPEMH AAFDKFLSAV -hbad_pasmo LSNLHAYNLR VDPVNFKFLS QCLQVSLATR LGKEYSPEVH SAVDKFMSAV -hbaz_horse LSELHAYILR VDPVNFKFLS HCLLVTLASR LPADFTADAH AAWDKFLSIV -hba4_salir LSELHATKLR VDPTNFKILA HNLIVVIAAY FPAEFTPEIH LSVDKFLQQL - hbb_ornan LSELHCDKLH VDPENFNRLG NVLIVVLARH FSKDFSPEVQ AAWQKLVSGV - hbb_tacac LSELHCDKLH VDPENFNRLG NVLVVVLARH FSKEFTPEAQ AAWQKLVSGV - hbe_ponpy LSELHCDKLH VDPENFKLLG NVMVIILATH 
FGKEFTPEVQ AAWQKLVSAV - hbb_speci LSELHCDKLH VDPENFKLLG NMIVIVMAHH LGKDFTPEAQ AAFQKVVAGV - hbb_speto LSELHCDKLH VDPENFKLLG NMIVIVMAHH LGKDFTPEAQ AAFQKVVAGV - hbb_equhe LSELHCDKLH VDPENFRLLG NVLVVVLARH FGKDFTPELQ ASYQKVVAGV - hbb_sunmu LSELHCDKLH VDPENFRLLG NVLVVVLASK FGKEFTPPVQ AAFQKVVAGV - hbb_calar LSELHCDKLH VDPENFRLLG NVLVCVLAHH FGKEFTPVVQ AAYQKVVAGV - hbb_mansp LSELHCDKLH VDPENFKLLG NVLVCVLAHH FGKEFTPQVQ AAYQKVVAGV - hbb_ursma LSELHCDKLH VDPENFKLLG NVLVCVLAHH FGKEFTPQVQ AAYQKVVAGV - hbb_rabit LSELHCDKLH VDPENFRLLG NVLVIVLSHH FGKEFTPQVQ AAYQKVVAGV - hbb_tupgl LSELHCDKLH VDPENFRLLG NVLVRVLACN FGPEFTPQVQ AAFQKVVAGV - hbb_triin LSELHCDKLH VDPENFRLLG NVLVCVLARH FGKEFSPEAQ AAYQKVVAGV - hbb_colli LSELHCDKLH VDPENFRLLG DILVIILAAH FGKDFTPECQ AAWQKLVRVV - hbb_larri LSELHCDKLH VDPENFRLLG DILIIVLAAH FAKDFTPDSQ AAWQKLVRVV -hbb1_varex LSELHCDKLH VDPTNFKLLG NVLVIVLADH HGKEFTPAHH AAYQKLVNVV -hbb2_xentr LSKSHAEDLH VDPENFKRLA DVLVIVLAAK LGSAFTPQVQ AVWEKLNATL -hbbl_ranca LSEEHANELH VDPENFRRLG EVLIVVLGAK LGKAFSPQVQ HVWEKFIAVL -hbb2_tricr LSVTHSLKFY VDPENFKLFS GIVIVCLALT LQTDFSCHKQ LAFEKLMKGV -glb2_mormr LSVKHAQSFY VDRQYFKVLA GII....... 
..ADTTAPGD AGFEKLMSMI -glbz_chith ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbf1_ureca ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - - 151 200 - lgb1_pea ATAIKKAMKT A~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -lgb1_vicfa ATAIKAA~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_escgi RKDIAAKYKE LGFQG~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_horse RNDIAAKYKE LGFQG~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_progu RNDIAAKYKE LGFQG~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_saisc RNDMAAKYKE LGFQG~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_lycpi RNDIAAKYKE LGFQG~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_mouse RNDIAAKYKE LGFQG~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_musan CSDIEKEYKA ANFQG~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_ailme STVLTSKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_prolo STVLTSKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_pagla STVLTSKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_macfa STVLTSKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_macsi STVLTSKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_ponpy STVLTSKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hba2_galcr STVLTSKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_mesau STVLTSKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hba2_bosmu STVLTSKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_erieu ATVLTSKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_frapo GNVLTAKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_phaco GTVLTAKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_trioc GNVLSAKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_ansse ATVLTAKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_colli GTVLTAKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbad_chlme AAVLAEKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbad_pasmo ASVLAEKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbaz_horse SSVLTEKYR~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hba4_salir ALALAEKYR~ 
~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_ornan AHALGHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_tacac SHALAHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbe_ponpy AIALAHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_speci ANALAHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_speto ANALSHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_equhe ANALAHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_sunmu ANALAHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_calar ANALAHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_mansp ANALAHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_ursma ANALAHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_rabit ANALAHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_tupgl ANALAHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_triin ANALAHKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_colli AHALARKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_larri AHALARKYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbb1_varex SHSLARRYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbb2_xentr VAALSHGYF~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbbl_ranca VDALSHSYH~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbb2_tricr SHALGHGY~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -glb2_mormr CILLSSAY~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -glbz_chith ~~~~~MKFII LALCVAAASA LSGDQIGLVQ STYGKVKGDS VGILYAVFKA -hbf1_ureca ~~~~~~~~~~ ~~~~GLTTAQ IKAIQDHWFL NIKGCLQAAA DSIFFKYLTA - - 201 250 - lgb1_pea ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -lgb1_vicfa ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_escgi ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_horse ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_progu ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_saisc ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_lycpi ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - 
myg_mouse ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_musan ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_ailme ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_prolo ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_pagla ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_macfa ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_macsi ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_ponpy ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hba2_galcr ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_mesau ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hba2_bosmu ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_erieu ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_frapo ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_phaco ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_trioc ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_ansse ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_colli ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbad_chlme ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbad_pasmo ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbaz_horse ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hba4_salir ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_ornan ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_tacac ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbe_ponpy ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_speci ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_speto ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_equhe ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_sunmu ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_calar ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_mansp ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ 
~~~~~~~~~~ - hbb_ursma ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_rabit ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_tupgl ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_triin ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_colli ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_larri ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbb1_varex ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbb2_xentr ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbbl_ranca ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbb2_tricr ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -glb2_mormr ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -glbz_chith DPTIQAAFPQ FVGKDLDAIK GGAEFSTHAG RIVGFLGGVI DDL.PNIGKH -hbf1_ureca YPGDLAFFHK FSSVPLYGLR SNPAYKAQTL TVINYLDKVV DALGGNAGAL - - 251 300 - lgb1_pea ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -lgb1_vicfa ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_escgi ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_horse ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_progu ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_saisc ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_lycpi ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_mouse ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - myg_musan ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_ailme ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_prolo ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_pagla ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_macfa ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_macsi ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_ponpy ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hba2_galcr ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_mesau ~~~~~~~~~~ ~~~~~~~~~~ 
~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hba2_bosmu ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_erieu ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_frapo ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_phaco ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_trioc ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_ansse ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hba_colli ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbad_chlme ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbad_pasmo ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbaz_horse ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hba4_salir ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_ornan ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_tacac ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbe_ponpy ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_speci ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_speto ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_equhe ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_sunmu ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_calar ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_mansp ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_ursma ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_rabit ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_tupgl ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_triin ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_colli ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ - hbb_larri ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbb1_varex ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbb2_xentr ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbbl_ranca ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -hbb2_tricr ~~~~~~~~~~ 
~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -glb2_mormr ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ -glbz_chith VDALVATHKP RGVTHAQFNN FRAAFIAYLK GHVDYTAAVE AAWGATFDAF -hbf1_ureca MKAKVPSHDA MGITPKHFGQ LLKLVGGVFQ EEFSADPTTV AAWGDAAGVL - - 301 - lgb1_pea ~~~~~~~~ -lgb1_vicfa ~~~~~~~~ - myg_escgi ~~~~~~~~ - myg_horse ~~~~~~~~ - myg_progu ~~~~~~~~ - myg_saisc ~~~~~~~~ - myg_lycpi ~~~~~~~~ - myg_mouse ~~~~~~~~ - myg_musan ~~~~~~~~ - hba_ailme ~~~~~~~~ - hba_prolo ~~~~~~~~ - hba_pagla ~~~~~~~~ - hba_macfa ~~~~~~~~ - hba_macsi ~~~~~~~~ - hba_ponpy ~~~~~~~~ -hba2_galcr ~~~~~~~~ - hba_mesau ~~~~~~~~ -hba2_bosmu ~~~~~~~~ - hba_erieu ~~~~~~~~ - hba_frapo ~~~~~~~~ - hba_phaco ~~~~~~~~ - hba_trioc ~~~~~~~~ - hba_ansse ~~~~~~~~ - hba_colli ~~~~~~~~ -hbad_chlme ~~~~~~~~ -hbad_pasmo ~~~~~~~~ -hbaz_horse ~~~~~~~~ -hba4_salir ~~~~~~~~ - hbb_ornan ~~~~~~~~ - hbb_tacac ~~~~~~~~ - hbe_ponpy ~~~~~~~~ - hbb_speci ~~~~~~~~ - hbb_speto ~~~~~~~~ - hbb_equhe ~~~~~~~~ - hbb_sunmu ~~~~~~~~ - hbb_calar ~~~~~~~~ - hbb_mansp ~~~~~~~~ - hbb_ursma ~~~~~~~~ - hbb_rabit ~~~~~~~~ - hbb_tupgl ~~~~~~~~ - hbb_triin ~~~~~~~~ - hbb_colli ~~~~~~~~ - hbb_larri ~~~~~~~~ -hbb1_varex ~~~~~~~~ -hbb2_xentr ~~~~~~~~ -hbbl_ranca ~~~~~~~~ -hbb2_tricr ~~~~~~~~ -glb2_mormr ~~~~~~~~ -glbz_chith FGAVFAKM -hbf1_ureca VAAMK~~~ - diff --git a/forester/archive/RIO/others/hmmer/tutorial/globins630.fa b/forester/archive/RIO/others/hmmer/tutorial/globins630.fa deleted file mode 100644 index b936a34..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/globins630.fa +++ /dev/null @@ -1,2520 +0,0 @@ -> BAHG_VITSP -MLDQQTINIIKATVPVLKEHGVTITTTFYKNLFAKHPEVRPLFDMGRQESLEQPKALAM -TVLAAAQNIENLPAILPAVKKIAVKHCQAGVAAAHYPIVGQELLGAIKEVLGDAATDDIL -DAWGKAYGVIADVfiqveadLYAQAVE -> GLB1_ANABR -PSVQGAAAQLTADVKKDLRDSWKVIGSDKKGNGVALMTTLFADNQETIGYFKRLGNVSQ -GMANDKLRGHSITLMYALQNFIDQLDNTDDLVCVVEKFAVNHITRKISAAEFGKINGPIK -KVLASKNFGDKYANAWAKLVAVVQAAL -> GLB1_ARTSX -ERVDPITGLSGLEKNAILDTWGKVRGNLQEVGKATFGKLFAAHPEYQQMFRFFQGVQLA 
-FLVQSPKFAAHTQRVVSALDQTLLALNRPSDQFVYMIKELGLDHINRGTDRSFVEYLKES -LGDSVDEFTVQSFGEVIVNFLNEGLRQA -> GLB1_CALSO -VSANDIKNVQDTWGKLYDQWDAVHAsKFYNKLFKDSEDISEAFVKAGTGSGIAMKRQAL -VFGAILQEFVANLNDPTALTLKIKGLCATHKTRGITNMELFAFALADLVAYMGTtISFTA -AQKASWTAVNDVILHQMSSYFATVA -> GLB1_CHITH -GPSGDQIAAAKASWNTVKNNQVDILYAVFKANPDIQTAFSQFAGKDLDSIKGTPDFSKH -AGRVVGLFSEVMDLLGNDANTPTILAKAKDFGKSHKSRASPAQLDNFRKSLVVYLKGATK -WDSAVESSWAPVLDFVFSTLKNEL -> GLB1_GLYDI -GLSAAQRQVIAATWKDIAGADNGAGVGKDCLIKFLSAHPQMAAVFGFSGASDPGVAALG -AKVLAQIGVAVSHLGDEGKMVAQMKAVGVRHKGYGNKHIKAQYFEPLGASLLSAMEHRIG -GKMNAAAKDAWAAAYADISGALISGLQS -> GLB1_LUMTE -ECLVTEGLKVKLQWASAFGHAHQRVAFGLELwkgILREHPEIKAPFSRVRGDNIYSPQF -GAHSQRVLSGLDITISMLDTPDmLAAQLAHLKVQHVERNLKPEFFDIFLKHLLHVLGDRL -GTHFDFGAWHDCVDQIIDGIKDI -> GLB1_MORMR -PIVDSGSVSPLSDAEKNKIRAAWDLVYKDYEKTGVDILVKFFTGTPAAQAFFPKFKGLT -TADDLKQSSDVRWHAERIINAVNDAVKSMDDTEKMSMKLKELSIKHAQSFYVDRQYFKVL -AGIIADTTAPGDAGFEKLMSMICILLSSAY -> GLB1_PARCH -GGTLAIQSHGDLTLAQKKIVRKTWHQLMRNKTSFVTDLFIRIFAYDPAAQNKFPQMAGM -SASQLRSSRQMQAHAIRVSSIMSEYIEELDSDILPELLATLARTHDLNKVGPAHYDLFAK -VLMEALQAELGSDFNQKTRDSWAKAFSIVQAVLLVKHG -> GLB1_PETMA -PIVDSGSVPALTAAEKATIRTAWAPVYAKYQSTGVDILIKFFTSNPAAQAFFPKFQGLT -SADQLKKSMDVRWHAERIINAVNDAVVAMDDTEKMSLKLRELSGKHAKSFQVDPQYFKVL -AAVIVDTVLPGDAGLEKLMSMICILLRSSY -> GLB1_PHESE -DCNTLKRFKVKHQWQQVFSGEhHRTEFSLHFWKEFLHDHPDLVSLFKRVQGENIYSPEF -QAHGIRVLAGLDSVIGVLDEDDTFTVQLAHLKAQHTERGTKPEYFDLFGTQLFDILGDKL -GTHFDQAAWRDCYAVIAAGIKP -> GLB1_SCAIN -PSVYDAAAQLTADVKKDLRDSWKVIGSDKKGNGVALMTTLFADNQETIGYFKRLGNVSQ -GMANDKLRGHSITLMYALQNFIDQLDNPDDLVCVVEKFAVNHITRKISAAEFGKINGPIK -KVLASKNFGDKYANAWAKLVAVVQAAL -> GLB1_TYLHE -TDCGILQRIKVKQQWAQVYSVGESRTDFAIDVFNNFFRTNPDRSLFNRVNGDNVYSPEF -KAHMVRVFAGFDILISVLDDKPVLDQALAHYAAFHKQFGTIPFKAFGQTMFQTIAEHIHG -ADIGAWRACYAEqIVTGITA -> GLB2_ANATR -PSVQDAAAQLTADVKKDLRDSWKVLGSDKKGDGMALMTTLFNDHQETIAYFKRMGDVSQ -GMANSKLRGHSITLMYALQNFIDQLDSTDDLICVVEKFAVNHITRKISGAEFGKINGPMK -KVLASKNFGDKYANAWAKLVGVVQAAL -> GLB2_CALSO -VSQADIAAVQTSWRRCYCSWDNEDGLKFYQTLFDSNSKIRHAFESAGATNDTEMEKQAN 
-LFGLMMTQFIDNLDDTTALNYKISGLMATHKTRNVVDPALFAIALNELVKFIGNQQPAWK -NVTAVILSQMKIALSSN -> GLB2_CHITH -APLSADEASLVRGSWAQVKHSEVDILYYIFKANPDIMAKFPQFAGKDLETLKGTGQFAT -HAGRIVGFVSEIVALMGNSANMPAMETLIKDMAANHKARGIPKAQFNEFRASLVSYLQSK -VSWNDSLGAAWTQGLDNVFNMMFSYL -> GLB2_LUMTE -KKQCGVLEGLKVKSEWGRAYGSGhDREAFSQAIWRATFAQVPESRSLFKRVHGDDTSHP -AFIAHAERVLGGLDIAISTLDQPATLKEELDHLQVQHEGRKIPDNYFDAFKTAILHVVAA -QLGRCYDREAWDACIDHIEDGIKGHH -> GLB2_MORMR -PIVDSGSVSPLSDAEKNKIRAAWDIVYKNYEKNGVDILVKFFTGTPAAQAFFPKFKGLT -TADALKKSSDVRWHAERIINAVNDAVKSMDDTEKMSMKLQELSVKHAQSFYVDRQYFKVL -AGIIADTTAPGDAGFEKLMSMICILLSSAY -> GLB2_TYLHE -SSDHCGPLQRLKVKQQWAKAYGVGHERVELgialwksMFAQDNDARDLFKRVHGEDVHS -PAFEAHMARVFNGLDRVISSLTDEPVLNAQLEHLRQQHIKLGITGHMFNLMRTGLAYVLP -AQLGRCFDKEAWAACWDEVIYPGIKHD -> GLB3_CHITH -MKFLILALCFAAASALSADQISTVQASFDKVKGDPVGILYAVFKADPSIMAKFTQFAGK -DLESIKGTAPFEIHANRIVGFFSKIIGELPNIEADVNTFVASHKPRGVTHDQLNNFRAGF -VSYMKAHTDFAGAEAAWGATLDTFFGMIFSKM -> GLB3_CHITP -LSADQISTVQASFDKVKGDPVGILYAVFKADPSIMAKFTQFAGKDLESIKGTAPFETHA -NRIVGFFSKIIGELPNIEADVNTFVASHKPRGVTHDQLNNFRAGFVSYMKAHTDFAGAEA -AWGATLDTFFGMIFSKM -> GLB3_LAMSP -YECGPLQRLKVKRQWAEAYGSGnDREEFGHFIWTHVFKDAPSARDLFKRVRGDNIHTPA -FRAHATRVLGGLDMCIALLDDEGVLNTQLAHLASQHSSRGVSAAQYDVVEHSVMMGVEHE -IGqNVFDKDAWQACLDVITGGIQGN -> GLB3_MORMR -PIVDSGSVSPLTAADKTKILAAWDLVYKNYEKNSVDILVKFFTGTPAAQAFFPKFKGLT -TADDLKKSSDVRWHAERIINAVNDAVKSMDDTEKMSMKLKELSNKHVKNFNVDRKYFKVL -AGVIADTVAPGDASFEKLMSIICILLNSAY -> GLB3_MYXGL -PITDHGQPPTLSEGDKKAIRESWPQIYKNFEQNSLAVLLEFLKKFPKAQDSFPKFSAKK -SHLEQDPAVKLQAEVIINAVNHTIGLMDKEAAMKKYLKDLSTKHSTEFQVNPDMFKELSA -VFVSTMGGKAAYEKLFSIIATLLRSTYDA -> GLB3_PETMA -PIVDSGSVAPLSAAEKTKIRSAWAPVYSNYETTGVDILVKFFTSTPAAQEFFPKFKGLT -TADQLKKSADVRWHAERIINAVNDAVVSMDDTEKMSMKLGDLSGKHAKSFQVDPQYFKVL -AAVIADTVAAGDAGFEKLMSMICILLRSAY -> GLB3_TYLHE -DDCCSAADRHEVLDNWKGIWSAEftgRRVAIGQAIFQELFALDPNAKGVFGRVNVDKPS -EADWKAHVIRVINGLDLAVNLLEDPKALQEELKHLARQHRERSGVKAVYFDEMEKALLKV -LPQVSSHFNSGAWDRCFTRIADVIKAELP -> GLB4_CHITH -MKLLILALCFAAASALTADQISTVQSSFAGVKGDAVGILYAVFKADPSIQAKFTQFAGK 
-DLDSIKGSADFSAHANKIVGFFSKIIGDLPNIDGDVTTFVASHTPRGVTHDQLNNFRAGF -VSYMKAHTDFAGAEAAWGATLDAFFGMVFAKM -> GLB4_GLYDI -GLSAAQRQVVASTWKDIAGSDNGAGVGKECFTKFLSAHHDIAAVFGFSGASDPGVADLG -AKVLAQIGVAVSHLGDEGKMVAEMKAVGVRHKGYGYKHIKAEYFEPLGASLLSAMEHRIG -GKMTAAAKDAWAAAYADISGALISGLQS -> GLB4_LUMTE -ADDEDCCSYEDRREIRHIWDDVWSSSftdRRVAIVRAVFDDLFKHYPTSKALFERVKID -EPESGEFKSHLVRVANGLDLLINLLDDTLVLQSHLGHLADQHIQRKGVTKEYFRGIGEAF -ARVLPQVLSCFNVDAWNRCFHRLVARIAKDLP -> GLB4_TYLHE -DTCCSIEDRREVQALWRSIWSAEDTGRRTLigrllfEELFEIDGATKGLFKRVNVDDTH -SPEEFAHVLRVVNGLDTLIGVLGDSDTLNSLIDHLAEQHKARAGFKTVYFKEFGKALNHV -LPEVASCFNPEAWNHCFDGLVDVISHRIDG -> GLB5_PETMA -PIVDTGSVAPLSAAEKTKIRSAWAPVYSTYETSGVDILVKFFTSTPAAQEFFPKFKGLT -TADQLKKSADVRWHAERIINAVNDAVASMDDTEKMSMKLRDLSGKHAKSFQVDPQYFKVL -AAVIADTVAAGDAGFEKLMSMICILLRSAY -> GLB6_CHITH -AVLTTEQADLVKKTWSTVKFNEVDILYAVFKAYPDIMAKFPQFAGKDLDSIKDSAAFAT -HATRIVSFLSEVISLAGSDANIPAIQNLAKELATSHKPRGVSKDQFTEFRTALFTYLKAH -INFDGPTETAWTLALDTTYAMLFSAMDS -> GLB7_ARTSX -ALTALEKQSIQDIWTILKAVGLEFLqvkmfGKLFADHPEYKAHFDNFLTAIFSVAedlv -pKLRAHLHRVIDAFDLVIFALGRESLRGSLKDLGIFHTGRDIVDPVEsltgFKLMVAVIE -EGLDTFRAVPEYSKGLEGrFGNVDNINENAPFR -> GLB7_CHITH -APLSADQASLVKSTWAQVRNSEVEILAAVFTAYPDIQARFPQFAGKDVASIKDTGAFAT -HAGRIVGFVSEIIALIGNESNAPAVQTLVGQLAASHKARGISQAQFNEFRAGLVSYVSSN -VAWNAAAESAWTAGLDNIFGLLFAAL -> GLB8_CHITH -AVTPMSADQLALFKSSWNTVKHNEVDILYAVFKANPDIQAKFPQFAGKDLDSIKDSADF -AVHSGRIVGFFSEVIGLIGNPENRPALKTLIDGLASSHKARGIEKAQFEEFRASLVDYLS -HHLDWNDTMKSTWDLALNNmFFYILHALEVAQ -> GLB9_CHITH -DPVSSDEANAIRASWAGVKHNEVDILAAVFSDHPDIQARFPQFAGKDLASIKDTGAFAT -HAGRIVGFISEIVALVGNESNAPAMATLINELSTSHHNRGITKGQFNEFRSSLVSYLSSH -ASWNDATADAWTHGLDNIFGMIFAHL -> GLBA_ANATR -VADAVAKVCGSEAIKGNLRRSWGVLMSADIEATGLTYLANLFTLRPDTKTYFTRLGDVQ -KGKANSKLRGHAITLTYALDWFVDSLDDPSRLKCVVEKFAVNHINRKISGDAFGSIIPEM -KETLKARMGSYSDDVGAAWVQAILGMQNAVLSAL -> GLBA_SCAIN -VADAVAKVCGSEAIKANLRRSWGVLSADIEATGLMLMSNLFTLRPDTKTYFTRLGDVQK -GKANSKLRGHAITLTYALNNFVDSLDDPSRLKCVVEKFAVNHINRKISGDAFGAIVEPMK -ETLKARMGNYYSDDVAGAWAALVGVVQAAL -> GLBB_ANATR 
-STVAELANAVVSNADQKDLLRLSWGVLSVDMEGTGLMLMANLFKTSSAARTKFARLGDV -SAGKDNSKLRGHSITLMYALQNFIDALDNVDRLKCVVEKFAVNHINRQISADEFGEIVGP -LRQTLKARMGSYFDEDTVSAWAALVAVVQASL -> GLBB_SCAIN -SKVAELANAVVSNADQKDLLRMSWGVLSVDMEGTGLMLMANLFKTSPSAKGKFARLGDV -SAGKDNSKLRGHSITLMYALQNFVDALDDVERLKCVVEKFAVNHINRQISADEFGEIVGP -LRQTLKARMGNYFDEDTVSAWASLVAVVQASL -> GLBC_CAUAR -GTLAIQAQGDLTLAQKKIVRKTWHQLMRNKTSFVTDVFIRIFAYDPSAQNKFPQMAGMS -ASQLRSSRQMQAHAIRVSSIMSEYVEELDSDILPELLATLARTHDLNKVGADHYNLFAKV -LMEALQAELGSAFNEKTRDAWAKAFSVVQAVLLVKHGN -> GLBC_CHITH -MKFFAVLALCIVGaiaSPLTADEASLVQSSWKAVSHNEVDILAAVFAAYPDIQAKFPQF -AGKDLASIKDTGAFATHATRIVSFLSEVIALSGNESNASAVNSLVSKLGDDHKARGVSAA -QFGEFRTALVAYLSNHVSWGDNVAAAWNKALDNTYAIVVPRL -> GLBD_CAUAR -GQATSFQSVGDLTPAEKDLIRSTWDQLMTHRTGFVADVFIRIFHNDPWAQRKFPQMAGL -SPAELRTSRQMHAHAIRVSALMTTYIDEMDTEVLPELLATLTRTHDKNHVGKKNYDLFGK -VLMEAIKAELGVGFTKQVHDAWAKTFAIVQGVLITKHAS -> GLBD_CHITH -MKFFAVLALCIVGaiaSPLTADEASLVQSSWKAVSHNEVDILAAVFAAYPDIQAKFPQF -AGKDLASIKDTGAFATHATRIVSFLSEVIALSGNASNAAAVEGLLNKLGSDHKARGVSAA -QFGEFRTALVSYLSNHVSWGDNVAAAWNKALDNTMAVAVAHL -> GLBE_CHITH -MKFFAVLALCIVGaiaSPLTADEASLVQSSWKAVSHNEVEILAAVFAAYPDIQNKFSQF -AGKDLASIKDTGAFATHATRIVSFLSEVIALSGNTSNAAAVNSLVSKLGDDHKARGVSAA -QFGEFRTALVAYLQANVSWGDNVAAAWNKALDNTFAIVVPRL -> GLBF_CHITH -MKFFAVLALCIVGaiaSPLTADEASLVQSSWKAVSHNEVEILAAVFAAYPDIQNKFSQF -AGKDLASIKDTGAFATHATRIVSFLSEVIALSGNDSNAAAVNSLVSKLGDDHKARGVSAA -QFGEFRTALVAYLQANVSWGDNVAAAWNKALDNTFAIVVPRL -> GLBH_CHITH -MKFFAVLALCVVGaiaSPLSADEAAIVKSSWDQVKHNEVDILAAVFAAYPDIQAKFPQF -AGKDLASIKDTAAFATHATRIVSFFTEVISLSGNQANLSAVYALVSKLGVDHKARGISAA -QFGEFRTALVSYLQAHVSWGDNVAAAWNHALDNTYAVALKSLE -> GLBI_CHITP -MKFFAVLALCIVGaiaSPLTADEASLVQSSWKAVSHNEVEILAAVFAAYPDIQNKFPQF -AGKDLASIKDTGAFATHATRIVSFLSEVIALSGNESNASAVNSLVSKLGDDHKARGVSAA -QFGEFRTALVAYLQANVSWGDNVAAAWNKALDNTFAIVVPRL -> GLBM_ANATR -STFGELANEVVNNSYHKDLLRLSWGVLSDDMEGTGLMLMANLFNMSPESRLKFGRLGHL -STGRDNSKLRGHSITLMYALKNFVDALDDVDRLKCVVEKFAVNHINRQISAEEFGKIVGP -FRAVLRIRMGDYFDEEIVAAWAALIAVVQAAL -> GLBT_CHITH 
-VATPAMPSMTDAQVAAVKGDWEKIKGSGVEILYFFLNKFPGNFPMFKKLGNDLAAAKGT -AEFKDQADKIIAFLQGVIEKLGSDMGGAKALLNQLGTSHKAMGITKDQFDQFRQALTELL -GNLGFGGNIGAWNATVDLMFHVIFNALDGTPV -> GLBX_CHITH -DPEWHTLDAHEVEQVQATWKAVSHDEVEILYTVFKAHPDIMAKFPKFAGKDLEAIKDTA -DFAVHASRIIGFFGEYVTLLGSSGNQAAIRTLLHDLGVFHKTRGITKAQFGEFRETMTAY -LKGHNKwnADISHSWDDAFDKAFSVIFEVLES -> GLBY_CHITP -MKVLAIFALCIIGALATPcDDFKIMQEAWNTMKNEEVEILYTVFKAYPDIQAKFPQFVG -KDLETIKGTAEFAVHATRIVSFMTEVISLLGNPDNLPAIMSLLSKLGKDHKGRGITVKQF -DEFHEAFHNFLHTHSVWNDNVDAAWHCNEKEIRKVINANLE -> GLBZ_CHITH -MKFIILALCVAAASALSGDQIGLVQSTYGKVKGDSVGILYAVFKADPTIQAAFPQFVGK -DLDAIKGGAEFSTHAGRIVGFLGGVIDDLPNIGKHVDALVATHKPRGVTHAQFNNFRAAF -IAYLKGHVDYTAAVEAAWGATFDAFFGAVFAKM -> GLB_APLJU -ALSAADAGLLAQSWAPVFANSDANGASFLVALFTQFPESANFFNDFKGKSLADIQASPK -LRDVSSRIFARLNEFVSNAADAGKMGSMLQQFATEHAGFGVGSAQFQNVRSMFPGFVASL -SAPAADAAWNSLFGLIISALQSAGK -> GLB_APLKU -SLSAAEADLVGKSWAPVYANKDADGANFLLSLFEKFPNNANYFADFKGKSIADIKASPK -LRDVSSRIFTRLNEFVNNAADAGKMSAMLSQFASEHVGFGVGSAQFENVRSMFPAFVASL -SAPPADDAWNKLFGLIVAALKAAGK -> GLB_APLLI -SLSAAEADLAGKSWAPVFANKNANGADFLVALFEKFPDSANFFADFKGKSVADIKASPK -LRDVSSRIFTRLNEFVNDAANAGKMSAMLSQFAKEHVGFGVGSAQFENVRSMFPGFVASV -AAPPAGADAWTKLFGLIIDALKAAGK -> GLB_BUSCA -GLDGAQKTALKESWKVLGADGPtmmKNGSLLFGLLFKTYPDTKKHFKHFDDATFAAMDT -TGVGKAHGVAVFSGLGSMICSIDDDDCVXGLAKKLSRNHLARGVSAADFKLLEAVFKXFL -DEATQRKATDAQKDADGALLTMLIKAHV -> GLB_CERRH -SLQPASKSALASSWKTLAKDAAtiqNNGATLFSLLFKQFPDTRNYFTHFGNMSDAEMKT -TGVGKAHSMAVFAGIGSMIDSMDDADCMNGLALKLSRNHIQRKIGASRFGEMRQVFPNFL -DEALGGGASGDVKGAWDALLAYLqdnkqAQAL -> GLB_DOLAU -ALSAAEAEVVAKSWGPVFANKDANGDNFLIALFEAYPDSPNFFADFKGKSIADIRASPK -LRNVSSRIVSRLNEFVSSAADAGKMAAMLDQFSKEHAGFGVGSQQFQNVSAMFPGFVASI -AAPPAGADAAWGKLFGLIIDAMKKAGK -> GLB_LAMFL -PIVDSGSVAPLSAAEKTKIRSAWAPVYSNYETSGVDILVKFFTSTPAAQEFFPKFKGMT -SADQLKKSADVRWHAERIINAVNDAVASMDDTEKMSMKLRDLSGKHAKSFQVDPQYFKVL -AAVIADTVAAGDAGFEKLMSMICILLRSAY -> GLB_TETPY -MNKPQTIYEKLGGENAMKAAVPLFYKKVLADERVKHFFKNTDMDHQTKQQTDFLTMLLG -GPNHYKGKNMTEAHKGMNLQNLHFDAIIENLAATLKELGVTDAVINEAAKVIEHTRKDML -GK -> GLB_TUBTU 
-ECDALQRFKVKHQWAEAFGTShHRLDFGLKLWNSIFRDAPEIRGLFKRVDGDNAYSAEF -EAHAERVLGGLDMTISLLDDQAAFDAQLAHLKSQHAERNIKADYYGVFVNELLAVLPDYL -GTKLDFKAWSECLGVITGAIHD -> GLP1_GLYDI -MHLTADQVAALKASWPEVSAGDGGAQLGLEMFTKYFHENPQMMFIFGYSGRTEALKHSS -KLQHHGKVIIDQIGKAVAEMDNAKQMAGTLHALGVRHKGFGDIRAEFFPALGMCLLDAME -EKVPGLNRTLWAAAYREISDACIAGLQS -> GLP2_GLYDI -MPLTADQVAALKASWPEVSAGDGGGQLGLELFTKYFHENPQMMFIFGYSGRTDALKHNA -KLQNHGKVIIDQIGKAVAEMDNAKQMAGTLHALGVRHKGFGDIRADFFPALGMCLLDAME -EKVPGLNRTLWAAAYREISDALVAGLES -> GLP3_GLYDI -MHLTADQVAALKASWPEVSAGDGGAQLGLEMFTRYFDENPQMMFVFGYSGRTSALKHNS -KLQNHGKIIVHQIGQAVSELDDGSKFEATLHKLGQEHKGFGDIKGEYFPALGDALLEAMN -SKVHGLDRTLWAAGYRVISDALIAGLES -> HBA1_BOSMU -VLSAADKGNVKAAWGKVGGHAAEYGAEALERMFLSFPTTKTYFPHFDLSQGSAQVKGHG -AKVAAALTKAVEHLDDLPGALSELSDLHAHKLRVDPVNFKLLSHSLLVTLASHLPSDFTP -AVHASLDKFLANVSTVLTSKYR -> HBA1_GALCR -VLSPTDKSIVKAAWEKVGAHAGDYGAEALERMFLSFPTTKTYFPQFDLSHGSAQVKGHG -KKVADALTNAVLHVDDMPSALSALSDLHAHKLTVDPVNFKLLSHCLLVTLACHLPAEFTP -AVHASLDKFMASVSTVLTSKYR -> HBA1_IGUIG -VLTEDDKNHIRAIWGHVDNNPEAFGVEALTRLFLAYPATKTYFAHFDLNPGSAQIKAHG -KKVVDALTQAVNNLDDIPDALAKLADLHAEKLRVDPVNFGLLGHCILVTIAAHNHGPLKA -DVALSMDKFLTKVAKTLVAHYR -> HBA1_LEMVA -VLSPADKNNVKSAWNAIGSHAGEHGAEALERMFLSFPPTKTYFPHFDLSHGSAQIKTHG -KKVADALTNAVNHIDDMPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLASHHPAEFTP -AVHASLDKFFAAVSTVLTSKYR -> HBA1_NOTCO -SLSDKDKAAVKALWSKIGKSADAIGNDALSRMIVVYPQTKTYFSHWPSVTPGHPDIKAH -GKKVMGGLAIAVSKINDLKAGLSNLSQQHAYKLRVDPANFKILNHCILVVISTMFPKNFT -PQAHVSLNKFLSGVALALAQRYR -> HBA1_PLEWA -KLTAEDKHNVKAIWDHVKGHEEAIGAEALYRMFCCMPTTRIYFPAKDLSERSSYLHSHG -KKVVGALTNAVAHIDDIDTAFSKLSDKHAEELMVDPANFPKLAHNILVVLGIHLKPHFTY -SVHRSVDKFLSTVAYVLASKYR -> HBA1_SALIR -SLTAKDKSVVKAFWGKISGKADVVGAEALGRdkMLTAYPQTKTYFSHWADLSPGSGPVK -KHGGIIMGAIGKAVGLMDDLVGGMSALSDLHAFKLRVDPGNFKILSHNILVTLAIHFPSD -FTPEVHIAVDKFLAAVSAALADKYR -> HBA1_TACAC -VLTDAEKKEVTSLWGKASGHAEEYGAEALERLFLSFPTTKTYFSHMDLSKGSAQVKAHG -KRVADALTTAAGHFNDMDSALSALSDLHAHKLRVDPVNFKLLAHCFLVVLARHHPAEFTP -SAHAAMDKFLSRVATVLTSKYR -> HBA1_TADBR 
-VLSPEDKNNVKAAWSKVGGQAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVGEALTTAVNHMDDLPGALSTLSDLHAYKLRVDPVNFKLLSHCLLVTLACHNPGEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA1_TORMA -VLSEGNKKAIKNLLQKIHSQTEVLGAEALARLFECHPQTKSYFPKFSGFSANDKRVKHH -GALVLKALVDTNKHLDDLPHHLNKLAEKHGKGLLVDPHNFKLFSDCIAVTLAAHLQEFSP -ETHCAVDKFLEEVTYQLSSLYR -> HBA1_TRICR -MKLSADDKHNVKAIWEHVKGHEEAIGAEALCRMFTSLPTTRTYFPTKDIKEGSSFLHSH -GKKVMGALSNAVAHIDDIDGALSKLSDKHAEELMVDPANFPKLAHNILVVLGIHLKPHLT -YSVHSSVDKFLATVGYVLASKYR -> HBA1_XENBO -LLSADDKKHIKAIMPSIAAHGDKFGGEALYRMFLVNPKTKTYFPTFDFHHNSKQISAHG -KKVVDALNEASNHLDNIAGSLSKLSDLHAYDLRVDPGNFPLLAHNILVVVAMNFPKQFDP -ATHKALDKFLATVSSVLTSKYR -> HBA1_XENLA -LLSADDKKHIKAIMPAIAAHGDKFGGEALYRMFIVNPKTKTYFPSFDFHHNSKQISAHG -KKVVDALNEASNHLDNIAGSMSKLSDLHAYDLRVDPGNFPLLAHNILVVVAMNFPKQFDP -ATHKALDKFLATVSTVLTSKYR -> HBA2_BOSMU -VLSAADKGNVKAAWGKVGGHAAEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -AKVAAALTKAVGHLDDLPGALSELSDLHAHKLRVDPVNFKLLSHSLLVTLASHLPSDFTP -AVHASLDKFLANVSTVLTSKYR -> HBA2_GALCR -VLSPTDKSNVKAAWEKVGAHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSTQVKGHG -KKVADALTNAVLHVDDMPSALSALSDLHAHKLRVDPVNFKLLRHCLLVTLACHHPAEFTP -AVHASLDKFMASVSTVLTSKYR -> HBA2_LEMVA -VLSPADKNNVKSAWKAIGSHAGEHGAEALERMFLSFPPTKTYFPHFDLSHGSAQIKTHG -KKVADALTNAVNHIDDMPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLASHHPAEFTP -AVHASLDKFFAAVSTVLTSKYR -> HBA2_NOTCO -SLSTKDKETVKAFWSKVSGKSEDIGNDALSRMLVVYPQTKTYFSHWKELTPGSAPVRKH -GMTVMKGVGDAVSKIEDLTAGLMELSELHAFTLRVDPANFKISHNILVVFAIMFPKEFTA -EVHVSMDKFLAALARALSEKYR -> HBA2_PLEWA -NVKAVWEHVKGHEEVYGAEALYRAFLCDPQTQTYFAGKDLSENSAFLHSHGKKVMCALT -NAIAHIDDIDGCMSKLSDKHAHELMVDPGNFDILAHHILTVLAMFLSQLLTCANHRSVDK -FLSCVKNVLTSRYR -> HBA2_TACAC -VLTDAERKEVTSLWGKASGHAEDYGAEALERLFLSFPTTKTYFSHMDLSKGSAHVRAHG -KKVADALTTAVGHFNDMDGALSDLSDLHAHKLRVDPVNFKLLAHCFLVVLARHHPEEFTP -SAHAAMDKFLSRVATVLTSKYR -> HBA2_TORMA -VLSEGNKKIIKNLLQKIHSQTEVLGAEALARLFECHPQTKSYFPKFSGFSANDKRVKHH -GDLVLKALVDTNDHLDDLPHHLHKLAEKHGKDLLVDPHNFKLFSDCIAVTLAAHLQEKSP -ETHCAVDKFLEEVTYQLSSLYR -> HBA2_TRICR -VLSSQDKANVKAVWEHVKGHEEVYGAEALHRAFVCDPQTQTYFAGKDLKENSAYLHGHG 
-KKVMSALTNAVAHIDDIEGSMSKLSDKHAHELMVDPGNFDILAHHILTTMAMFMPQCLTS -ANHRSVDKFLSTVKHVLTSKYR -> HBA2_VAREX -VLTEDDKNHVKGLWAHVHDHIDEIAADALTRMFLAHPASKTYFAHFDLSPDNAQIKAHG -KKVANALNQAVAHLDDIKGTLSKLSELHAQQLRVDPVNFGFLRHCLEVSIAAHLHDHLKA -SVIVSLDKFLEEVCKDLVSKYR -> HBA2_XENBO -LLTADDKKHIKAILPSIAAHGDKFGGEALYRMFLINPKTKTYFPNFDFHHNSKQISAHG -KKVVDALNEAANHLDNIAGSMSKLSDLHAYDLRVDPGNFPLLAHNILVTVAMYFPQQFDP -HTHKALDKFLASVSSVLTSKYR -> HBA2_XENLA -LLSADDKKHIKAIMPSIAAHGDKFGGEASYRMFLVNPKTKTYFPSFDFHHNSKQITSHG -KKVVDALNEAANHLDNIAGSMSKLSDLHAYDLRVDPGNFPLLAHNLLVVVAMHFPKQFDP -ATHKALDKFLATVSTVLTSKYR -> HBA3_GORGO -VLSPADKTNVKAAWGKVGAHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAXVKGHG -KKVAKALTXAVXHLDDMPNALSALSXLHAHKLRVXPVXFKLLNHCLLVTLAAXFPSXFTP -AVHASVDKFLASVSTVLTSKYR -> HBA3_PANTR -VLSPADKTNVKAAWGKVGAHAGXYGAEALERMFLSFPTTKTYFPHFDLSHGSAXVKGHG -KKVAKALSXAVXHLDDMPNALSALSXLHAHKLRVXPVXFKLLNHCLLVTLAAXFPSXFTP -AVHASVDKFLASVSTVLTSKYR -> HBA3_PLEWA -MVLSAEEKALVVGLCGKISGHCDALGGEALDRLFASFGQTRTYFSHFDLSPGSADVKRH -GGKVLSAIGEAAKHIDSMDQALSKLSDLHAYNLRVDPGNFQLLSHCIQAVLAAHFPADFT -PQCQAAWDKFLAAVSAVLTSKYR -> HBA3_RANCA -SLSASEKAAVLSIVGKIGSQGSALGSEALTRLFLSFPQTKTYFPHFDLTPGSADLNTHG -GKIINALAGAANHLDDLAGNLSSLSDLHAYNLRVDPGNFPLLAHIIQVVLATHFPGDFTA -EVQAAWDKFLALVSAVLTSKYR -> HBA3_XENLA -TLTDSDKAAVVALWGKIAPQANAIGAEALERLFLSYPQTKTYFSHFDLSHGSADLANHG -GKVVNALGEAAKHIDDLDAALSTLSDLHAYNLRVDPGNFKLLSHTIQVTLAIHFHKEFDA -ATQAAWDKFLAEVATVLTSKYR -> HBA3_XENTR -TLTDSEKAAVVALWSKIAPQASAIGAEALERLFLSYPQTKTYFSHFDVSHGSADLQNHG -GKVVNALGEAAKHLNDLDAALSTLSDLHAYNLRVDPGNFKLLSHTIQVTLAVHFQKEFDA -ATQAAWDKFLSEVATVLTSKYR -> HBA4_SALIR -SLSAKDKANVKAIWGKILPKSDEIGEQALSRMLVVYPQTKAYFSHWASVAPGSAPVKKH -GITIMNQIDDCVGHMDDLFGFLTKLSELHATKLRVDPTNFKILAHNLIVVIAAYFPAEFT -PEIHLSVDKFLQQLALALAEKYR -> HBA4_XENLA -TLTDSDKAAIVALWGKIAPQASAIGAEALERLFLSYPQTKTYFSHFDVSHGSADLSNHG -GKVVNALGEAAKHIDDLDSALSTLSDLHAYNLRIDPGNFKLLSHTIQVTLAIHFHKEFDA -ATQAAWDKFLAEVATVLTSKYR -> HBA5_XENLA -TFSSAEKAAIASLWGKVSGHTDEIGAEALERLFLSYPQTKTYFSHFDLSHGSKDLRSHG -GKVVKAIGNAATHIDDIPHALSALSDLHAFKLKVDPGNFKLLSHAIQVTLAIHFPAEFNA 
-DAQAAWDKFLAVVSAVLVSKYR -> HBAD_ACCGE -MLTAEDKKLIQAIWDKVQGHQEDFGAEALQRMFITYPTTKTYFPHFDLSPGSDQVRSHG -KKVVNALGNAVKSMDNLSQALSELSNLHAYNLRVDPVNFKLLSQCFQVVLAVHLGKEYTP -EVHSAFDKFLSAVAAVLAEKYR -> HBAD_AEGMO -MLTADDKKLIQATWDKVQGHQEDFGAEALQRMFITYPPTKTYFPHFDLSPGSDQVRGHG -KKVVNALGNAVKSMDNLSQALSELSNLHAYNLRVDPVNFKLLSQCFQVVLAVHLGKEYTP -EVHAAFDKFLSAVAAVLAEKYR -> HBAD_ANAPL -MLTAEDKKLITQLWEKVAGHQEEFGSEALQRMFLAYPQTKTYFPHFDLHPGSEQVRGHG -KKVAAALGNAVKSLDNLSQALSELSNLHAYNLRVDPVNFKLLAQCFQVVLAAHLGKDYSP -EMHAAFDKFMSAVAAVLAEKYR -> HBAD_ANSAN -MLTADDKKLLAQLWEKVAGHQDEFGNEALQRMFVTYPQTKTYFPHFDLHPGSEQVRSHG -KKVAAALGNAVKSLDNISQALSELSNLHAYNLRVDPANFKLLSQCFQVVLAVHLGKDYTP -EMHAAFDKFLSAVAAVLAEKYR -> HBAD_ANSIN -MLSADDKKIIAQLWEKVAGHQDEFGNEALQRMFVTYPQTKTYFPHFDVHPGSEQVRSHG -KKVAAALGNAVKSLDNISQALSELSNLHAYNLRVDPANFKLLSQCFQVVLAVHLGKDYTP -EMHAAFDKFLSAVAAVLAEKYR -> HBAD_APUAP -MLTAEDKKLIQQVWDKLQGCQEEVGAETLQRMFTTYPQTKTYFPHFDLSPGSDQIRGHG -KKVVAALGTAVKSLDNLSQALSELSNLHAYNLRVDPVNFKLLAQCLQVVLATHMTKDYTP -EIHAAFDKFLSAVAAVLAEKYR -> HBAD_BRACA -MLTADDKKILAQLWEKVAGHQDEFGNEALERMFVTYPQTKTYFPHFDLHPGSEQVRSHG -KKVAAALSNAVKSIDNLSQALSELSNLHAYNLRVDPANFKLLSQCFQVVLAVHLGKDYTP -EMHAAFDKFLSAVAAVLAEKYR -> HBAD_CAIMO -MLTAEDKKLIVQVWEKVAGHQEEFGSEALQRMFLAYPQTKTYFPHFDLHPGSEQVRGHG -KKVAAALGNAVKSLDNLSQALSELSNLHAYNLRVDPVNFKLLAQCFQVVLAAHLGKDYSP -EMHAAFDKFLSAVAAVLAEKYR -> HBAD_CHICK -MLTAEDKKLIQQAWEKAASHQEEFGAEALTRMFTTYPQTKTYFPHFDLSPGSDQVRGHG -KKVLGALGNAVKNVDNLSQAMAELSNLHAYNLRVDPVNFKLLSQCIQVVLAVHMGKDYTP -EVHAAFDKFLSAVSAVLAEKYR -> HBAD_CHLME -MLTADDKKLLTQLWEKVAGHQEEFGSEALQRMFLTYPQTKTYFPHFDLHPGSEQVRGHG -KKVAAALGNAVKSLDNLSQALSELSNLHAYNLRVDPANFKLLAQCFQVVLATHLGKDYSP -EMHAAFDKFLSAVAAVLAEKYR -> HBAD_CHRPI -MLNHDEKQLIKHAWEKVLGHQEDFGAEALERMFAVYPQTKTYFPHFDLHHDSEQIRHHG -KKVVTALGDAVRHMDNLSEALSELSNLHAYNLRVDPVNFKLLSHCFQVVLAVHLADEYTP -QVHVAYDKFLAAVSAVLAEKYR -> HBAD_GYPRU -MLTADDKKLIQTTWDKVQGHQEDFGAEALQRMFITYPQTKTYFPHFDLSPGSDQVRGHG -KKVVNALGNAVKSMDNLSQALSELSNLHAYNLRVDPVNFKLLSQCFQVVLAVHLGKEYTP -EVHSAFDKFLSAVAAVLAEKYR -> HBAD_PASMO 
-MLTAEDKKLIQQIWGKLGGAEEEIGADALWRMFHSYPSTKTYFPHFDLSQGSDQIRGHG -KKVVAALSNAIKNLDNLSQALSELSNLHAYNLRVDPVNFKFLSQCLQVSLATRLGKEYSP -EVHSAVDKFMSAVASVLAEKYR -> HBAD_PHACA -MLGAEETALVRGVWQKVESAKDEMGEETLTRMFLVYPKTKTYFPHFDLHHGSEQIRNHG -KKVVTALGNAIQNLDNLRQTLADLSNLHAYNLRVDPVNFKLLAQCFQVVLAVHLGQEYTP -EVHVAFDKFLTAVAAVLAEKYR -> HBAD_PHACO -MLNAEDKKLIQQAWEKAASHQQEFGAEALVRMFTAYPQTKTYFPHFDLSPGSDQIRGHG -KKVLGALSNAVKNVDNLSQAMSELSNLHAYNLRVDPVNFKLLSQCIEVVLAVHMGKDYTP -EVHAAFDKFLSAVSAVLAEKYR -> HBAD_PHRHI -MLSADEKQLILHAWEKVHTHQEDFGAEALERMFTVYPQTKTYFHHFDLHHGSEQIRRHG -KKVVVALENAVHHMDNLSAALCKLSDLHAYNLRVDPVNFKLLSHCFHVVLAGHLGEEYSP -QVHVAYDKFLAAVSDVLAEKYR -> HBAD_RHEAM -MLTADDKKLISQIWTKVAEHGGEFGGEALERMFITYPQTKTYFPHFDLHVGSEQVRGHG -KKVVNALSNAVKNLDNLSQALAELSNLHAYNLRVDPVNFKLLSQCFQVVLAVHLGKEYTP -EVHAAYDKFLSAVASVLAEKYR -> HBAD_SPHPU -VLTHEDCELLQQTWEKVLGHQEDFGAEALERMFITYPQTKTYFPHFDLHHGSEQIRNHG -RKVVNALGEAVKNMDHMSTASGELSNLHAYNLRVDPVNFKLLSECFEVVLAVHLKDQYTP -DVHRAYDKFLSAVGDMLAEKYR -> HBAD_STRCA -MLTADDKKLIQQIWEKVGSHLEDFGAEALERMFITYPQTKTYFPHFDLHPGSEQIRGHG -KKVANALGNAVKSLDNLSQALSELSNLHAYNLRVDPVNFKLLSQCFQVVLAVHMGKDYTP -EVHAAYDKFLTAVAAVLAEKYR -> HBAD_STUVU -VLTAEDKKLIQQTWGKLGGAEEEIGAEALWRMFHAYPPTKTYFPHFDLSQGSDQIRGHG -KKVVAALGNAIKNLDNLSQALSELSNLHAYNLRVDPVNFKFLSQCLQVTLATRLGKEYSP -EVHSAVDKFMSAVAAVLAEKYR -> HBAD_TURME -VLTGEDKKHVQHIWGLLSGAEEDLGAEVLYRMFQSYPPTKTYFPHFDVTQGSEQIRGHG -KKFMAALGNAVKNVDNLSQALSELSNLHAYNLRVDPVNFKFLSQCLQVALAARLGKEYSP -EVHSAVDKFMAAVAAVLAEKYR -> HBAM_RANCA -GLSDSEKSAVASLWEKIAPQTNKLGAESMERLFKNHPETKSFFSRFDISPGSQDLLTHG -GKIFGALGEAIKSLDNLQKYQDLHTNKLKLSSDHMKLLSAAIIEVFTAHFGGEVNQAAWN -KFLGEVGAILTSS -> HBAT_HORSE -ALAAADRATVRALWKKMGSNVGVYATEALERMFLGFPSTTTYFLHLDLSLGSTQVKAHG -QKVADALTLAVEHLEDLPRALSALRHRHVRELRVDPASFQLLGHCLLVTPARHFPGDFSP -TLHASLVKFLSHVISALASDCR -> HBAT_HUMAN -ALSAEDRALVRALWKKLGSNVGVYTTEALERTFLAFPATKTYFSHLDLSPGSSQVRAHG -QKVADALSLAVERLDDLPHALSALSHLHACQLRVDPASFQLLGHCLLVTLARHYPGDFSP -ALQASLDKFLSHVISALVSEYR -> HBAT_PAPAN -ALSAEDRALVRALWKKLGSNVGVYATEALERTFLAFPATKTYFSHLDLSPGSAQVRAHG 
-QKVADALSLAVERLDDLPRALSALSHLHACQLRVDPANFPAPGPLPAGDPRPALPRRLQP -GAAGVAGQVPEPRDLCAGFRVP -> HBAT_PONPY -ALSAEDRALVRALWKKLGSNVGVYTTEALERTFLAFPATKTYFSHLDLSPGSSQVRAHG -QKVADALSLAVERLDDLPHALSALSHLHACQLRVDPASFQLLGHCLLVTLARHYPGDFSP -ALQASLDKFLSHVISALASEYR -> HBAZ_CAPHI -SLTRTERTIILSLWSKISTQADVIGTETLERLFSCYPQAKTYFPHFDLHSGSAQLRAHG -SKVVAAVGDAVKSIDNVTSALSKLSELHAYVLRVDPVNFKFLSHCLLVTLASHFPADFTA -DAHAAWDKFLSIVSGVLTEKYR -> HBAZ_HORSE -SLTKAERTMVVSIWGKISMQADAVGTEALQRLFSSYPQTKTYFPHFDLHEGSPQLRAHG -SKVAAAVGDAVKSIDNVAGALAKLSELHAYILRVDPVNFKFLSHCLLVTLASRLPADFTA -DAHAAWDKFLSIVSSVLTEKYR -> HBAZ_HUMAN -SLTKTERTIIVSMWAKISTQADTIGTETLERLFLSHPQTKTYFPHFDLHPGSAQLRAHG -SKVVAAVGDAVKSIDDIGGALSKLSELHAYILRVDPVNFKLLSHCLLVTLAARFPADFTA -EAHAAWDKFLSVVSSVLTEKYR -> HBAZ_MOUSE -SLMKNERAIIMSMWEKMAAQAEPIGTETLERLFCSYPQTKTYFPHFDLHHGSQQLRAHG -FKIMTAVGDAVKSIDNLSSALTKLSELHAYILRVDPVNFKLLSHCLLVTMAARFPADFTP -EVHEAWDKFMSILSSILTEKYR -> HBAZ_PANTR -SLTKTEGTIIVSMWAKISTQADTIGTETLERLFLSHPQTKTYFPHFDLHPGSAQLRAHG -SKVVAAVGDAVKSIDNIGGALSKLSELHAYILRVDPVNFKLLSHCLLVTLAARFPADFTA -EAHAAWDKFLSVVSSVLTEKYR -> HBAZ_PIG -SLTKAERTIIGSMWTKISSQADTIGTETLERLFASYPQAKTYFPHFDLNPGSAQLRAHG -SKVLAAVGEAVKSIDNVSAALAKLSELHAYVLRVDPVNFKFLSHCLLVTLASHFPADLTA -EAHAAWDKFLTIVSGVLTEKYR -> HBA_ACCGE -VLSANDKTNVKNVFTKIGGHAEEYGAETLERMFTTYPPTKTYFPHFDLHHGSAQIKAHG -KKVVGALIEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHHPSVLTP -EVHASLDKFLCAVGNVLTAKYR -> HBA_AEGMO -VLSANDKTNVKTVFTKITGHAEDYGAETLERMFITYPPTKTYFPHFDLHHGSAQIKAHG -KKVVGALIEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHHPSVLTP -EVHASLDKFLCAVGNVLTAKYR -> HBA_AILFU -VLSPADKTNVKSTWDKLGGHAGEYGGEALERTFASFPTTKTYFPHFDLSPGSAQVKAHG -KKVADALTLAVGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_AILME -VLSPADKTNVKATWDKIGGHAGEYGGEALERTFASFPTTKTYFPHFDLSPGSAQVKAHG -KKVADALTTAVGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLASHHPAEFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_ALCAA -VLSATDKSNVKAAWGKVGGNAPAYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKAHG -EKVANALTKAVGHLDDLPGTLSDLSDLHAHKLRVDPVNFKLLSHTLLVTLAAHLPSDFTP 
-AVHASLDKFLANVSTVLTSKYR -> HBA_ALLMI -VLSMEDKSNVKAIWGKASGHLEEYGAEALERMFCAYPQTKIYFPHFDMSHNSAQIRAHG -KKVFSALHEAVNHIDDLPGALCRLSELHAHSLRVDPVNFKFLAHCVLVVFAIHHPSALSP -EIHASLDKFLCAVSAVLTSKYR -> HBA_AMBME -FKLSGEDKANVKAVWDHVKGHEDAFGHEALGRMFTGIEQTHTYFPDKDLNEGSFALHSH -GKKVMGALSNAVAHIDDLEATLVKLSDKHAHDLMVDPAEFPRLAEDILVVLGFHLPAKFT -YAVQCSIDKFLHVTMRLCISKYR -> HBA_ANAPE -VLSAADKTNVKGVFSKIGGHAEEYGAETLERMFIAYPQTKTYFPHFDLSHGSAQIKAHG -KKVAAALVEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKFLGHCFLVVVAIHHPAALTP -EVHASLDKFLCAVGAVLTAKYR -> HBA_ANAPL -VLSAADKTNVKGVFSKIGGHAEEYGAETLERMFIAYPQTKTYFPHFDLSHGSAQIKAHG -KKVAAALVEAVNHVDDIAGALSKLSDLHAQKLRVDPVNFKFLGHCFLVVVAIHHPAALTP -EVHASLDKFMCAVGAVLTAKYR -> HBA_ANAPP -VLSAADKTNVKGVFSKIGGHAEEYGAETLERMFIAYPQTKTYFPHFDLSHGSAQIKAHG -KKVAAALVEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKFLGHCFLVVVAIHHPAALTP -EVHASLDKFMCAVGAVLTAKYR -> HBA_ANSAN -VLSAADKTNVKGVFSKIGGHAEEYGAETLERMFTAYPQTKTYFPHFDLQHGSAQIKAHG -KKVAAALVEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKFLGHCFLVVVAIHHPSALTP -EVHASLDKFLCAVGTVLTAKYR -> HBA_ANSIN -VLSAADKTNVKGVFSKISGHAEEYGAETLERMFTAYPQTKTYFPHFDLQHGSAQIKAHG -KKVVAALVEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKFLGHCFLVVVAIHHPSALTA -EVHASLDKFLCAVGTVLTAKYR -> HBA_ANSSE -VLSAADKGNVKTVFGKIGGHAEEYGAETLQRMFQTFPQTKTYFPHFDLQPGSAQIKAHG -KKVAAALVEAANHIDDIAGALSKLSDLHAQKLRVDPVNFKFLGHCFLVVLAIHHPSLLTP -EVHASMDKFLCAVATVLTAKYR -> HBA_ANTPA -VLSPADKTNVKAAWDKVGGHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVGDALGNAVAHMDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPGDFTP -AVHASLDKFLASVSTVLVSKYR -> HBA_APTFO -VLSADDKSNVKSIFSKLHTHACEYGAEPLERMFXTYPTTKTYFPHFDLSHGSAXVKAHG -KKVAXXIGKAIAXLXXIAGALSKLSXLHAXKLRVXPVXFKLLSHGLXVAXAKXLVRXFTP -GVTASLXKIHKSVSAAHQAKYR -> HBA_APUAP -VLSAADKTNVKGVFAKIGGQAEALGGEALARMFAAYPPTKTYFPHFDLSPGSAQVKAHG -KKVASALVEAANNIDDIAGALSKLSDLHAQKLRVDPVNFKLLGHCFLVVVAIHHPSVLTP -EVHASLDKFLCAVATVLTAKYR -> HBA_AQUCH -VLSANDKTNVKNVFTKISGHAEDYGAEALERMFTTYPPTKTYFPHFDLHHGSAQIKAHG -KKVVGALIEAVNHIDDMAGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHHPSVLTP -EVHASLDKFLCAVGNVLTAKYR -> HBA_ARAAR 
-VLSGSDKTNVKGIFSKIGGQAEDYGAEALERMFATFPQTKTYFPHFDVSPGSAQVKAHG -KKVAAALVEAANHIDDIATALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHNPSALTP -EVHASLDKFLCAVGNVLTAKYR -> HBA_ATEGE -VLSPADKSNVKAAWGKVGGHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPADFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_BALAC -VLSPTDKSNVKATWAKIGNHGAEYGAEALERMFMNFPSTKTYFPHFDLGHDSAQVKGHG -KKVADALTKAVGHMDNLLDALSDLSDLHAHKLRVDPANFKLLSHCLLVTLALHLPAEFTP -SVHASLDKFLASVSTVLTSKYR -> HBA_BISBO -VLSAADKGNVKAAWGKVGGHAAEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -AKVAAALTKAVGHLDDLPGALSELSDLHAHKLRVDPVNFKLLSHSLLVTLASHLPNDFTP -AVHASLDKFLANVSTVLTSKYR -> HBA_BOSGA -VLSAADKGNVKAAWGKVGDHAAEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -AKVAAALTKAVGHLDDLPGALSELSDLHAHKLRVDPVNFKLLSHSLLVTLASHLPNDFTP -AVHASLDKFLANVSTVLTSKYR -> HBA_BOVIN -VLSAADKGNVKAAWGKVGGHAAEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -AKVAAALTKAVEHLDDLPGALSELSDLHAHKLRVDPVNFKLLSHSLLVTLASHLPSDFTP -AVHASLDKFLANVSTVLTSKYR -> HBA_BRACA -VLSAADKTNVKGVFSKIGGHADEYGAETLERMFVAYPQTKTYFPHFDLQHGSAQIKAHG -KKVAAALVEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKFLGHCFLVVVAIHHPSALTP -EVHASLDKFLCAVGTVLTAKYR -> HBA_BRATR -VLSAADKAHVKAFWTKIGGHAGEYGGEALERTFLSFPTTKTYFPHFDLSPGSAQVKAHG -KKVGDALTLAVGHLDDLPGALSDLSDLHAHKLRVDPVNFKLLGHCVLVTLALHHPDAFTP -AVHASLDKFITTVSTVLTSKYR -> HBA_CAICR -VLSEEDKSHVKAIWGKVAGHLEEYGAEALERMFCAYPQTKIYFPHFDMSHNSAQIRGHG -KKVFAALHDAVNHIDDLAGALCRLSDLHAHNLRVDPVNFKFLSQCILVVFGVHHPCSLTP -EVHASLDKFLCAVSAMLTSKYR -> HBA_CAIMO -VLSAADKTNVKGVFSKIGGHAEEYGAETLERMFIAYPQTKTYFPHFDLQHGSAQIKAHG -KKVAAALVEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKFLGHCFLVVVAIHHPAALTP -EVHASLDKFMCAVGAVLTAKYR -> HBA_CALAR -VLSPADKSNVKAAWGKVGSHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_CAMDR -VLSSKDKTNVKTAFGKIGGHAAEYGAEALERMFLGFPTTKTYFPHFDLSHGSAQVKAHG -KKVGDALTKAADHLDDLPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTVAAHHPGDFTP -SVHASLDKFLANVSTVLTSKYR -> HBA_CANFA -VLSPADKTNIKSTWDKIGGHAGDYGGEALDRTFQSFPTTKTYFPHFDLSPGSAQVKAHG 
-KKVADALTTAVAHLDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPTEFTP -AVHASLDKFFAAVSTVLTSKYR -> HBA_CAPHI -VLSAADKSNVKAAWGKVGGNAGAYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -EKVAAALTKAVGHLDDLPGTLSDLSDLHAHKLRVDPVNFKLLSHSLLVTLACHLPNDFTP -AVHASLDKFLANVSTVLTSKYR -> HBA_CARAU -SLSDKDKAVVKALWAKIGSRADEIGAEALGRMLTVYPQTKTYFSHWSDLSPGSGPVKKH -GKTIMGAVGDAVSKIDDLVGALSALSELHAFKLRIDPANFKILAHNVIVVIGMLFPGDFT -PEVHMSVDKFFQNLALALSEKYR -> HBA_CATCL -SLSDKDKADVKIAWAKISPRADEIGAEALGRMLTVYPQTKTYFAHWADLSPGSGPVKHG -KKViMGAIGDAVTKFDDLLGGLASLSELHASKLRVDPSNFKILANCITVVIMFYLPGDFP -PEVHASVDKFFQNLALALGQKYR -> HBA_CAVPO -VLSAADKNNVKTTWDKIGGHAAEYVAEGLTRMFTSFPTTKTYFHHIDVSPGSGDIKAHG -KKVADALTTAVGHLDDLPTALSTLSDVHAHKLRVDPVNFKFLNHCLLVTLAAHLGADFTP -SIHASLDKFFASVSTVLTSKYR -> HBA_CEBAP -VLSPADKTNVKTAWGKVGGHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALSNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPADFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_CEBCA -VLSPADKTNVKTAWGKVGAHAGDYGADALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALSNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPADFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_CERAE -VLSPADKSNVKAAWGKVGGHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTLAVGHVDDMPHALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_CERSI -VLSPTDKTNVKTAWGHVGAQAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKAHG -KKVGDALTQAVGHLDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLALHHPQDFTP -AVHASLDKFLSNVSTVLTSKYR -> HBA_CERTO -VLSPDDKKHVKAAWGKVGEHAGEYGAEALERMFLSFPTTKTYFPHFNLSHGSDQVKGHG -KKVADALTLAVGHVDDMPHALSKLSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_CHICK -VLSAADKNNVKGIFTKIAGHAEEYGAETLERMFTTYPPTKTYFPHFDLSHGSAQIKGHG -KKVVAALIEAANHIDDIAGTLSKLSDLHAHKLRVDPVNFKLLGQCFLVVVAIHHPAALTP -EVHASLDKFLCAVGTVLTAKYR -> HBA_CHLME -VLSAADKANVKGVFSKIGGHADDYGAETLERMFIAYPQTKTYFPHFDLHHGSAQIKAHG -KKVAAALVEAVNHIDDITGALSKLSDLHAQKLRVDPVNFKFLGHCFLVVVAIHHPAALTP -EVHASLDKFMCAVGAVLTAKYR -> HBA_CHRPI -VLNAGDKANVKAVWNKVAAHVEEYGAETLERMFTVYPQTKTYFPHFDLHHGSAQIRTHG -KKVLTALGEAVNHIDDLASALSKLSDIHAQTLRVDPVNFKFLNHCFLVVVAIHQPSVLTP 
-EVHVSLDKFLSAVGTVLTSKYR -> HBA_CICCI -VLSANDKSNVRGVFGKISAHADDYGAETLERMFTVHPTQKTYFPHFDLHRGSAQIKAHG -KKVAGALLEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAVHHPSLLTP -EVHASLDKFLCTVSTVLTDKYR -> HBA_COLBA -VLSPADKTNVKTAWGKVGGHGGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTLAAAHVDDMPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_COLLI -VLSANDKSNVKAVFAKIGGQAGDLGGEALERLFITYPQTKTYFPHFDLSHGSAQIKGHG -KKVAEALVEAANHIDDIAGALSKLSDLHAQKLRVDPVNFKLLGHCFLVVVAVHFPSLLTP -EVHASLDKFVLAVGTVLTAKYR -> HBA_COTJA -VLSAADKTNVKGIFAKIAGHAEEYGAEALDRMFTTYPQTKTYFPHFDVSHGSAQIKGHG -KKVAAALVEAANHIDDIAGTLSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHHPAALTP -EVHASLDKFLCAVGTVLTAKYR -> HBA_CRIGA -VLSADDKANIKATWEKIGGHGAEYGAEALERMFASFPTTKTYFPHFDVSHGSAQVKSHG -KKVADALANAAHHLDDLPGALSALSDLHAHKLRVDPVNFKLLGHCLLVTLATHLQAGLTP -AAHASLDKFLASVSTVLTSKYR -> HBA_CROCR -VLSSADKANIKATWDKIGGHGGEYGAEALERTFLCFPTTKTYFPHFDLSHGSAQVKAHG -KKVADALALAAAHLDDLPSALSALSDLHAYKLRVDPVNFKLLSHCLLVTLAAHHPAEFTP -AVHSDLDKFLSSVSTVLTSKYR -> HBA_CRONI -VLSSDDKCNVKAVWSKVAGHLEEYGAEALERMFCAYPQTKIYFPHFDLSHGSAQIRAHG -KKVFAALHEAVNHIDDLPGALCRLSELHAHSLRVDPVNFKFLAQCVLVVVAIHHPGSLTP -EVHASLDKFLCAVSSVLTSKYR -> HBA_CTEGU -VLSAADKTNVKAAWDKIGGHGGEYGAEALERMFLSFPTTKTYFPHFDVSHGSAQVKAHG -KKVADALANAASHLDDLPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFLATVATVLTSKYR -> HBA_CYGMA -SLSDKDKAAVKALWTTISKSSDAIGNDALSRMIVVYPQTKTYFSHWPDVTPGSTHIRDH -GKKVMGGISLAVSKIDDLKTGLFELSEQHAFKLRVDPANFKILNHCILVVIATMFPKEFT -PEAHVSLDKFLSGVALALAERYR -> HBA_CYGOL -VLSAADKTNVKGVFSKIGGHADDYGAETLERMFIAYPQTKTYFPHFDLQHGSAQIKAHG -KKVAAALVEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKFLGHCFLVVVAIHHPSALTP -EVHASLDKFLCAVGAVLTAKYR -> HBA_CYNSP -VLSPADKTNVKAAWDKVGGNAGEYGAEALERMFLSFPTTKTYFPHFDLAHGSPQVKGHG -KKVGDALTNAVSHIDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLANHLPSDFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_CYPCA -SLSDKDKAAVKGLWAKISPKADDIGAEALGRMLTVYPQTKTYFAHWADLSPGSGPVKKH -GKVIMGAVGDAVSKIDDLVGGLAALSELHAFKLRVDPANFKILAHNVIVVIGMLYPGDFP -PEVHMSVDKFFQNLALALSEKYR -> HBA_DASNO 
-VLSAADKTHVKAFWGKVGGHAAEFGAEALERMFASFPPTKTYFSHMDLSHGSAQVKAHG -KKVADALTLAVGHLDDLPGALSTLSDLHAHKLRVDPVNFKFLSHCLLVTLACHLPDDFTP -AVHASMDKFMAGVSTVLVSKYR -> HBA_DASVI -VLSDADKTHVKAIWGKVGGHAGAYAAEALARTFLSFPTTKTYFPHFDLSPGSAQIQGHG -KKVADALSQAVAHLDDLPGTLSKLSDLHAHKLRVDPVNFKLLSHCLIVTLAAHLSKDLTP -EVHASMDKFFASVATVLTSKYR -> HBA_DIDMA -VLSANDKTNVKGAWSKVGGNSGAYMGEALYRTFLSFPTTKTYFPNYDFSAGSAQIKTQG -QKIADAVGLAVAHLDDMPTALSSLSDLHAHELKVDPVNFKFLCHNVLVTMAAHLGKDFTP -EIHASMDKFLASVSTVLTSKYR -> HBA_ECHTE -VLSAADKANVKAVWEKAGGNVGKYGGEALDRTFLSFPTTKTYFPHMDLTPGSADIMAHG -KKVADALTLAVGHMDDLPGALSKLSDLHAYKLRVDPVNFKLLSHCLLVTLACHLGGDFTP -AAHASLDKFLSSVSTVLTSKYR -> HBA_ELEEL -SLTAKSKSIVKAFWGKIGSRADDIGAEAFGRMLTVYPETKTYFASWSDLSPGSAAVKKH -GKTIMGGIAEAVGHIDDLTGGLASLSELHAFKLRVDPANFKILAHNLIVVLALFFPADFT -PEVHMAVDKFFQNVASALSEKYR -> HBA_ELEMA -VLSDKDKTNVKATWSKVGDHASDYVAEALERMFFSFPTTKTYFPHFDLSHGSGQVKGHG -KKVGEALTQAVGHLDDLPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTLSSHQPTEFTP -EVHASLDKFLSNVSTVLTSKYR -> HBA_EQUAS -VLSAADKTNVKAAWSKVGGNAGEFGAEALERMFLGFPTTKTYFPHFDLSHGSAQVKAHG -KKVGDALTLAVGHLDDLPGALSNLSDLHAHKLRVDPVNFKLLSHCLLSTLAVHLPNDFTP -AVHASLDKFLSSVSTVLTSKYR -> HBA_EQUHE -VLSAADKTNVKAAWSKVGGHAGDFGAEALERMFLGFPTTKTYFPHFDLSHGSAQVKAHG -KKVGDALTLAVGHLDDLPGALSNLSDLHAHKLRVDPVNFKLLSHCLLSTLAVHLPNDFTP -AVHASLDKFLSTVSTVLTSKTR -> HBA_EQUZE -VLSAADKTNVKAAWSKVGGNAGEFGAEALERMFLGFPTTKTYFPHFDLSHGSAQVKAHG -KKVGDALTLAVGHLDDLPGALSNLSDLHAHKLRVDPVNFKLLSHCLLSTLAVHLPNDFTP -AVHASLDKFLSTVSTVLTSKYR -> HBA_ERIEU -VLSATDKANVKTFWGKLGGHGGEYGGEALDRMFQAHPTTKTYFPHFDLNPGSAQVKGHG -KKVADALTTAVNNLDDVPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLALHHPADFTP -AVHASLDKFLATVATVLTSKYR -> HBA_EUDCR -VLSANDKSNVKGVFSKISSHAEEYGAETLERMFTTYPQTKTYFPHFDLHHGSAQVKAHG -KKVATALMEAANHIDDIAGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVMAIHHPSALTP -EVHASLDKFLCAVGNVLTSKYR -> HBA_EUDSC -VLSAADKTNVKGIFAKIGGHGDDYGAETLDRMFTVYPQTKTYFPHFDVSHGSAQIKAHG -KKVVAALVEAVNHIDDIAGALSKLSDLHAHKLRVDPANFKLLGQCFLVVVGIHHASALTP -EVHASLDKFLCAVSTVLTAKYR -> HBA_FELCA -VLSAADKSNVKACWGKIGSHAGEYGAEALERTFCSFPTTKTYFPHFDLSHGSAQVKAHG 
-QKVADALTQAVAHMDDLPTAMSALSDLHAYKLRVDPVNFKFLSHCLLVTLACHHPAEFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_FRAPO -VLSAADKNNVKGIFGKISSHAEDYGAEALERMFITYPSTKTYFPHFDLSHGSAQVKGHG -KKVVAALIEAANHIDDIAGTLSKLSDLHAHKLRVDPVNFKLLGQCFLVVVAIHHPSALTP -EVHASLDKFLCAVGNVLTAKYR -> HBA_GORGO -VLSPADKTNVKAAWGKVGAHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_GYPRU -VLSANDKTNVKNVFTKITGHAEDYGAETLERMFTTYPPTKTYFPHFDLHHGSAQIKAHG -KKVVGALIEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHHPSVLTP -EVHASLDKFLCAVGNVLTAKYR -> HBA_HETPO -STSTSTSDYSAADRAELAALSKVLAQNAEAFGAEALARMFTVYAATKSYFKDYKDFTAA -APSIKAHGAKVVTALAKACDHLDDLKTHLHKLATFHGSELKVDPANFQYLSYCLEVALAV -HLTEFSPETHCALDKFLTNVCHELSSRYR -> HBA_HIPAM -VLSANDKSNVKAAWGKVGNHAPEYGAEALERMFLSFPTTKTYFPHFDLSHGSSQVKAHG -KKVADALTKAVGHLDDLPGALSDLSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPSDFTP -AAHASLDKFLANVSTVLTSKYR -> HBA_HORSE -VLSAADKTNVKAAWSKVGGHAGEYGAEALERMFLGFPTTKTYFPHFDLSHGSAQVKAHG -KKVGDALTLAVGHLDDLPGALSNLSDLHAHKLRVDPVNFKLLSHCLLSTLAVHLPNDFTP -AVHASLDKFLSSVSTVLTSKYR -> HBA_HUMAN -VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_LAMGL -VLSSKDKANIKTAFGKIGGHAADYGAEALERMFLGFPTTKTYFPHFDLSHGSAQVKAHG -KKVGDALTKAADHLDDLPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTVAAHHPGDFTP -AVDASLDKFLANVSTVLTSKYR -> HBA_LAMPA -VLSSKDKANIKTAFGKIGGHAADYGAEALERMFLGFPTTKTYFPHFDLSHGSAQVKAHG -KKVGDALTKAADHLDDLPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTVAAHHPGDFTP -AVHASLDKFLANVSTVLTSKYR -> HBA_LAMVI -VLSSKDKANVKTAFGKIGGHAADYGAEALERMFLGFPTTKTYFPHFDLSHGSAQVKAHG -KKVGDALTKAADHLDDLPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTVAAHHPGDFTP -AVHASLDKFLTNVSTVLTSKYR -> HBA_LARRI -VLSGSDKTNVKGVFGKIGGHAEEYGAETLERMFATYPQTKTYFPHFDLQHGSAQVKAHG -KKVAAALVEAANHIDDIAGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHHPSVLTP -EVHASLDKFLCAVGNVLTAKYR -> HBA_LATCH -GLTAADKTLIKSIWGKVEKETEAIGVEALVRLFKCFPQSKVYFDHFTDLSPSSQKLHAH -AKVVLGALTKAVNHLDNITDTLHDISLVHAKKLLVDPVNFELLGHCLEVALAAHFATDFT 
-PEVHLAIDKFLYEVEKALFETYR -> HBA_LEMFU -VLSPADKTNVKTAWNAVGGQAGEHGAEALERMFLSFPTTKTYFPHFDLSHGSGQVKAHG -KKVADALTNAVSHLDDMPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLASHHPAEFTP -AVHASLDKFFAAVSTVLTSKYR -> HBA_LEPPA -MRFSQDDEVLIKEAWGLLHQIPNAGGEALARMFSCYPGTKSYFPHFGhDFSANNEKVKH -HGKKVVDAIGQGVQHLHDLSSCLHTLSEKHARELMVDPCNFQYLIEAIMTTIAAHYGEKF -TPEINCAAEKCLGQIVHVLISLYR -> HBA_LEPWE -VLSPADKTNVKTTWDKIGGHAGEYGGEALERTFMAFPTTKTYFPHFDLSPGSAQVKTHG -KKVADALTTAVSHIDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPADFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_LIOMI -VLTAEDRRLLQASVGKLGCRLEDIGADALNRLLITFPQSKTYFSHFNLSPGSKDIIHQG -EKVGKALDSALKHLDDIRGTLSQLSDLHAYNLRVDPVNFQLLSKCIHVSLATHLRNEYSA -SVTLAWDKFLELVADVLSEKYR -> HBA_LORTA -VLSPADKTNVKTAWEKVGGHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKAHG -KKVADALTTAVSHVDDMPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTLACHHPADFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_LOXAF -VLSDNDKTNVKATWSKVGDHASDYVAEALERMFFSFPTTKTYFPHFDLGHGSGQVKAHG -KKVGEALTQAVGHLDDLPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTLSSHQPTEFTP -EVHASLDKFLSNVSTVLTSKYR -> HBA_LUTLU -VLSPADKTNVKSTWDKIGGHAGEYGGEALERTFVSFPTTKTYFPHFDLSHGSAQVKAHG -KKVADALTNAVAHMDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_MACAS -VLSPADKTNVKAAWGKVGGHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTLAVGHVDDMPHALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_MACCA -VLSAADKGNVKAAWDKVGGQAGEYGAEALERMFLSFPTTKTYFPHFDLAHGSAQVKGHG -KKVADALTNAVGHMDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLASHHPAEFTP -AIHASLDKFFASVSTVLTSKYR -> HBA_MACFA -VLSPADKTNVKAAWGKVGGHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTLAVGHVDDMPQALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_MACGG -VLSPADKANVKAAWDKVGGQAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKAHG -KKVGDALSNAAGHLDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLASHHAAEFTP -AVHASLDKFLASVGTVLTSKYR -> HBA_MACGI -VLSAADKGHVKAIWGKVGGHAGEYAAEGLERTFHSFPTTKTYFPHFDLSHGSAQIQAHG -KKIADALGQAVEHIDDLPGTLSKLSDLHAHKLRVDPVNFKLLSHCLLVTFAAHLGDAFTP -EVHASLDKFLAAVSTVLTSKYR -> HBA_MACMU 
-VLSPADKSNVKAAWGKVGGHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTLAVGHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_MACNE -VLSPADKTNVKAAWGKVGGHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTLAVDHVDDMPQALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVGTVLTSKYR -> HBA_MACSI -VLSPADKTNVKDAWGKVGGHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTLAVGHVDDMPQALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_MACSP -VLSPADKTNVKAAWDKVGGHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTLAVGHVDDMPHALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_MANSP -VLSPADKKNVKAAWDKVGGHAGEYGAEALERMFLSFPTTKTYFPHFNLSHGSDQVKGHG -KKVADALTLAVGHVDDMPQALSKLSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_MARFO -VLSPADKTNVKSTWDKIGGHAGEYGGEALERTFVSFPTTKTYFPHFDLSPGSAQVKAHG -KKVADALTLAVGHLDDLAGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFFSTVSTVLTSKYR -> HBA_MARMA -VLSPADKTNVKAAWEKIGGHGAAYGAEALERMFLSFPTTKTYFPHFDLSHGSAQIQGHG -KKVADALANAAAHVDDLPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_MEGLY -VLSAADKANVKAAFDKVGGQAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKAHG -KKVGDALVNAVGHLDDLPGALSALSDLHAYKLRVDPVNFKLASNVLLVTLAVHVAAGFTP -AVHASLDKFLASVGTVLTSKYR -> HBA_MELCA -VLSPSDKANVKATWDKIGGHAGEYGGEALERTFASFPTTKTYFPHFDLSPGSAQVKAHG -KKVADALTNAVAHGDDLPMALSTLSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFFSTVSTVLTSKYR -> HBA_MELME -VLSPADKANIKATWDKIGGHAGEYGGEALERTFASFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTNAVAHLDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFLSSVSTVLTSKYR -> HBA_MESAU -VLSAKDKTNISEAWGKIGGHAGEYGAEALERMFFVYPTTKTYFPHFDVSHGSAQVKGHG -KKVADALTNAVGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLANHHPADFTP -AVHASLDKFFASVSTVLTSKYR -> HBA_MOUSE -VLSGEDKSNIKAAWGKIGGHGAEYGAEALERMFASFPTTKTYFPHFDVSHGSAQVKGHG -KKVADALASAAGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLASHHPADFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_MUSLU -VLSPADKTNVKSTWDKIGGHAGEYGGEALERTFASFPTTKTYFPHFDLSHGSAQVKAHG 
-KKVADALTNAVAHMDDLPGAMSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_MUSPF -VLSPADKTNVKSTWDKIGGHAGEYGGEALERTFASFPTTKTYFPHFDLSHGSAQVKAHG -KKVADALTNAVAHVDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_MUSPU -VLSPADKTNVKSTWDKIGGHAGEYGGEALERTFASFPTTKTYFPHFDLSHGSAQVKAHG -KKVADALTNAVAHMDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_MYOVE -VLSPADKTNIKAAWDKVGAHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVGDALGNAVAHMDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHLPGEFTP -AIHASLDKFLASVSTVLVSKYR -> HBA_NASNA -VLSPADKTNIKSTWEKIGSHASEYGGEALERTFASFPTTKTYFPHFDLSPGSAQVKAHG -KKVAEALTNAVAHLDDLPGALSTLSDLHAYKLRVDPVNFKFLSHCLLVTLASHHPAEFTP -AVHASLDKFFSSVSTVLTSKYR -> HBA_NYCCO -VLSPADKTNVKAAWEKVGSHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKAHG -KKVADALTNAVSHVDDMPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTLACHHPADFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_ODORO -VLSPADKTNVKTTWDKLGGHAGEYGGEALERTFMSFPTTKTYFPHFDLSPGSAQVKAHG -KKVADALTTAVAHIDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFFSTVSTVLTSKYR -> HBA_ODOVI -VLSAAXKSXVKAAWGKVGGNAAPYGAXALXRMFLSFPTTKTYFPHFXLSHGSAXVKAHG -XKVAXALTKAVGHLXXLPGTLSXLSXLHAHKLRVXPVXFKLLSHSLLVTLATHLPXXFTP -AVHASLXKFLAXVSTVLTSKYR -> HBA_ONDZI -VLSGEDKNNIKTAWGKIGGHAAEYGAEALERMFVVYPTTKTYFPHFDVSHGSGQVKAHG -KKVADALTTAVGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLANHIPADFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_ORNAN -MLTDAEKKEVTALWGKAAGHGEEYGAEALERLFQAFPTTKTYFSHFDLSHGSAQIKAHG -KKVADALSTAAGHFDDMDSALSALSDLHAHKLRVDPVNFKLLAHCILVVLARHCPGEFTP -SAHAAMDKFLSKVATVLTSKYR -> HBA_PAGBE -SLSDKDKAAVRALWSKIGKSADAIGNDALSRMIVVYPQTKTYFSHWPDVTPGSPHIKAH -GKKVMGGIALAVSKIDDLKTGLMELSEQHAYKLRVDPANFKILNHCILVVISTMFPKEFT -PEAHVSLDKFLSGVALALAERYR -> HBA_PAGLA -VLSSADKNNIKATWDKIGSHAGEYGAEALERTFISFPTTKTYFPHFDLSHGSAQVKAHG -KKVADALTLAVGHLEDLPNALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHSALDKFFSAVSTVLTSKYR -> HBA_PANLE -VLSSADKNNVKACWGKIGSHAGEYGAEALERTFCSFPTTKTYFPHFDLSHGSAQVQAHG -QKVADALTKAVVHINDLPNALSDLSDLHAYKLRVDPVNFKFLSHCLLVTLACHHPEEFTP 
-AVHASLDKFFSAVSTVLTSKYR -> HBA_PANPO -VLSSADKNNVKACWGKIGSHAGEYGAEALERTFCSFPTTKTYFPHFDLSHGSAQVQAHG -QKVADALTKAVAHINDLPNALSDLSDLHAYKLRVDPVNFKFLSHCLLVTLACHHPEEFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_PANPS -VLSSADKNNVKACWGKIGSHAGEYGAEALERTFCSFPTTKTYFPHFDLSHGSAQVQTHG -QKVADALTKAVAHINDLPNALSDLSDLHAYKLRVDPVNFKFLSHCLLVTLACHHPEEFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_PANTS -VLSSADKNNVKACWGKIGSHAGEYGAEALERTFCSFPTTKTYFPHFDLSHGSAQVQTHG -QKVADALTKAVAHINNLPNALSDLSDLHAYKLRVDPVNFKFLSHCLLVTLACHHPEEFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_PAPCY -VLSPDDKKHVKAAWGKVGEHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSDQVNKHG -KKVADALTLAVGHVDDMPQALSKLSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_PASMO -VLSPADKSNVKGVFAKIGGQAEEYGADALERMFATYPQTKTYFPHFDLGKGSAQVKGHG -KKVAAALVEAVNNIDDLAGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVATGNPALLTP -EVHAPLDKFLCAVGTVLTAKYR -> HBA_PHACA -VLSASDKTNVKGVFAKVGGSAEAYGAETLERMFTAYPQTKTYFPHFDLHHGSAQIKAHG -KKVAAALVEAANHIDDIAGALSKLSDLHAQKLRVDPVNFKLLGHCFLVVVAIHHPTLLTP -EVHASLDKFMCAVAKELTAKYR -> HBA_PHACO -VLSAADKNNVKGIFTKIAGHAEEYGAEALERMFITYPSTKTYFPHFDLSHGSAQIKGHG -KKVVAALIEAVNHIDDITGTLSKLSDLHAHKLRVDPVNFKLLGQCFLVVVAIHHPSALTP -EVHASLDKFLCAVGTVLTAKYR -> HBA_PHORU -VLSSHDKSNVKGLFGKVGGHLEEYCAETLARMFAAYPQTKTYFPHFDLQPGSAQVKAHG -KKVAGALAEAANHIDDIASALSKLSDLHQHKLRVDPVNFKLLAHCFLVVMAIHHPSLLTP -EVHASLDKFLCAVGTVLTAKYR -> HBA_PHOVI -VLSPADKTNVKATWDKIGGHAGEYGGEALERTFTAFPTTKTYFPHFDLSHGSAQVKAHG -KKVADALTTAVAHMDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLACHHPADFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_PHYCA -VLSPADKTNVKAAWAKVGNHAADFGAEALERMFMSFPSTKTYFSHFDLGHNSTQVKGHG -KKVADALTKAVGHLDTLPDALSDLSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPGDFTP -SVHASLDKFLASVSTVLTSKYR -> HBA_PIG -VLSAADKANVKAAWGKVGGQAGAHGAEALERMFLGFPTTKTYFPHFNLSHGSDQVKAHG -QKVADALTKAVGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPDDFNP -SVHASLDKFLANVSTVLTSKYR -> HBA_PONPY -VLSPADKTNVKTAWGKVGAHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKDHG -KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_PREEN 
-VLSPADKTNVKAAWGKVGGHGGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTNAVAHVDDMPHALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_PROCR -VLSSADKANIKATWDKIGGHGGEYGAEALERTFLCFPTTKTYFPHFDLSHGSAQVKAHG -KKVADALAVAAAHLDDLPAALSALSDLHAYKLRVDPVNFKLLSHCLLVTLAAHHPAEFTP -AVHASLDKFLSSVSTVLTSKYR -> HBA_PROHA -VLSAADKNNVKGAWEKVGTHAGEYGAEALERMFLSFPTTKTYFPHFDLTHGSAQVKAHG -QKVGAALTKAVGHLDDLPNALSDLSDLHAHKLRVDPVNFKLLSHCLLVTLSRHLPeQEFT -PAVHASLDKFFSNVSTVLTSKYR -> HBA_PROLO -VLSPADKANIKATWDKIGGHAGEYGGEALERTFASFPTTKTYFPHFDLSPGSAQVKAHG -KKVADALTLAVGHLDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFFTSVSTVLTSKYR -> HBA_PSIKR -VLSGTDKTNVKSIFSKIGGQADDYGAEALERMFVTYPQTKTYFPHFDVSPGSAQVKAHG -KKVAGGLSEAANHIDDIATSLSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHNPSALTP -EAHASLDKFLCAVGLVLTAKYR -> HBA_PTEAL -VLSSTDKSNVKAAWDKVGGHVGEYGAEALERMFLSFPTTKTYFPHFDLAHGSSQVKAHG -KKVGDALTNAVGHIDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLASHLPSDFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_PTEBR -VLSPADKTNVKATWDKIGGHAGEYGGEALERTFASFPTTKTYFPHFDLSPGSAQVKAHG -KKVADALTNAVAHMDDLPAALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPAEFTP -AVHASLDKFFSTVSTVLTSKYR -> HBA_PTEPO -VLSSTDKSNVKAAWDKVGGNVGEYGAEALERMFLSFPTTKTYFPHFDLAHGSSQVKAHG -KKVGDALTNAVGHMDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLANHLPNDFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_RABIT -VLSPADKTNIKTAWEKIGSHGGEYGAEAVERMFLGFPTTKTYFPHFDFTHGSEQIKAHG -KKVSEALTKAVGHLDDLPGALSTLSDLHAHKLRVDPVNFKLLSHCLLVTLANHHPSEFTP -AVHASLDKFLANVSTVLTSKYR -> HBA_RANTA -VLSAADKSNVKAAWGKVGGNAPAYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKAHG -EKVANALTKAVGHLDDLPGTLSDLSDLHAHKLRVDPVNFKLLSHTLLVTLASHLPSDFTP -AVHASLDKFLANVSTVLTSKYR -> HBA_RAT -VLSADDKTNIKNCWGKIGGHGGEYGEEALQRMFAAFPTTKTYFSHIDVSPGSAQVKAHG -KKVADALAKAADHVEDLPGALSTLSDLHAHKLRVDPVNFKFLSHCLLVTLACHHPGDFTP -AMHASLDKFLASVSTVLTSKYR -> HBA_RHEAM -VLSGPDKTNVKNVFAKIGGHADAYGAETLERMFTTYPQTKTYFPHFDLHHGSAQIKTHG -KKVVSALIDAANNIDDIYGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHHPSLLTP -EVHASLDKFLCAVGAVLTAKYR -> HBA_RHIUN -VLSPTDKTNVKTAWSHVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKAHG 
-KKVGDALTQAVGHLDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLALHNPQDFTP -AVHASLDKFLSNVSTVLTSKYR -> HBA_ROUAE -VLSSADKTNIKAAWDKVGGNAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVGDALTNAVGHLDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLANHLPSDFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_SAGFU -VLSPADKSNVKAAWGKVGGHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG -KKVADALTVAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPADFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_SALSA -SLTARDKSVVNAFWGKIKGKADVVGAEALGRMLTAYPQTKTYFSHWADLSPGSAPVKKH -GGVIMGAIGNAVGLMDDLVGGMSGLSDLHAFKLRVDPGNFKILSHNILVTLAIHFPADFT -PEVHIAVDKFLAALSAALADKYR -> HBA_SPAEH -VLSPEDKNHVRSTWDKIGGHGAEYGAEALERMFTSFPTTKTYFPHFDVSHGSAQVKAHG -KKVADALANAAGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLANHHPAEFTP -GVHASLDKFLASVSTVLTSKYR -> HBA_SPECI -VLSPADKKNVKDCWEKIGGHGAEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVQGHG -KKVADALANAAAHVDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_SPEPA -VLSPADKTNVKASWEKIGGHGAAYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVQGHG -KKVADALANAAAHVDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_SPETO -VLSPADKNNVKACWEKIGGHGAAYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVQGHG -KKVADALANAAAHVDDLPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_SPHPU -MLSASDKANVKAIWSKVCVHAEEYGAETLERMFTVYPSTKTYFPHFDLTHGSAQVKAHG -KKVVNAMGEAVNHLDDMAGALLKLSDLHAQKLRVDPVNFKLLAQCFLVVLGVHHPAALTP -EVHASLDKFLCAVGLVLTAKYR -> HBA_SQUAC -VLSAADKTAIKHLTGSLRTNAEAWGAESLARMFATTPSTKTYFSKFTDFSANGKRVKAH -GGKVLNAVADATDHLDNVAGHLDPLAVLHGTTLCVDPHNFPLLTQCILVTLAAHLTELKP -ETHCALDKFLCEVATALGSHYR -> HBA_STRCA -VLSGTDKTNVKGIFSKISSHAEEYGAETLERMFITYPQTKTYFPHFDLHHGSAQIKAHG -KKVANALIEAVNHIDDISGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHHPSALTP -EVHASLDKFLCAVGAVLTAKYR -> HBA_STUVU -VLSASDKANVKAVFGKIGGQAEEFGAETLERMFATYPQTKTYFPHFDLGKGSAQVKGHG -KKVAAALVEAANHVDDIAGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVASHNPALLTP -EVHASLDKFLCAVGTVLTAKYR -> HBA_SUNMU -VLSANDKANVKAAWDKVGGQAANYGAEALERTFASFPTTKTYFPHYDLSPGSAQVKAHG -KKVADALTKAVGSMDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHHPADFTP 
-AVHASLDKFLASVSTVLTSKYR -> HBA_TALEU -VLSGTDKSNIKAAWDKVGAHAGEYGAEALERTFTSFPTTKTYFPHFDLSHGSAQVKAHG -KKVADALTNAVGHLDDLPGAMSALSDLHAHKLRVDPVNFKLLSHCLLVTLACHHPNDFTP -AVHASLDKFLATVSTVLTSKYR -> HBA_TAPTE -VLSPTDKTNVKAAWSKVGSHAGEYGAEALERMFLGFPTTKTYFPHFDLSHGSAQVQAHG -KKVGDALTQAVGHLDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLALHHPDDFTP -AIHASLDKFLSNVSTVLTSKYR -> HBA_TARBA -VLSPADKTNVKAAWDKVGGHAGDYGAEALERMFLSFPTTKTYFPHFDLSHGSSQVKGHG -KKVADALTTAVGHIDNMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLACHHPADFTP -AVHASLDKFVASVSTVLTSKYR -> HBA_TARGR -MKLSAEDKHNVKTTWDHIKGHEEALGAEALFRMFTSLPATRTYFPAKDLSEGSSFLHSH -GKKVMGALSNAVAHIDDIDAALCKLSDKHAQDLMVDPANFPKLAHNILVVMGIHLKAHLT -YPVHCSVDKFLDVVGHVLTSKYR -> HBA_THEGE -VLSPDDKKHVKDAWGKVGEHAGQYGAEALERMFLSFPTTKTYFPHFDLSHGSDQVKKHG -KKVADALTLAVGHVDDMPQALSKLSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP -AVHASLDKFLASVSTVLTSKYR -> HBA_THUTH -TTLSDKDKSTVKALWGKISKSADAIGADALGRMLAVYPQTKTYFSHWPDMSPGSGPVKA -HGKKVMGGVALAVTKIDDLTTGLGDLSELHAFKMRVDPSNFKILSHCILVVVAKMFPKEF -TPDAHVSLDKFLASVALALAERYR -> HBA_TRAST -LSAADKGHVKAAWGKVGSHAAEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGA -KVAAALTKAVDHLDDLPGALSDLSDLHAHKLRVDPVNFKLLSHSLLVTLASHLPGDFTPA -VHASLDKFLANVSTVLTSKYR -> HBA_TRIIN -VLSDEDKTNVKTFWGKIGTHTGEYGGEALERMFLSFPTTKTYFPHFDLSHGSGQIKAHG -KKVADALTRAVGHLEDLPGTLSELSDLHAHRLRVDPVNFKLLSHCLLVTLSSHLREDFTP -SVHASLDKFLSSVSTVLTSKYR -> HBA_TRIOC -VLSANDKTNVKTVFTKITGHAEDYGAETLERMFITYPPTKTYFPHFDLHHGSAQIKAHG -KKVVGALIEAVNHIDDIAGALSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHHPSVLTP -EVHASLDKFLCAVGNVLSAKYR -> HBA_TUPGL -VLSPGDKSNIKAAWGKIGGQAPQYGAEALERMFLSFPTTKTYFPHFDMSHGSAQIQAHG -KKVADALSTAVGHLDDLPTALSALSDLHAHKLRVDPANFKLLSHCILVTLACHHPGDFTP -EIHASLDKFLANVSTVLTSKYR -> HBA_TURME -VLSAADKTNVKSAFSKIGGQADEYGAETLERMFATYPQTKTYFPHFDLGKGSAQVKAHG -KKVAAALVEAANAVDDIAGALSKLSDLHAQKLRVDPVNFKLLGQCFLVTVATHNPSLLTP -EVHASLDKFLCAVGTVLTAKYR -> HBA_TURTR -VLSPADKTNVKGTWSKIGNHSAEYGAEALERMFINFPSTKTYFSHFDLGHGSAQIKGHG -KKVADALTKAVGHIDNLPDALSELSDLHAHKLRVDPVNFKLLSHCLLVTLALHLPADFTP -SVHASLDKFLASVSTVLTSKYR -> HBA_URSMA 
-VLSPADKSNVKATWDKIGSHAGEYGGEALERTFASFPTTKTYFPHFDLSPGSAQVKAHG -KKVADALTTAAGHLDDLPGALSALSDLHAHKLRVDPVNFKFLSHCLLVTLASHHPAEFTP -AVHASLDKFFSAVSTVLTSKYR -> HBA_VIPAS -VLSEDDKNRVRTSVGKNPELPGEYGSETLTRMFAAHPTTKTYFPHFDLSSGSPNLKAHG -KKVIDALDNAVEGLDDAVATLSKLSDLHAQKLRVDPANFKILSQCLLSTLANHRNPEFGP -AVLASVDKFLCNVSEVLESKYR -> HBA_VULGR -VLSGSDKTNVKGVFAKIGGHAEDYGAETLERMFITYPQTKTYFPHFDLQHGSAQIKGHG -KKVVGALIEAANHIDDIAASLSKLSDLHAQKLRVDPVNFKLLGQCFLVVVAIHHPSVLTP -EVHASLDKFLCAVGNVLTAKYR -> HBA_VULVV -VLSPADKTNIKSTWDKIGGHAGDYGGEALDRTFQSFPTTKTYFPHFDLSPGSAQVKAHG -KKVADALTTAVAHLDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPNEFTP -AVHASLDKFFTAVSTVLTSKYR -> HBA_XENTR -HLTADDKKHIKAIWPSVAAHGDKYGGEALHRMFMCAPKTKTYFPDFDFSEHSKHILAHG -KKVSDALNEACNHLDNIAGCLSKLSDLHAYDLRVDPGNFPLLAHQILVVVAIHFPKQFDP -ATHKALDKFLVSVSNVLTSKYR -> HBB0_MOUSE -VHFTAEEKAAITSIWDKVDLEKVGGETLGRLLIVYPWTQRFFDKFGNLSSAQAIMGNPR -IKAHGKKVLTSLGLAVKNMDNLKETFAHLSELHCDKLHVDPENFKLLGNMLVIVLSSYFG -KEFTAEAQAAWQKLVVGVATALSHKYH -> HBB1_CYGMA -VKWSKTELTIINDIFSHLDYDDIGPKALSRCLIVYPWTQRHFSGFGNLYNAEAIIGNAN -VAAHGIKVLHGLDRGLKNMDNIVDAYAELSTLHSEKLHVDPDNFKLLSDCITIVLAAKLG -KAFTAETQAAFQKFMAVVVSALGKQYH -> HBB1_IGUIG -VHWTAEEKQLITQVWGKIDVAQIGGETLACLLVVYPWTQRFFPDFGNLSNAAAICGNAK -VKAHGKKVLTSFGDAVKNLDNIKDTFAKLSELHCDKLHVDPVNFRLLGNVMITRLAAHFG -KDFTPACHAAFQKLTGAVAHALARRYH -> HBB1_MOUSE -VHLTDAEKAAVSCLWGKVNSDEVGGEALGRLLVVYPWTQRYFDSFGDLSSASAIMGNAK -VKAHGKKVITAFNDGLNHLDSLKGTFASLSELHCDKLHVDPENFRLLGNMIVIVLGHHLG -KDFTPAAQAAFQKVVAGVATALAHKYH -> HBB1_RAT -VHLTDAEKAAVNGLWGKVNPDDVGGEALGRLLVVYPWTQRYFDSFGDLSSASAIMGNPK -VKAHGKKVINAFNDGLKHLDNLKGTFAHLSELHCDKLHVDPENFRLLGNMIVIVLGHHLG -KEFTPCAQAAFQKVVAGVASALAHKYH -> HBB1_SALIR -VEWTDAEKSTISAVWGKVNIDEIGPLALARVLIVYPWTQRYFGSFGNVSTPAAIMGNPK -VAAHGKVVCGALDKAVKNMGNILATYKSLSETHANKLFVDPDNFRVLADVLTIVIAAKFG -ASFTPEIQATWQKFMKVVVAAMGSRYF -> HBB1_SPHPU -VHWTAEEKHLLGSLWAKVDVADIGGEALGRLLVVYPWTQRFFADFGNLSSATAICGNPR -VKAHGKKVFTMFGEALKHLDNLKETFASLSELHCDKLHVDTENFKLLGNLVIVVLAARLH -DSFTPAAQAAFHKLAYSVAHALARRYH -> HBB1_TAPTE 
-VELTGEEKAAVLALWDKVDEDKVGGEALGRLLVVYPWTQRFFDSFGDLSTAAAVMGNPK -VKAHGKKVLHSFGDGVHHLDDLKVTFAQLSELHCDKLHVDPENFRLLGNVLVVVLAQQFG -KAFTPELQAAYQKVVAGVANALAHKYH -> HBB1_TORMA -VSLTDEEIRLIQHIWSNVNVVEITAKALERVFYVYPWTTRLFTSFnhNFKASDKQVHDH -AVNVSNAISAAIGDLHDINKNFSALSTKHQKKLGVDTSNFMLLGQAFLVELAALEKDKFT -PQYHKAALKLFEVVTEALSCQYH -> HBB1_TRICR -TFTNDESQHIHDVCGKIPVDQVGAEALGRLILVNPWTRRYFKSFGDLSSAEAIQHNPKV -ASHGAKVMHSIAEAVKHLDDLKAYYADLSTIHCKKLYVDPANFKLFGGIVSIVTGMHLGT -DYTAQKQAAFEKFLHHVEAALATGYH -> HBB1_UROHA -VHWTAEEKALINAYWGKVDVGSVGGETLANLLVVYPWTQRFFEDFGNLSTPSAILNNPK -XXXXXXXVITSFGDALKNLDNVXXXXXKLSEYHCNKLHVDPVNFRLLGDVLITLSAANFG -KXXXXXXXXXXXXLVGVVAHALARRYH -> HBB1_VAREX -VHWTAEEKQLICSLWGKIDVGLIGGETLAGLLVIYPWTQRQFSHFGNLSSPTAIAGNPR -VKAHGKKVLTSFGDAIKNLDNIKDTFAKLSELHCDKLHVDPTNFKLLGNVLVIVLADHHG -KEFTPAHHAAYQKLVNVVSHSLARRYH -> HBB1_XENBO -GLTAHDRQLINSTWGKVCAKTIGKEALGRLLWTYPWTQRYFSSFGNLNSADAVFHNEAV -AAHGEKVVTSIGEAIKHMDDIKGYYAQLSKYHSETLHVDPCNFKRFGGCLSISLARQFHE -EYTPELHAAYEHLFDAIADALGKGYH -> HBB1_XENLA -GLTAHDRQLINSTWGKLCAKTIGQEALGRLLWTYPWTQRYFSSFGNLNSADAVFHNEAV -AAHGEKVVTSIGEAIKHMDDIKGYYAQLSKYHSETLHVDPLNFKRFGGCLSIALARHFHE -EYTPELHAAYEHLFDAIADALGKGYH -> HBB1_XENTR -VNLTAKERQLITGTWSKICAKTLGKQALGSMLYTYPWTQRYFSSFGNLSSIEAIFHNAA -VATHGEKVLTSIGEAIKHMDDIKGYYAQLSKYHSETLHVDPYNFKRFCSCTIISMAQTLQ -EDFTPELQAAFEKLFAAIADALGKGYH -> HBB2_CYGMA -VEWTNFERATIKDIFSKLEYDVVGPATLARCLVVYPWTQRYFGKFGNLYNAAAIAENAM -VSKHGTTIIHGLDQAVKNMDDIKNTYAELSVLHCDKLHVDPDNFQLLAECLTIVLAAQLG -KEFTGEVQAAFQKFMAVVVSSLGKQYH -> HBB2_MOUSE -VHLTDAEKSAVSCLWAKVNPDEVGGEALGRLLVVYPWTQRYFDSFGDLSSASAIMGNPK -VKAHGKKVITAFNEGLKNLDNLKGTFASLSELHCDKLHVDPENFRLLGNAIVIVLGHHLG -KDFTPAAQAAFQKVVAGVATALAHKYH -> HBB2_NAJNA -VHWSAEEKQLITSLWAKVDVPEVGAATLGKMMVMYPWTQRFFAHFGNLSGPSALCGNPQ -VRAHGKKVLTSFGEALKHLDNVKETFAKLSELHFDKLHVDPENFKLLGNVLIIVLAGHHG -KEFTPSTHASFQKLVNVVAHALARRYH -> HBB2_PANLE -GHLTPEEKSAVTALWSKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPPVQAAYQKVVAGVANALAHKYH -> HBB2_RAT 
-VHLTDAEKATVSGLWGKVNADNVGAEALGRLLVVYPWTQRYFSKFGDLSSASAIMGNPQ -VKAHGKKVINAFNDGLKHLDNLKGTFAHLSELHCDKLHVDPENFRLLGNMIVIVLGHHLG -KEFTPCAQAAFQKVVAGVASALAHKYH -> HBB2_SPHPU -VHWTAEEKQLVTSLWTKVNVDECGGEALGRLLIVYPWTQRFFSSFGNLSSSTAICGNPR -VKAHGKKVFTSFGEAVKNLDNIKATYAKLSELHCEKLHVDPQNFNLLGDIFIIVLAAHFG -KDFTPACQAAWQKLVRVVAHALAYHYH -> HBB2_TAPTE -VHLHGDEKAAVLALWDKVDEEKVGGEALGRLLVVYPWTQRFFDSFGDLSTAAAVMGNPK -VKAHGKKVLHSFGEGVHHLDDLKVTFAQLSELHCDKLHVDPENFRLLGNVLVVVLAQQFG -KAFTPELQAAYQKVVAGVASALAHKYH -> HBB2_TORMA -VSLTDEEKHLIQHIWSNVNVVEITAKALERVFYVYPWTTRLFTSFNHNFKASDKgVHDH -AVNVSKALSAAIGDLHNVNKNFSALSTKHQKKLGVDTSNFMLLGQAFLVELAAFEKDKFT -PQYHKAALKLFEVVTEALSCQYH -> HBB2_TRICR -VHLTAEDRKEIAAILGKVNVDSLGGQCLARLIVVNPWSRRYFHDFGDLSSCDAICRNPK -VLAHGAKVMRSIVEATKHLDNLREYYADLSVTHSLKFYVDPENFKLFSGIVIVCLALTLQ -TDFSCHKQLAFEKLMKGVSHALGHGY -> HBB2_XENBO -GLTAHEKQLITGSWGKINAKAIGKEALGRLLNTFPWTQRYFSSFGNLGSAEAIFHNEAV -AAHGEKVVTSVGEAIKHMDDIKGYYAELSKYHSETLHVDPNNFKRFGGCLSITLGHHFGE -EYTPELHAAYEHLFDAIADALGKGYH -> HBB2_XENLA -VHWTAEEKAAITSVWQKVNVEHDGHDALGRLLIVYPWTQRYFSNFGNLSNSAAVAGNAK -VQAHGKKVLSAVGNAISHIDSVKSSLQQLSKIHATELFVDPENFKRFGGVLVIVLGAKLG -TAFTPKVQAAWEKFIAVLVDGLSQGYN -> HBB2_XENTR -VHWTAEEKATIASVWGKVDIEQDGHDALSRLLVVYPWTQRYFSSFGNLSNVSAVSGNVK -VKAHGNKVLSAVGSAIQHLDDVKSHLKGLSKSHAEDLHVDPENFKRLADVLVIVLAAKLG -SAFTPQVQAVWEKLNATLVAALSHGYF -> HBB4_SALIR -VDWTDAERSAIVGLWGKISVDEIGPQALARLLIVSPWTQRHFSTFGNLSTPAAIMGNPA -VAKHGKTVMHGLDRAVQNLDDIKNTYATLSVMHSEKLHVDPDNFRLLADCITVCVAAKLG -pAVFSADTQEAFQKFLAVVVSALGRQYH -> HBBA_BOSJA -MLTAEEKAAVTAFWGKVHVDEVGGEALGRLLVVYPWTQRFFESFGDLSTADAVMNNPKV -KAHGKKVLDSFSDGMKHLDDLKGTFAALSELHCDKLHVDPENFKLLGNVLVVVLARNFGK -EFTPVLQADFQKVVAGVANALAHRYH -> HBBA_CAPHI -MLTAEEKAAVTGFWGKVKVDEVGAEALGRLLVVYPWTQRFFEHFGDLSSADAVMNNAKV -KAHGKKVLDSFSNGMKHLDDLKGTFAQLSELHCDKLHVDPENFKLLGNVLVVVLARHHGS -EFTPLLQAEFQKVVAGVANALAHRYH -> HBBC_CAPHI -PNKALITGFWSKVKVDEVGAEALGRLLVVYPWTQRFFEHFGDLSSADAVLGNAKVKAHG -KKVLDSFSNGVQHLDDLKGTFAELSELHCDKLHVDPENFRLLGNVLVIVLARHFGKEFTP -ELQAEFQKVVAGVASALAHRYH -> HBBC_RANCA 
-GGSDVSAFLAKVDKRAVGGEALARLLIVYPWTQRYFSTFGNLGSADAISHNSKVLAHGQ -RVLDSIEEGLKHPXXLKAYYAKLSERHSGELHVDPANFYRLGNVLITVMARHFHEEFTPE -LQCALHSSFCAVGEALAKGYH -> HBBC_SHEEP -PNKALITGFWSKVKVDEVGAEALGRLLVVYPWTQRFFEHFGDLSTADAVLGNAKVKAHG -KKVLDSFSNGVQHLDDLKGTFAQLSELHCDKLHVDPENFRLLGNVLVVVLARHFGKEFTP -ELQAEFQKVVAGVASALAHRYH -> HBBF_BOVIN -MLSAEEKAAVTSLFAKVKVDEVGGEALGRLLVVYPWTQRFFESFGDLSSADAILGNPKV -KAHGKKVLDSFCEGLKQLDDLKGAFASLSELHCDKLHVDPENFRLLGNVLVVVLARRFGS -EFSPELQASFQKVVTGVANALAHRYH -> HBBF_CAPHI -MLSAEEKASVLSLFAKVNVEEVGGEALGRLLVVYPWTQRFFEHFGDLSSADAILGNPKV -KAHGKKVLDTFSEGLKQLDDLKGAFASLSELHCDKLHVDPENFRLLGNVLVVVLARRFGG -EFTPELQANFQKVVTGVANALAHRYH -> HBBF_SHEEP -MLTAEEKASVISLFAKVNVEEVGGEALGRLLVVYPWTQRFFEHFGDLSSADAILGNPKV -KGHGKKVLNSFSEGLKQLDDLKGAFASLSELHCDKLHVDPENFRLLGNVLVVVLARRFGG -EFTPELQANFQKVVTGVANALAHRYH -> HBBL_RANCA -VHWTAEEKAVINSVWQKVDVEQDGHEALTRLFIVYPWTQRYFSTFGDLSSPAAIAGNPK -VHAHGKKILGAIDNAIHNLDDVKGTLHDLSEEHANELHVDPENFRRLGEVLIVVLGAKLG -KAFSPQVQHVWEKFIAVLVDALSHSYH -> HBBL_XENLA -VHLSADEKSAINAVWSKVNIENDGHDALTRLLVVFPWTQRYFSSFGNLSNVAAISGNAK -VRAHGKKVLSAVDESIHHLDDIKNFLSVLSTKHAEELHVDPENFKRLADVLVIVLAGKLG -AAFTPQVQAAWEKFSAGLVAALSHGYF -> HBBN_AMMLE -PXKALITGFWSKVKVXXVGAXALGRLLVVYPWTXRFFXHFGXLSSAXAVMXXAKVKAHG -KKVLXSFSXGLKHLXXLKGAFASLSXLHCXKLHVXPXXFRLLGXVLVVVLARHFGKXFXP -XLXAXFXKVVAGVASALAHRYH -> HBBZ_MOUSE -VHFTAEEKAAITSIWDKVDLEKVGGETLGRLLIVYPWTQRFFDKFGNLSSALAIMGNPR -IRAHGKKVLTSLGLGVKNMDNLKETFAHLSELHCDKLHVDPENFKLLGNMLVIVLSTHFA -KEFTPEVQAAWQKLVIGVANALSHKYH -> HBB_ACCGE -VQWAAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSATAVLGNPM -VRAHGKKVLTSFGEAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIVVLAAHFG -KDFSPDCQAAWQKLVRAVAHALARKYH -> HBB_AEGMO -VHWTAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAIIGNPM -VRAHGKKVLTSFGEAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFG -KDFSPDCQAAWQKLVRAVAHALARKYH -> HBB_AILFU -VHLTGEEKAAVTGLWSKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSPDAVMGNPK -VKAHGKKVLNSFSEGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_AILME 
-VHLTGEEKAAVTGLWSKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSTPDAVMNNPK -VKAHGKKVLNSFSEGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_ALCAA -MLTAEEKAAVTAFWGKVKVDEVGGEALGRLLVVYPWTQRFFEHFGDLSTADAVMHNAKV -KEHGKRVLDAFSEGLKHLDDLKGAFAKLSELHCDKLHVDPENFRLLGNVLVVVLARHFGK -EFTPELQADYQKVVTGVANALAHRYH -> HBB_ALLMI -ASFDAHERKFIVDLWAKVDVAQCGADALSRMLIVYPWKRRYFEHFGKMCNAHDILHNSK -VQEHGKKVLASFGEAVKHLDNIKGHFANLSKLHCEKFHVDPENFKLLGDIIIIVLAAHHP -EDFSVECHAAFQKLVRQVAAALAAEYH -> HBB_ANAPL -VHWTAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFT -KDFTPECQAAWQKLVRVVAHALARKYH -> HBB_ANAPP -VHWTAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFP -KEFTPECQAAWQKLVRVVAHALARKYH -> HBB_ANSAN -VHWSAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFSSFGNLSSPTAILGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFA -KEFTPECQAAWQKLVRVVAHALARKYH -> HBB_ANSIN -VHWSAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFSSFGNLSSPTAILGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFA -KEFTPDCQAAWQKLVRVVAHALARKYH -> HBB_ANSSE -VHWSAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTERFFSSFGNLSSPTAIIGNPM -VRAHGKKVLTSFGEAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFS -KDFTPDCQAAWQKLVRVVAHALARKYH -> HBB_ANTPA -VHLTADEKSAVTGLWGKVNVEEVGGEALGRLLVVYPWTQRFFESFGDLSNAGAVMGNAK -VKAHGKKVLNAFSDGLKNLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLMIVLARHFG -KEFCPPVQAAFQKVSLGVATALGHKYH -> HBB_AOTTR -VHLTGEEKAAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSPDAVMNNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_APUAP -VQWTAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSATAVIGNPM -VRAHGKKVLTSFGEAVKNLDSIKSTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFS -KDFTPEAQQAWAKLVRAVAHALARKYH -> HBB_AQUCH -VHWTAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAIIGNPM -VRAHGKKVLTSFGEAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFT -KDFSPDCQAAWQKLVRAVAHALARKYH -> HBB_ARAAR 
-VHWTAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPM -VRAHGKKVLTSFGEAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFG -KDFTPECQAALQKLVRVVAHALARKYH -> HBB_ATEGE -VHLTGEEKAAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMSNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQLQAAYQKVVAGVANALAHKYH -> HBB_BALAC -VHLTAEEKSAVTALWAKVNVEEVGGEALGRLLVVYPWTQRFFEAFGDLSTADAVMKNPK -VKAHGKKVLASFSDGLKHLDDLKGTFATLSELHCDKLHVDPENFRLLGNVLVIVLARHFG -KEFTPELQAAYQKVVAGVANALAHKYH -> HBB_BISBO -MLTAEEKAAVTAFWGKVHVDEVGGEALGRLLVVYPWTQRFFESFGDLSSADAVMNNAKV -KAHGKKVLDSFSNGMKHLDDLKGTFAALSELHCDKLHVDPENFKLLGNVLVVVLARHFGK -EFTPVLQADFQKVVTGVANALAHRYH -> HBB_BOSGA -MLTAEEKAAVTAFWGKVHVDEVGGEALGRLLVVYPWTQRFFESFGDLSTADAVMNNPKV -KAHGKKVLDSFSNGMKHLDDLKGTFAALSELHCDKLHVDPENFKLLGNVLVVVLARHFGK -EFTPVLQADFQKVVAGVANALAHRYH -> HBB_BOSMU -MLTAEEKAAVTAFWGKVKVDEVGGEALGRLLVVYPWTQRFFESFGDLSSADAVMNNPKV -KAHGKKVLDSFSNGMKHLDDLKGTFAALSELHCDKLHVDPENFKLLGNVLVVVLARHFGK -EFTPVLQADFQKVVVGVANALAHRYH -> HBB_BOVIN -MLTAEEKAAVTAFWGKVKVDEVGGEALGRLLVVYPWTQRFFESFGDLSTADAVMNNPKV -KAHGKKVLDSFSNGMKHLDDLKGTFAALSELHCDKLHVDPENFKLLGNVLVVVLARNFGK -EFTPVLQADFQKVVAGVANALAHRYH -> HBB_BRACA -VHWTAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFSSFGNLSSPTAILGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFA -KDFTPDCQAAWQKLVRVVAHALARKYH -> HBB_BRATR -VHLADDEKAAVSALWNKVHVEEFGGEALGRLLVVYPWTSRFFESFGDLSSADAVFSNAK -VKAHGKKVLTSFGEGLKHLDDLKGTYAHLSELHCDKLHVDPENFKLLGNVLVIVLARHFG -KEFTPQLQASYQKVTTGVSTALAHKYH -> HBB_CAICR -SPFSAHEEKLIVDLWAKVDVASCGGDALSRMLIIYPWKRRYFEHFGKLSTDQDVLHNEK -IREHGKKVLASFGEAVKHLDNIKGHFAHLSKLHFEKFHVDCENFKLLGDIIIVVLGMHHP -KDFTLQTHAAFQKLVRHVAAALSAEYH -> HBB_CAIMO -VHWTAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFT -KDFTPDCQAAWQKLVRVVAHALARKYH -> HBB_CALAR -VHLTGEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMNNPK -VKAHGKKVLGAFSDGLTHLDNLKGTFAHLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPVVQAAYQKVVAGVANALAHKYH -> HBB_CAMDR 
-VHLSGDEKNAVHGLWSKVKVDEVGGEALGRLLVVYPWTRRFFESFGDLSTADAVMNNPK -VKAHGSKVLNSFGDGLNHLDNLKGTYAKLSELHCDKLHVDPENFRLLGNVLVVVLARHFG -KEFTPDLQAAYQKVVAGVANALAHRYH -> HBB_CANFA -VHLTAEEKSLVSGLWGKVNVDEVGGEALGRLLIVYPWTQRFFDSFGDLSTPDAVMSNAK -VKAHGKKVLNSFSDGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_CARAU -VEWTDAERSAIIGLWGKLNPDELGPQALARCLIVYPWTQRYFATFGNLSSPAAIMGNPK -VAAHGRTVMGGLERAIKNMDNIKATYAPLSVMHSEKLHVDPDNFRLLADCITVCAAMKFG -pSGFNADVQEAWQKFLSVVVSALCRQYH -> HBB_CAVPO -VHLTAAEKSAILDLWGKVNVGEIGAEALGRLLVVYPWTQRFFEKFGDLSSASAIMSNAH -VKSHGAKVLASFSEGLKHLQDLKGTFAKLSELHCDKLHVDPENFRLLGNMIVIALAHHHP -SEFTPCTQAAFQKVTAGVANALAHKYH -> HBB_CEBAL -VHLTAEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSTPDAVMNNPK -VKAHGKKVLGAFSDGLTHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVATALAHKYH -> HBB_CEBAP -VHLTAEEKSAVTTLWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSTPDAVMNNPK -VKAHGKKVLGAFSDGLTHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVATALAHKYH -> HBB_CERAE -VHLTPEEKTAVTTLWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSSPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_CERSI -VELTAEEKAAVLALWDKVKEDEVGGEALGRLLVVYPWTQRFFDSFGDLSTPAAVMGNAK -VKAHGKKVLHSFGDGVHHLDNLKGTFAALSELHCDKLHVDPENFRLLGNVLVVVLAKHFG -KQFTPELQAAYQKVVAGVANALAHKYH -> HBB_CERTO -VHLTPEEKVAVTTLWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSNPDAVMGNPK -VKAHGKKVLGAFSDGLNHLDNLKGTFAQLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_CHICK -VHWTAEEKQLITGLWGKVNVAECGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFSQLSELHCDKLHVDPENFRLLGDILIIVLAAHFS -KDFTPECQAAWQKLVRVVAHALARKYH -> HBB_CHLME -VHWTAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAISGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFSQLSELHCDKLHVDPENFRLLGDILIIVLAAHFT -KDFTPDCQAAWQKLVRVVAHALARKYH -> HBB_CHRPI -VHWTADEKQLITSLWGKVNVEECGSEALARLLIVYPWTQRFFSTFGNLSNAEAILHNPH -VHAHGKKVLTSFGEAVKNLDHIKQTFATLSKLHCEKLHVDPENFKLLGNVLIIVLASHFT -KEFTPACQAAWQKLVSAVAHALALGYH -> HBB_CICCI 
-VHWTAEEKQLITGLWGKVNVDECGAEALARLLIVYPWTQRFFASFGNLATASAITGNAM -VHAHGKKVLTSFGEAVKNLDNIKNTFAQLSELHCDKLHVDPENFKLLGDILIIVLAAHFG -KDFTPDCQAAWKKLVRVVAHALARKYH -> HBB_COLBA -VHLTPDEKNAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSTADAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_COLLI -VHWSAEEKQLITSIWGKVNVADCGAEALARLLIVYPWTQRFFSSFGNLSSATAISGNPN -VKAHGKKVLTSFGDAVKNLDNIKGTFAQLSELHCDKLHVDPENFRLLGDILVIILAAHFG -KDFTPECQAAWQKLVRVVAHALARKYH -> HBB_COLPO -VHLTPDEKAAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSSPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_CROCR -GFLTAEEKSLVNDLWSKVNVDEVGGEALGRLLVVYPWTQRFFQSFGDLSSADAIMGNSK -VKAHGKKVLNSFSDGLKHIDDLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -NEFTPPVQAAYQKVVAGVANALAHKYH -> HBB_CRONI -ASFDPHEKQLIGDLWHKVDVAHCGGEALSRMLIVYPWKRRYFENFGDISNAQAIMHNEK -VQAHGKKVLASFGEAVCHLDGIRAHFANLSKLHCEKLHVDPENFKLLGDIIIIVLAAHYP -KDFGLECHAAYQKLVRQVAAALAAEYH -> HBB_CTEGU -VHLSAEEKAAVTGLWGKVNVEEVGGEALGRLLVVYPWTQRFFESFGDLSSAAAVMGNPK -VKAHGKKVLTSFSEGLSHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNMIVITLAHHYG -PEFGPQTQAAFQKVVAGVANALAHKYH -> HBB_CYNSP -VHLSGEEKSAVTSLWGKVKVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSASAVMGNAK -VKAHGKKVLDSFSEGLQHLDSLKGTFAKLSELHCDKLHVDPENFRLLGNVLVVVLARHFG -KEFTPQLQAAYQKVVAGVATALAHKYH -> HBB_CYPCA -VEWTDAERSAIIALWGKLNPDELGPEALARCLIVYPWTQRFFASYGNLSSPAAIMGNPK -VAAHGRTVEGGLMRAIKDMDNIKATYAPLSVMHSEKLHVDPDNFRLLADCITVCAAMKFG -pSGFSPNVQEAWQKFLSVVVNALKRQYH -> HBB_DASNO -VNLTSDEKTAVLALWNKVXVXXHGGXALGRLLVVYPWTQRFFESFGDLSTPAAVFANAK -VKAHGKKVLTSFGEGMNHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNMLVVVMARHFG -KEFDHWMHACFQRVVAGVANALAHKYH -> HBB_DIDMA -VHLTSEEKNCITTIWSKVQVDQTGGEALGRMLVVYPWTTRFFGSFGDLSSPGAVMSNSK -VQAHGAKVLTSFGEAVKHLDNLKGTYAKLSELHCDKLHVDPENFKMLGNIIVICLAEHFG -KDFTPECQVAWQKLVAGVAHALAHKYH -> HBB_ECHTE -VHMTDAEKKLVTTMWGKLDVDAAGAETLGRVLVVYPWTQRFFGHFGDLSSACAVMDNPK -VQAHGKKVLHSLGDGLNHLDDLKHFYAALSELHCDKLHVDPENFRLLGNVLVCVMSRHFG -AEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_ELEEL 
-VELTEAQRGAIVNLWGHLSPDEIGPQALARLLIVYPWTQRYFASFGNISSAAAIMGNPK -VAAHGKVVVGALDKAVKNLNNIKGTYAALSTIHSEKLHVDPDNFRLLAESFTVSVAMKLG -pSGFNAETQHALAKFLAEVVSALGKQYH -> HBB_ELEMA -VNLTAAEKTQVTNLWGKVNVKELGGEALSRLLVVYPWTRRFFEHFGDLSTADAVLHNAK -VLAHGEKVLTSFGEGLKHLDNLKGTFADLSELHCDKLHVDPENFRLLGNVLVIVLARHFG -KEFTPDVQAAYEKVVAGVANALAHKYH -> HBB_EQUHE -VQLSGEEKAAVLALWDKVNEEEVGGEALGRLLVVYPWTQRFFDSFGDLSNPAAVMGNPK -VKAHGKKVLHSFGEGVHHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVVVLARHFG -KDFTPELQASYQKVVAGVANALAHKYH -> HBB_ERIEU -VHLTAEEKALVTGLWGKVKVEEFGGEALGRLLVVYPWTQRFFDSFGDLSSADAVMGNPK -VKAHGAKVLQSMGDGIKNLDNLKGTFSKLSELHCDKLHVDPENFRLLGNVLVCVLARHFG -KDFTPAAQAAFQKVVAGVANALAAKYH -> HBB_EUDCR -VHWSAEEKQLITGLWGKVNVAQCGGEALARLLIVYPWTQRFFSSFGNLSSPSAILGNPM -VRAHGKKVLTSFGDAVKNMDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFA -KDFTPECQAAWEKLVRVVAHALARKYH -> HBB_FELCA -GFLTAEEKGLVNGLWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSSADAIMSNAK -VKAHGKKVLNSFSDGLKNIDDLKGAFAKLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -HDFNPQVQAAFQKVVAGVANALAHKYH -> HBB_FRAPO -VHWTAEEKQLITGLWGKVNVAECGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFSQLSELHCDKLHVDPENFRLLGDILIIVLAAHFS -KDFTPDCQAAWQKLVRVVAHALARKYH -> HBB_GALCR -VHLTPDEKNAVCALWGKVNVEEVGGEALGRLLVVYPWTQRFFDSFGDLSSPSAVMGNPK -VKAHGKKVLSAFSDGLQHLDNLCGTFAKLSELHCDKLHVNPENFRLLGNVLVCVLAHHFG -KDFTPEVQAAYEKVVAGVATALAHKYH -> HBB_GORGO -VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPPVQAAYQKVVAGVANALAHKYH -> HBB_HETPO -VHWSEVELHEITTTWKSIDKHSLGAKALARMFIVYPWTTRYFGNLKEFTACSYGVKEHA -KKVTGALGVAVTHLGDVKSQFTDLSKKHAEELHVDVESFKLLAKCFVVELGILLKDKFAP -QTQAIWEKYFGVVVDAISKEYH -> HBB_HIPAM -VHLTAEEKDAVLGLWGKVNVQEVGGEALGRLLVVYPWTQRFFESFGDLSSADAVMNNPK -VKAHGKKVLDSFADGLKHLDNLKGTFAALSELHCDQLHVDPENFRLLGNELVVVLARTFG -KEFTPELQAAYQKVVAGVANALAHRYH -> HBB_HORSE -VQLSGEEKAAVLALWDKVNEEEVGGEALGRLLVVYPWTQRFFDSFGDLSNPGAVMGNPK -VKAHGKKVLHSFGEGVHHLDNLKGTFAALSELHCDKLHVDPENFRLLGNVLVVVLARHFG -KDFTPELQASYQKVVAGVANALAHKYH -> HBB_HUMAN 
-VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPPVQAAYQKVVAGVANALAHKYH -> HBB_HYLLA -VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_LAMGL -VNLSGDEKNAVHGLWSKVKVDEVGGEALGRLLVVYPWTRRFFESFGDLSTADAVMNNPK -VKAHGSKVLNSFGDGLSHLDNLKGTYAKLSELHCDKLHVDPENFRLLGNVLVVVLARHFG -KEFTPDLQAAYQKVVAGVANALAHRYH -> HBB_LARRI -VHWSAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAINGNPM -VRAHGKKVLTSFGEAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFA -KDFTPDSQAAWQKLVRVVAHALARKYH -> HBB_LATCH -VHWTETERATIETVYQKLHLDEVGREALTRLFIVYPWTTRYFKSFGDLSSSKAIASNPK -VTEHGLKVMNKLTEAIHNLDHIKDLFHKLSEKHFHELHVDPQNFKLLSKCLIIVLATKLG -KQLTPDVQATWEKLLSVVVAALSREYH -> HBB_LEMCA -TFLTPEENGHVTSLWGKVNVEKVGGEALGRLLVVYPWTQRFFESFGDLSSPDAIMGNPK -VKAHGKKVLSAFSEGLHHLDNLKGTFAQLSELHCVALHVDPENFKLLGNVLVIVLAHHFG -NDFSPQTQAAFQKVVTGVANALAHKYH -> HBB_LEMFU -TLLSAEENAHVTSLWGKVDVEKVGGEALGRLLVVYPWTQRFFESFGDLSSPSAVMGNPK -VKAHGKKVLSAFSEGLHHLDNLKGTFAQLSELHCDKLHVDPQNFTLLGNVLVVVLAEHFG -NAFSPAVQAAFQKVVAGVANALAHKYH -> HBB_LEMVA -TFLTPEENNHVTSLWGKVNVEKVGGEALGRLLVVYPWTQRFFESFGDLSSPDAIMGNPK -VKAHGKKVLTAFSEGLHHLDNLKGTFAQLSELHCDKLHVDPQNFKLLGNVLVIVLAHHFG -NDFSPQTQAAFQKVVTGVANALAHKYH -> HBB_LEPEU -VHLSGEEKSAVTALWGKVNVEEVGGETLGRLLVVYPWTQRFFESFGDLSTASAVMGNPK -VKAHGKKVLAAFSEGLSHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVIVLSHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_LEPPA -VHWEDAEKQYIVSVFSKIDVDHVGANTLERVLIVFPWTKRYFNSFGDLSSPGAIKHNNK -VSAHGRKVLAAIIECTRHFGNIKGHLANLSHLHSEKLHVDPHNFRVLGQCLRIELAAALG -fKEFTPERNAYFQKFMDVISHSLGREYH -> HBB_LEPWE -VHLTAEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSPNAIMSNPK -VKAHGKKVLNSFSDGLKNLDNLKGTFAKLSELHCDQLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_LIOMI -VHWTAEEKSAITAIWGKVDVAAIGGEALCRLLIVYPWTQRFFTSFGNLSNAAAIQSNAQ -VKAHGKKVFTAFGDAVKNPEGVKDTFAKLSELHCDKLHVDPVNFKLLGQILITVLAAHFG -KDFTPNVQAAYQKLVSVVAHALAHQYH -> HBB_LORTA 
-VHLTGEEKSAVTGLWGKVNVEDVGGEALGRLLVVYPWTQRFFESFGDLSSPSAVMGNPK -VKAHGKKVLSAFSDGLNHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVVVLAHHFG -KDFTPQVQSAYQKVVAGVANALAHKYH -> HBB_LOXAF -VNLTAAEKTQVTNLWGKVNVKELGGEALSRLLVVYPWTRRFFEHFGDLSTAEAVLHNAK -VLAHGEKVLTSFGEGLKHLDNLKGTFADLSELHCDKLHVDPENFRLLGNVLVIVLARHFG -KEFTPDVQAAYEKVVAGVANALAHKYH -> HBB_LUTLU -VHLTGEEKAAVTSLWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSPDAVMGNPK -VKAHGKKVLNSFSEGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_MACCA -VHLTGEEKSTVSALWGKVNVEEIGGEALGRLLVVYPWTQRFFDSFGDLSSPSAVFGNAK -VKSHGKKVLDSFSNGMQHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVVVLARNFG -KEFTPQVQAAYQKVVAGVATALAHKYH -> HBB_MACFU -VHLTPEEKNAVTTLWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSSPDAVMGNPK -VKAHGKKVLGAFSDGLNHLDNLKGTFAQLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_MACGG -VHLTGEEKAAVTGLWGKVNVEEVGGEALGRLLVVYPWTQRFFDSFGDLSSPSAVMGNPK -VKAHGKKVLNSFSDGLKNLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVCVLARHFG -KEFTPQVQAAYQKVVAGVATALAHKYH -> HBB_MACGI -VHLTAEEKNAITSLWGKVAIEQTGGEALGRLLIVYPWTSRFFDHFGDLSNAKAVMANPK -VLAHGAKVLVAFGDAIKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNIIVICLAEHFG -KEFTIDTQVAWQKLVAGVANALAHKYH -> HBB_MACMU -VHLTPEEKNAVTTLWGKVNVDEVGGEALGRLLLVYPWTQRFFESFGDLSSPDAVMGNPK -VKAHGKKVLGAFSDGLNHLDNLKGTFAQLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_MACRU -VHLTAEEKNAITSLWGKVAIEQTGGEALGRLLIVYPWTSRFFDHFGDLSNAKAVMGNPK -VLAHGAKVLVAFGDAIKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNIIVICLAEHFG -KEFTIDTQVAWQKLVAGVANALAHKYH -> HBB_MANSP -VHLTPEEKTAVTTLWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSPDAVMGNPK -VKAHGKKVLGAFSDGLNHLDNLKGTFAQLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_MARMA -VHLSDGEKNAISTAWGKVNAADIGAEALGRLLVVYPWTQRFFDSFGDLSSASAVMGNAK -VKAHGKKVIDSFSNGLKHLDNLKGTFASLSELHCDKLHVDPENFKLLGNMIVIVMAHHLG -KDFTPEAQAAFQKVVAGVANALAHKYH -> HBB_MEGLY -VHLTNEEKTAVIGLWGKVNVEEVGGEALGRLLVVYPWTQRFFESFGDLSSPSAIMGNPK -VKAHGKKVLNSFSEGLKNLDNLKGTFAKLSELHCDKLHVDPENFRLLGYILLCVLARHFG -KEFTPQVQAAYQKVVAGVATALAHKYH -> HBB_MELCA 
-VHLTAEEKAAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSPDAVMGNPK -VKAHGKKVLNSFSEGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_MELME -VHLTAEEKSAVTSLWGKVNVDEVGGEALGRLLVVYPWTQRYFDSFGDLSTPDAVMGNPK -VKAHGKKVLNSFSEGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_MESAU -VHLTDAEKALVTGLWGKVNADAVGAEALGRLLVVYPWTQRFFEHFGDLSSASAVMNNPQ -VKAHGKKVIHSFADGLKHLDNLKGAFSSLSELHCDKLHVDPENFKLLGNMIIIVLSHDLG -KDFTPSAQSAFHKVVAGVANALAHKYH -> HBB_MESBR -VHLTDAEKNLVSGLWGKVNADAVGAEALGRLLVVTPWTQRFFEHFGDLSSASAVMNNPQ -VKAHGKKVIHSFADGLKHLDNLKGAFSSLSELHCDKLHVDPENFKLLGNMIIIVLSHDLG -KDFTPSAQSAFHKVVAGVANALAHKYH -> HBB_MICXA -VHLTDAEKAAISGLWGKVXAXAAGAXALGRLLVVYPWTXRFFXHFGXLSSASAVMGNAQ -VKAHGKKVIHAFADGLKHLDXLKGTFASLSXLHCXKLHVXPXXFRLLGXMIVIVLAHHLG -KDFTPSAXAAFXKVVAGVASALAHKYH -> HBB_MUSLU -VHLTAEEKAAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSPDAVMGNPK -VKAHGKKVLNSFSEGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVATALAHKYH -> HBB_MUSPF -VHLTGEEKAAVTALWGKVNVDEVGGETLGRLLVVYPWTQRFFDSFGDLSSPDAVMSNPK -VKAHGKKVLNSFSEGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_MYOVE -VHLTADEKAAVSGLWGKVNVDEVGGEALGRLLVVYPWTQRFFTSFGDLSNAAAVMGNSK -VKAHGKKVLNSFGEGLKNVDNLKGTFASLSELHCDKLHVDPENFRLLGNVLVIVLARHFG -KEFTPQVQGAFQKLALGVATALAHKYH -> HBB_NASNA -VHLTGEEKTAVTNLWAKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSSPDAIMGNPK -VKAHGKKVLNSFSEGLKNLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_NOTCO -VNWSDSERAIITDIFSHMDYDDIGPKALSRCLIVYPWTQRHFSGFGNLYNAEAILGNAN -VAAHGIKVLHGLDRGVKNMDKIVDAYAELSMLHSEKLHVDPDNFKLLSDCITIVVAAKMG -SAFTPEIQCAFQKFLAVVVSALGKQYH -> HBB_NYCCO -VHLTGEEKSAVTALWGKVNVDDVGGEALGRLLVVYPWTQRFFESFGDLSSPSAVMGNPK -VKAHGKKVLSAFSDGLNHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVVVLAHHFG -KDFTPQVQSAYQKVVAGVANALAHKYH -> HBB_ODORO -VHLTADEKAAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSPDAVMGNPK -VKAHGKKVLNSFSDGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_ODOVI 
-MLTAEEKAAVTGFWGKVNVDVVGAEALGRLLVVYPWTQRFFEHFGDLSSAGAVMGNPKV -KAHGKRVLDAFSEGLKHLDDLKGAFAELSELHCNKLHVDPENFRLLGNVLVVVLARNFGG -EFTPLVQADFQKVVAGVANALAHRYH -> HBB_ONDZI -VHLTDAEKAAISGLWGKVNADGVGAEALGRLLVVYPWTQRFFEHFGDLSSSSAVMGNAK -VKSHGKKVITAFADGLKHLDNLKGTFSALSELHCDKLHVDPENFKLLGNMIVIVLSHDLG -KDFTPDAQSAFQKVVTGVATALGHKYH -> HBB_ORNAN -VHLSGGEKSAVTNLWGKVNINELGGEALGRLLVVYPWTQRFFEAFGDLSSAGAVMGNPK -VKAHGAKVLTSFGDALKNLDDLKGTFAKLSELHCDKLHVDPENFNRLGNVLIVVLARHFS -KDFSPEVQAAWQKLVSGVAHALGHKYH -> HBB_OVIMU -MLTAEEKAAVTGFWGKVKVDEVGAEALGRLLVVYPWTQRFFEHFGDLSSADAVMNNAKV -KAHGKKVLXSFSNGMKHLDDLKGTFAQLSELHCDKLHVXPXXFRXXXXXXXXXXXXHHGS -EFTPVLQAXFQKVVAGVANALAHRYH -> HBB_PAGBE -VEWTDKERSIISDIFSHMDYDDIGPKALSRCLIVYPWTQRHFSGFGNLYNAEAIIGNAN -VAAHGIKVLHGLDRGVKNMDNIAATYADLSTLHSEKLHVDPDNFKLLSDCITIVLAAKMG -HAFTAETQGAFQKFLAVVVSALGKQYH -> HBB_PAGLA -GFLTAEEKGLVNGLWGKVNVDEVGGEALGRLLVVYPWTQRFFQSFGDLSSADAIMHNSK -VKAHGKKVLNSFSDGLKHVDDLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVASALAHRYH -> HBB_PANPO -SFLSAEEKNLVSGLWGKVNVDEVGGEALGRLLVVYPWTQRFFQSFGDLSSADAIMSNAK -VKAHGKKVLNSFSDGLKNIDDLKGAFAKLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -HEFNPQVQAAFQKVVAGVASALAHRYH -> HBB_PANTS -SFLSAEEKGLVNGLWSKVNVDEVGGEALGRLLVVYPWTQRFFQSFGDLSSADAIMSNAK -VKAHGKKVLNSFSDGLKNIDDLKGAFAKLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -HEFNPQVQAAFQKVVAGVASALAHRYH -> HBB_PAPCY -VHLTPEEKNAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSPAAVMGNPK -VKAHGKKVLGAFSDGLNHLDNLKGTFAQLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_PASMO -VQWTAEEKQLITGLWGKVNVAECGGEALARLLIVYPWTQRFFASFGNLSSPTAVLGNPK -VQAHGKKVLTSFGEAVKNLDSIKNTFSQLSELHCDKLHVDPENFRLLGDILVVVLAAHFG -KDFTPDCQAAWQKLVRVVAHALARKYH -> HBB_PHACA -VHWTAEEKQLITGLWGKVNVAECGAEALARLLIVYPWTQRFFASFGNLSSATAITGNPM -VRAHGKKVLTSFGEAVKNLDNIKATFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFA -KDFTPECQAAWQKLVGAVAHALARKYH -> HBB_PHACO -VHWSAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFSQLSELHCDKLHVDPENFRLLGDILIIVLAAHFS -KDFTPECQAAWQKLVRVVAHALARKYH -> HBB_PHORU 
-VHWSAEEKQLITSLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPM -VRAHGKKVLTSFGEAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFA -KDFTPECQAAWQKLVRVVAHALARKYH -> HBB_PHOVI -VHLTGEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSADAIMGNPK -VKAHGKKVLNSFSDGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_PHYCA -VHLTGEEKSGLTALWAKVNVEEIGGEALGRLLVVYPWTQRFFEHFGDLSTADAVMKNPK -VKKHGQKVLASFGEGLKHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVVVLARHFG -KEFTPELQTAYQKVVAGVANALAHKYH -> HBB_PIG -VHLSAEEKEAVLGLWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSNADAVMGNPK -VKAHGKKVLQSFSDGLKHLDNLKGTFAKLSELHCDQLHVDPENFRLLGNVIVVVLARRLG -HDFNPDVQAAFQKVVAGVANALAHKYH -> HBB_POTTR -VHLSSEEKGLITSLWGKIDIEQTGGEALGRLLIVYPWTSRFFDHFGDLSSAKAVLGNAK -VLAHGAKVLVSFGDAIKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVICLAEHFG -KDFTIDAQVAWQKLVAGVANALAHKYH -> HBB_PREEN -VHLTPEEKAAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSSPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_PROCR -GFLTAEEKSLVNDLWSKVNVDEVGGEALGRLLVVYPWTQRFFQSFGDLSSADAIMGNGK -VKAHGKKVLNSFSDGLKHIDDLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -NEFTPPVQAAYQKVVAGVANALAHKYH -> HBB_PROHA -VHLTDAEKAAVTGLWGKVKVDEYGGEALGRLLVVYPWTQRFFEHFGDLSNADAIMHNPK -VLAHGKKVLSSFGDGLNHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVVVLARHFH -EEFTPDVQAAFQKVVTGVANALAHKYH -> HBB_PROLO -VHLTADEKTAVTTLWGKVNVEEVGGEALGRLLVVYPWTQRFFESFGDLSSADAIMGNPK -VKAHGKKVLNSFSEGLKNLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPPVQAAYQKVVAGVANALAHKYH -> HBB_PSIKR -VHWSAEEKQLITGLWGKVNVAECGAEALARLLIVYPWTQRFFTSFGNLSSASAVLGNPN -VRAHGKKVLTSFGEAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAGHFG -KDFTPDCQAAWQKLVRAVAHALARKYH -> HBB_PTEAL -VHLSGEEKAAVTGLWGKVKVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSASAVMGNPK -VKAHGKKVLDSFSEGLQHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVCVLARHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_PTEBR -VHLTGEEKAAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSPDAVMGNPK -VKAHGKKVLNSFSEGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_PTEPO 
-VHLSGEEKAAVTGLWGKVKVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSAPAVMGNPK -VKAHGKKVLDSFSEGLQHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVCVLARHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_RABIT -VHLSSEEKSAVTALWGKVNVEEVGGEALGRLLVVYPWTQRFFESFGDLSSANAVMNNPK -VKAHGKKVLAAFSEGLSHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVIVLSHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_RANES -GSDLVSGFWGKVDAHKIGGEALARLLVVYPWTQRYFTTFGNLGSADAICHNAKVLAHGE -KVLAAIGEGLKHPENLKAHYAKLSEYHSNKLHVDPANFRLLGNVFITVLARHFQHEFTPE -LQHALEAHFCAVGDALAKAYH -> HBB_RANTA -MLTSEEKAAVTGFWGKVKVDEVGAEALGRLLVVYPWTQRFFEHFGDLSSADAIMHNDKV -KAHGKRVLDAFSDGLKHLDDLKGAFAKLSELHCDKLHVDPENFRLLGNVLVVVLARHFGK -DFTPVLQADYQKVVTGVANALAHRYH -> HBB_RHEAM -VQWTAEEKQLITGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFA -KDFTPECQAAWQKLVRVVAHALARKYH -> HBB_RHIUN -VDLTAEEKAAVLALWGKVNEDEVGGEALGRLLVVYPWTQRFFDSFGDLSTPAAVLGNAK -VKAHGKKVLHSFGDGVHNLDNLKGTYAALSELHCDKLHVDPENFRLLGNVLVVVLAQHFG -QEFTPELQAAYQKVVAGVANALAHKYH -> HBB_ROUAE -VHLSGEEKAAVTALWGKVKVEEVGGEALGRLLVVYPWTQRFFDSFGDLSSASAVMSNPK -VKAHGKKVLDSFSEGLQHLDSLKGTFAKLSELHCDKLHVDPENFRLLGNVLVCVLARHFG -KEFTPQVQAAYQKVVAGVATALAHKYH -> HBB_SAGFU -VHLTGEEKSAVTTLWGKVNVEEVGGEALGRLLVVYPWTQRFFESFGDLSSPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCNKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_SAGMY -VHLTGEEKSAVTTLWGKVNVEEVGGEALGRLLVVYPWTQRFFDSFGDLSSPDAVMNNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_SAGNI -VHLTGEEKSAVTTLWGKVNVEEVGGEALGRLLVVYPWTQRFFESFGDLSSPDAVMNNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_SAISC -VHLTGDEKAAVTALWGKVNVEDVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMNNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_SHEEP -MLTAEEKAAVTGFWGKVKVDEVGAEALGRLLVVYPWTQRFFEHFGDLSNADAVMNNPKV -KAHGKKVLDSFSNGMKHLDDLKGTFAQLSELHCDKLHVDPENFRLLGNVLVVVLARHHGN -EFTPVLQADFQKVVAGVANALAHKYH -> HBB_SPAEH 
-VHLTDAEKAAVSGLWSKVNVDEIGGEALGRLLVVYPWTQRFFDSFGDLSSPSAVMSNPK -VKAHGKKVLNSFSEGLKHLDNLKGTFSSLSELHCDKLHVDPENFKLLGNVIVVVLAHHLG -KDFTPAAQAAFQKVVAGVATALAHKYH -> HBB_SPECI -VHLSDGEKNAISTAWGKVHAAEVGAEALGRLLVVYPWTQRFFDSFGDLSSASAVMGNAK -VKAHGKKVIDSFSNGLKHLDNLKGTFASLSELHCDKLHVDPENFKLLGNMIVIVMAHHLG -KDFTPEAQAAFQKVVAGVANALAHKYH -> HBB_SPETO -VHLTDGEKNAISTAWGKVNAAEIGAEALGRLLVVYPWTQRFFDSFGDLSSASAVMGNAK -VKAHGKKVIDSFSNGLKHLDNLKGTFASLSELHCDKLHVDPENFKLLGNMIVIVMAHHLG -KDFTPEAQAAFQKVVAGVANALSHKYH -> HBB_SQUAC -VHWTGEEKALVNAVWTKTDHQAVVAKALERLFVVYPWTKTYFVKFNGKFHASDSTVQTH -AGKVVSALTVAYNHIDDVKPHFVELSKKHYEELHVDPENFKLLANCLEVELGHALHKEFT -PEVQAAWSKFSNVVVDALSKGYH -> HBB_STRCA -VQWSAEEKQLISGLWGKVNVADCGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPM -VRAHGKKVLTSFGDAVKNLDNIKNTFAQLSELHCDKLHVDPENFRLLGDILIIVLAAHFT -KEFTPECQAAWQKLVRVVAHALARKYH -> HBB_STUVU -VQWTAEEKQLITGLWGKVNVAECGAEALARLLIVYPWTQRFFASFGNLSSPTAVLGNPK -VQAHGKKVLTSFGDAVKNLDSIKNTFSQLSELHCDKLHVDPENFRLLGDILVVVLAAHFG -KDFTPDCQAAWQKLVRVVAHALARKYH -> HBB_SUNMU -VHLSGEEKACVTGLWGKVNEDEVGAEALGRLLVVYPWTQRFFDSFGDLSSASAVMGNPK -VKAHGKKVLHSLGEGVANLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVVVLASKFG -KEFTPPVQAAFQKVVAGVANALAHKYH -> HBB_TACAC -VHLSGSEKTAVTNLWGHVNVNELGGEALGRLLVVYPWTQRFFESFGDLSSADAVMGNAK -VKAHGAKVLTSFGDALKNLDNLKGTFAKLSELHCDKLHVDPENFNRLGNVLVVVLARHFS -KEFTPEAQAAWQKLVSGVSHALAHKYH -> HBB_TADBR -VHLSGEEKGAVTALWGKVNQEEVGGEALGRLLVVYPWTQRFFDSFGDLSSASAVMGNAK -VKAHGKKVLNSFSDGLKNLDNLKGAFAKLSELHCDKLHVDPENFKLLGNVLVVVLARTFG -KEFTPPVQSAFQKVAAGVATALAHKYH -> HBB_TALEU -VHLSGEEKGLVTGMWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSASAIMGNAK -VKAHGKKVANSITDGVKNLDNLKGTYAKLSELHCDKLHVDPENFRLLGNVLVCVLARNLG -KEFTPQAQAAFQKVVLGVATALAHKYH -> HBB_TARBA -VHLTADEKAAVTALWGKVDVEDVGGEALGRLLVVYPWTQRFFDSFGDLSTPAAVMGNAK -VKAHGKKVLNAFSEGMAHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVATALAHKYH -> HBB_TARSY -VHLTAEEKAAVTALWGKVDVEDVGGEALGRLLVVYPWTQRFFDSFGDLSTPAAVMSNAK -VKAHGKKVLNAFSDGMAHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVATALAHKYH -> HBB_THEGE 
-VHLTPEEKNAVTTLWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSPAAVMGNPK -VKAHGKKVLGAFSDGLNHLDNLKGTFAQLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_THUTH -VEWTQQERSIIAGFIANLNYEDIGPKALARCLIVYPWTQRYFGAYGDLSTPDAIKGNAK -IAAHGVKVLHGLDRAVKNMDNINEAYSELSVLHSDKLHVDPDNFRILGDCLTVVIAANLG -DAFTVETQCAFQKFLAVVVFALGRKYH -> HBB_TRAST -MLTAEEKAAVTAFWGKVKVDEVGGEALGRLLVVYPWTQRFFESFGDLSTADAVMNNPKV -KAHGKKVLDSFSNGMKHLDDLKGTFAALSELHCDKLHVDPENFKLLGNVLVVVLARHFGK -EFTPELQADYQKVVTGVANALAHRYH -> HBB_TRIIN -VHLTPEEKALVIGLWAKVNVKEYGGEALGRLLVVYPWTQRFFEHFGDLSSASAIMNNPK -VKAHGEKVFTSFGDGLKHLEDLKGAFAELSELHCDKLHVDPENFRLLGNVLVCVLARHFG -KEFSPEAQAAYQKVVAGVANALAHKYH -> HBB_TUPGL -VHLSGEEKAAVTGLWGKVDLEKVGGQSLGSLLIVYPWTQRFFDSFGDLSSPSAVMSNPK -VKAHGKKVLTSFSDGLNHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVRVLACNFG -PEFTPQVQAAFQKVVAGVANALAHKYH -> HBB_TURME -VQWTAEEKQLITGLWGKVNVAECGGEALARLLIVYPWTQRFFASFGNLSSPTAVLGNPK -VQAHGKKVLTSFGEAVKNLDSIKGTFAQLSELHCDKLHVDPENFRLLGDILVVVLAAHFG -KDFTPDCQAAWQKLVRVVAHALARKYH -> HBB_TURTR -VHLTGEEKSAVTALWGKVNVEEVGGEALGRLLVVYPWTQRFFESFGDLSTADAVMKNPN -VKKHGQKVLASFGEGLKHLDDLKGTFAALSELHCDKLHVDPENFRLLGNVLVVVLARHFG -KEFTPELQSAYQKVVAGVATALAHKYH -> HBB_URSMA -VHLTGEEKSLVTGLWGKVNVDEVGGEALGRLLVVYPWTQRFFDSFGDLSSADAIMNNPK -VKAHGKKVLNSFSDGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBB_VULGR -VHWSAEEKQLITGLWGKVNVAECGAEALARLLIVYPWTQRFFASFGNLSSPTAIIGNPM -VRAHGKKVLTSFGEAVKNLDNIKNTFAQLSELHCEKLHVDPENFRLLGDILIIVLAAHFA -KDFTPDCQAAWQKLVRAVAHALARKYH -> HBB_VULVV -VHLTAEEKSLVTGLWGKVNVDEVGGEALGRLLIVYPWTQRFFDSFGDLSTPDAVMGNAK -VKAHGKKVLNSFSDGLKNLDNLKGTFAKLSELHCDKLHVDPENFKLLGNVLVCVLAHHFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBD_AOTTR -VHLTGDEKSAVAALWGKVNVEEVGGEALGRLLVVYPWTQRFFESFGALSSPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLARNFG -KEFTPLLQAAFQKVVAGVATALAHKYH -> HBD_ATEGE -VHLTPEEKAAVAALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPAAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLARNFG -KEFTPQVQAAFQKVVAGVATALAHKYH -> HBD_COLPO 
-VHLTPEEKTVVSALWGKVNVDAVGGEALGRLLVVYPWTQRFFESFGDLSSPAAVMGNPK -VKAHGKKVLGAFSDGLAHLDSLKGTFSQLSELHCDKLHVDPENFRLLGNVLVCVLAHNFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBD_HUMAN -VHLTPEEKTAVNALWGKVNVDAVGGEALGRLLVVYPWTQRFFESFGDLSSPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFSQLSELHCDKLHVDPENFRLLGNVLVCVLARNFG -KEFTPQMQAAYQKVVAGVANALAHKYH -> HBD_PANTR -VHLTPEEKTAVNALWGKVNVDAVGGEALGRLLVVYPWTQRFFESFGDLSSPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFSQLSELHCDKLHVDPENFRLLGNVLVCVLARNFG -KEFTPQVQAAYQKVVAGVANALAHKYH -> HBD_SAGMY -VHLTGDEKSAVAALWSKVNVDEVGGEALGRLLVVYPWTQRFFESFGALSSPDAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLARNFG -KEFTPRVQAAFQKVVAGVATALAHKYH -> HBD_SAISC -VHLTGDEKSAVAALWSKVNVDEVGGEALGRLLVVYPWTQRFFESFGALSSADAVMGNPK -VKAHGKKVLGAFSDGLAHLDNLKGTFAQLSELHCDKLHVDPENFRLLGNVLVCVLARNFG -KEFTPQVQAAFQKVVAGVATALAHKYH -> HBD_TARSY -VHLTADEKAAVTALWSKVNVEDVGGEALGRLLVVYPWTQRFFDSFGDLSTPAAVMSNAK -VKAHGKKVLNAFSDGMAHLDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLVCVLAHHFG -KQFTPQLQAAYQKVVAGVAAALAHKYH -> HBE1_CAPHI -VHFTAEEKAAITGLWGKVNVEEAGGEALGRLLVVYPWTQRFFDSFGNLSSASAIMGNPK -VKAHGKKVLTSFGEAIKNLDNLKGAFAKLSELHCDKLHVDPENFRLLGNVIVIILATHFG -REFTPDVQAAWQKLVSGVATALAHKYH -> HBE2_BOVIN -VHFTTEENVAVASLWAKVNVEVVGGESLARLLIVCPWTQRFFDSFGNLYSESAIMGNPK -VKVYGRKVLNSFGNAIKHMDDLKGTFADLSELHCDKLHVDPENFRLLGNMILIVLATHFS -KEFTPQMQAAWQKLTNAVANALTHKYH -> HBE2_CAPHI -VHFTTEEKAAVASLWAKVNVEVVGGESLARLLIVYPWTQRFFDSFGNLCSESAIMGNPK -VKAHGRKVLNSFGNAIKHMDDLKGTFADLSELHCDKLHVDPPNFRLLGNMILIVLATHFS -KEFTPQMQAAWQKLTNAVANALAHKYH -> HBE4_BOVIN -VHFTTEEKAAVASLWAKVNVEVVGGESLARLLIVYPWTQRFFDSFGNLYSESAIMGNPK -VKAHGRKVLNSFGNAIEHMDDLKGTFADLSELHCDKLHVDPENFRLLGNMILIVLATHFS -KEFTPQMQASWQKLTNAVANALAHKYH -> HBE_CAIMO -VHWSAEEKQLITGLWGKVNVEECGAEALARLLIVYPWTQRFFSSFGNLSSPTAIIGNPK -VPPHGRKFFTSFGEPVKNLDNIKNTYAKLSELHCEKLQVEPENFRLLGDILIIVLASHFA -RDFTPACQFPWQKLVSVVAHALPRKYH -> HBE_CHICK -VHWSAEEKQLITSVWSKVNVEECGAEALARLLIVYPWTQRFFASFGNLSSPTAIMGNPR -VRAHGKKVLSSFGEAVKNLDNIKNTYAKLSELHCDKLHVDPENFRLLGDILIIVLASHFA -RDFTPACQFAWQKLVNVVAHALARKYH -> HBE_DIDMA 
-VHFTPEDKTNITSVWTKVDVEDVGGESLARLLVVYPWTQRFFDSFGNLSSASAVMGNPK -VKAHGKKVLTSFGEGVKNMDNLKGTFAKLSELHCDKLHVDPENFRLLGNVLIIVLASRFG -KEFTPEVQASWQKLVSGVSSALGHKYH -> HBE_GALCR -VHFTAEEKAIIMSLWGKVNIEEAGGEALGRLLVVYPWTQRFFETFGNLSSASAIMGNPK -VKAHGKKVLTSFGEAVKNMDNLKGAFAKLSELHCDKLHVDPENFKLLGNVMVIILATHFG -KEFTPDVQAAWQKLVSGVATALAHKYH -> HBE_HUMAN -VHFTAEEKAAVTSLWSKMNVEEAGGEALGRLLVVYPWTQRFFDSFGNLSSPSAILGNPK -VKAHGKKVLTSFGDAIKNMDNLKPAFAKLSELHCDKLHVDPENFKLLGNVMVIILATHFG -KEFTPEVQAAWQKLVSAVAIALAHKYH -> HBE_LEMFU -VHFTAEEKSTILSLWGKVNVEEAGGEALGRLLVVYPWTQRFFDNFGNLSSASAILGNPK -VKAHGKKVLTSFGEAVKNMDNLKGAFAKLSELHCDKLHVDPENFKLLGNVMVIILATHFG -KEFTPDVQAAWQKLVSGVATALAHKYH -> HBE_MOUSE -VNFTAEEKTLINGLWSKVNVEEVGGEALGRLLVVYPWTQRFFDSFGNLSSASAIMGNPR -VKAHGKKVLTAFGESIKNLDNLKSALAKLSELHCDKLHVDPENFKLLGNVLVIVLASHFG -NEFTAEMQAAWQKLVAGVATALSHKYH -> HBE_PIG -VHFTAEEKAVITGLWSRVNVEETGGEAVGRLLVVYPWTQRFFDSFGNMSSPSAIMGNPK -VKAHGKKVLTSFGDAVKNMDNLKGTFAKLSELHCDKLHVDPENFRLLGNMIVIILASHFG -REFTPEVQAAWQKLVAGVATALAHKYH -> HBE_PONPY -VHFTAEEKAAVTSLWSKMNVEEAGGEALGRLLVVYPWTQRFFDSFGNLSSPSAILGNPK -VKAHGKKVLTSFGDAIKNMDNLKTTFAKLSELHCDKLHVDPENFKLLGNVMVIILATHFG -KEFTPEVQAAWQKLVSAVAIALAHKYH -> HBE_RABIT -VHFTPEEKCIISKQWGQVNIDETGGEALGRLLVVYPWTQRFFDNFGNLSSSSAIMGNPK -VKAHGKKVLTSFGDAIKNMDNLKGAFAKLSELHCDKLHVDPENFKLLGNVLLIVLATHFG -KEFTPEVQAAWQKLVSGVAIALAHKYH -> HBE_TARSY -VHLTAEEKSSVTSLWGKMNVDEAGGEALGRLLVVYPWTQRFFDNFGNLSSSSAIMGNPK -VKAHGKKVLTSFGDAIKNMDNLKGAFAKLSELHCDKLHVDPENFRLLGNVLVIILVTHFG -KDFTPEVQVAWQKLVSGVATALAHKYH -> HBF1_URECA -GLTTAQIKAIQDHWFLnIKGCLQAAADSIFFKYLTAYPGDLAFFHKFSSVPLYGLRSNP -AYKAQTLTVINYLDKVVDALGGNAGALMKAKVPSHDAMGITPKHFGQLLKLVGGVFQEEF -SADPTTVAAWGDAAGVLVAAMK -> HBG1_PONPY -GHFTEEDKATITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFDSFGNLSSASAIMGNPK -VKAHGKKVLTSLGDAIKNLDDLKGTFAQLSELHCDKLHVDPENFRLLGNVLVTVLAIHFG -KEFTPEVQASWQKMVTGVASALSSRYH -> HBG2_PONPY -SNFTAEDKAAITSLWGKLNVEDAGGETLGRLLLVYPWTQRFFDSFGSLSSPSAIMGNPK -VKAHGVKVLTSLGGAVKNLDDLKGTFGQLSELHCDKLHVDPENFRLLGNVLVTVLAILHG -KEFTPEVQASWQKMVAAVASALASRYH -> HBG_ATEGE 
-SNFTAEDKAAITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFDSFGSLSSPSAIMGNPK -VKAHGVKVLTSLGEAIKNLDDLKGTFGQLSELHCDKLHVDPENFRLLGNVLVTVLAILHG -KEFTPEVQASWQKMVAGVASALASRYH -> HBG_CHEME -VHFTVEEKAVITSLWGKVNVEEAGGEALGRLLVVYPWTQRFFDNFGNLSSASAIMGNPK -VKAHGKKVLTSLGEAIKNMDDLKGTFAHLSELHCDRLHVDPENFKLLGNELVIVLAKHFG -KEFTPQVQAAWQKMVAGVAIALAHKYH -> HBG_GALCR -VHFTAEEKAIITSLWGKVNVEEDGGEALGRLLVVYPWTQRFFDTFGNLSSASAIMGNPK -VKAHGKKVLSSLGEAIKNMDDLKGTFSHLSELHCDRLHVDPENFRLLGNVLVIVLAKHFG -KEFTPQIQAASQKMVAGVATALAHKYH -> HBG_GORGO -GHFTEEDKATITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFDSFGNLSSASAIMGNPK -VKAHGKKVLTSLGGAIKHLDDLKGTFAQLSELHCDKLHVDPENFRLLGNVLVTVLAIHFG -KEFTPEVQASWQKMVTAVASALSSRYH -> HBG_HUMAN -GHFTEEDKATITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFDSFGNLSSASAIMGNPK -VKAHGKKVLTSLGDAIKHLDDLKGTFAQLSELHCDKLHVDPENFKLLGNVLVTVLAIHFG -KEFTPEVQASWQKMVTAVASALSSRYH -> HBG_HYLLA -GHFTEEDKATITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFDSFGNLSSASAIMGNPK -VKAHGKKVLTSLGGAIKNLDDLKGTFAQLSELHCDKLHVDPENFRLLGNVLVTVLAIHFG -KEFTPEVQASWQKMVAGVASALSSRYH -> HBG_LEMFU -VHFTAEEKAVITSLWGKVNVEEAGGEALGRLLVVYPWTQRFFDNFGNLSSASAIMGNPK -VKAHGKKVLTSLGDAIKNMDDLKGTFAHLSELHCDRLHVDPENFKLLGNELVIVLAKYFG -KEFTPQVQAAWQKMVAGVAIALAHKYH -> HBG_MACMU -GHFTEEDKATITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFDSFGNLSSASAIMGNPK -VKAHGKKVLTSLGDAIKNLDDLKGTFAQLSELHCDKLHVDPENFRLLGNVLVTVLAIHFG -KEFTPEVQASWQKMVAGVASALSSRYH -> HBG_MACNE -GHFTEEDKATITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFDSFGNLSSASAIMGNPK -VKAHGKKVLTSLGDAIKNLDDLKGTFAQLSELHCDKLHVDPENFRLLGNVLVTVLAIRFG -KEFTPEVQASWQKMVAGVASALSSRYH -> HBG_RABIT -VHFTAEEKAAITSTWKLVDVEDAGAEALGRLLVVYPWTQRFFDSFGNLSSSSAIMGNPK -VKAHGKKVLTAFGDAVKNVDDLKNTFAHLSELHCDRLHVDPENFKLLGNVLVIVLAKYFG -KEFTPQVQSAWQKLVAGVATALAHKYH -> HBG_TARSY -VHFTAEEKAIITSLWAKVNVEETGGEALGRLLVVYPWTQRFFDNFGNLSSASAIMGNPK -VKAHGKKVLSSLGEAVTHMDDLKDAFAHLSRLHCDELHVDPENFRVTpGKRAVIVLAHHF -GREFTPQVQAAWKKLMSAVAIAMGHKYH -> HBP1_CASGL -ALTEKQEALLKQSWEVLKQNIPAHSLRLFALIIEAAPESKYVFSFLKDSNEIPENNPKL -KAHAAVIFKTICESATELRQKGHAVWDNNTLKRLGSIHLKNKITDPHFEVMKGALLGTIK -EAIKENWSDEMGQAWTEAYNQLVATIKAEMKE -> HBP2_CASGL 
-MSTLEGRGFTEEQEALVVKSWSAMKPNAGELGLKFFLKIFEIAPSAQKLFSFLKDSNVP -LERNPKLKSHAMSVFLMTCESAVQLRKAGKVTVRESSLKKLGASHFKHGVADEHFEVTKF -ALLETIKEAVPETWSPEMKNAWGEAYDKLVAAIKLEMKPSS -> HBPI_CAIMO -TLTQAEKAAVITIWTKVATQADAIGAESLERLFSSYPQTKTYFPHFDLSQGSTQLRGHG -SKVMNAIGEAVKNIDDIRGALAKLSELHAYILRVDPVNFKLLCHCILCSVAARYPSDFTP -EVHAAWDKFLSSVSSVLTEKYR -> HBPI_CHICK -ALTQAEKAAVTTIWAKVATQIESIGLESLERLFASYPQTKTYFPHFDVSQGSVQLRGHG -SKVLNAIGEAVKNIDDIRGALAKLSELHAYILRVDPVNFKLLSHCILCSVAARYPSDFTP -EVHAEWDKFLSSISSVLTEKYR -> HBPL_PARAD -SSSEVNKVFTEEQEALVVKAWAVMKKNSAELGLQFFLKIFEIAPSAKNLFSYLKDSPVP -LEQNPKLKPHATTVFVMTCESAVQLRKAGKATVKESDLKRIGAIHFKTGVVNEHFEVTRF -ALLETIKEAVPEMWSPEMKNAWGVAYDQLVAAIKFEMKPSST -> HBPL_TRETO -MSSSEVDKVFTEEQEALVVKSWAVMKKNSAELGLKFFLKIFEIAPSAKNLFSYLKDSPI -PLEQNPKLKPHAMTVFVMTCESAVQLRKAGKVTVRESNLKRLGAIHFKNGVVNEHFETRF -ALLETIKEAVPEMWSPEMKNAWGEAYDQLVAAIKSEMKPSST -> HBRH_CHICK -VHWSAEEKQLITSVWSKVNVEECGAEALARLLIVYPWTQRFFDNFGNLSSPTAIIGNPK -VRAHGKKVLSSFGEAVKNLDNIKNTYAKLSELHCEKLHVDPENFRLLGNILIIVLAAHFT -KDFTPTCQAVWQKLVSVVAHALAYKYH -> HBT_PIG -VHFTAEEKSVITGLWGKVNVEETGGEAVGRLLVVYPWTQRFFDSFGNMSSPSAIMGNPK -VKAHGKKVLTSFGDAVKNMDNLKGTFAKLSELHCDKLHVDPENFRLLGNMIVIILASHFG -GEFTPEVQAAWQKLVAGVATALAHKYH -> LGB1_LUPLU -GVLTDVQVALVKSSFEEFNANIPKNTHRFFTLVLEIAPGAKDLFSFLKGSSEVPQNNPD -LQAHAGKVFKLTYEAAIQLQVNGAVASDATLKSLGSVHVSKGVVDAHFPVVKEAILKTIK -EVVGDKWSEELNTAWTIAYDELAIIIKKEMKDAA -> LGB1_MEDSA -MSFTDKQEALVNSSWEAFKQNLPRYSVFFYTVVLEKAPAAKGLFSFLKNSAEVQDSPQL -QAHAEKVFGLVRDSAVQLRATGGVVLGDATLGAIHVRKGVVDPHFVVVKEALLKTIKEAA -GDKWSEELNTAWEVAYDALATAIKKAMS -> LGB1_MEDTR -MSFTDKQEALVNSSYEAFKQNLSGYSVFFYTVILEKAPAAKGLFSFLKDSAGVQDSPQL -QAHAEKVFGLVRDSASQLRATGGVVLGDAALGAIHIQKGVVDPHFVVVKEALLKTIKEAA -GDKWSEELSTAWEVAYDALATEIKKAMS -> LGB1_PEA -GFTDKQEALVNSSSEFKQNLPGYSILFYTIVLEKAPAAKGLFSFLKDTAGVEDSPKLQA -HAEQVFGLVRDSAAQLRTKGEVVLGNATLGAIHVQKGVTNPHFVVVKEALLQTIKKASGN -NWSEELNTAWEVAYDGLATAIKKAMKTA -> LGB1_SOYBN -GAFTEKQEALVSSSFEAFKANIPQYSVVFYNSILEKAPAAKDLFSFLANGVDPTNPKLT -GHAEKLFALVRDSAGQLKTNGTVVADAALVSIHAQKAVTDPQFVVVKEALLKTIKEAVGG 
-NWSDELSSAWEVAYDELAAAIKKA -> LGB1_VICFA -GFTEKQEALVNSSSQLFKQNPSNYSVLFYTIILQKAPTAKAMFSFLKDSAGVVDSPKLG -AHAEKVFGMVRDSAVQLRATGEVVLDGKDGSIHIQKGVLDPHFVVVKEALLKTIKEASGD -KWSEELSAAWEVAYDGLATAIKAA -> LGB2_LUPLU -GALTESQAALVKSSWEEFNANIPKHTHRFFILVLEIAPAAKDLFSFLKGTSEVPQNNPE -LQAHAGKVFKLVYEAAIQLQVTGVVVTDATLKNLGSVHVSKGVADAHFPVVKEAILKTIK -EVVGAKWSEELNSAWTIAYDELAIVIKKEMNDAA -> LGB2_MEDTR -MGFTEKQEALVNSSWELFKQNPGNSVLFYTIILEKAPAAKGMFSFLKDTAGVQDSPKLQ -SHAEKVFGMVRDSAVQLRATGGVVLGDATLGAIHIQKGVVDPHFVVVKEALLKTIKEVSG -DKWSEELSTAWEVAYDALAAAIKKAMG -> LGB2_SESRO -GFTEKQEALVNASYEAFKQNLPGNSVLFYSFILEKAPAAKGMFSFLKDSDGVPQNNPSL -QAHAEKVFGLVRDSAAQLRATGVVVLADASLGSVHVQKGVLDPHFVVVKEALLKTLKEAA -GATWSDEVSNAWEVAYDGLSAAIKKAMS -> LGB2_SOYBN -GAFTEKQEALVSSSFEAFKANIPQYSVVFYTSILEKAPAAKDLFSFLSNGVDPSNPKLT -GHAEKLFGLVRDSAGQLKANGTVVADAALGSIHAQKAITDPQFVVVKEALLKTIKEAVGD -KWSDELSSAWEVAYDELAAAIKKAF -> LGB3_MEDSA -MGFTDKQEALVNSSWESFKQNPGNSVLFYTIILEKAPAAKGMFSFLKDSAGVQDSPKLQ -SHAEKVFGMVRDSAAQLRATGGVVLGDATLGAIHIQKGVVDPHFAVVKEALLKTIKEVSG -DKWSEELNTAWEVAYDALATAIKKAMV -> LGB3_SESRO -GFTEKQEALVNASYEAFKQNLPGNSVLFYSFILEKAPAAKGMFSFLKDFDEVPQNNPSL -QAHAEKVFGLVRDSAAQLRATGVVVLADASLGSVHVQKGVLDPHFVVVKEALLKTLKEAG -GATWSDEVSNAWEVAYDELSAAIKKAMS -> LGB3_SOYBN -GAFTDKQEALVSSSFEAFKTNIPQYSVVFYTSILEKAPVAKDLFSFLANGVDPTNPKLT -GHAEKLFGLVRDSAGQLKASGTVVIDAALGSIHAQKAITDPQFVVVKEALLKTIKEAVGD -KWSDELSSAWEVAYDELAAAIKKAF -> LGB4_MEDSA -MGFTADQEALVNSSWESFKQNLPGYSVFFYTTILEKAPAAKGMFSFLKDSAGVQDSPQL -QAHAEKVFGMVRDSAVQLRATGEVVLGDATLGSIHIQKGVVDPHFVVVKEALLKTIKEAV -GDKWSEELSTSWEVAYDGLASAIKKAMS -> LGBA_PHAVU -GAFTEKQEALVNSSWEAFKGNIPQYSVVFYTSILEKAPAAKNLFSFLANGVDPTNPKLT -AHAESLFGLVRDSAAQLRANGAVVADAALGSIHSQKGVSNDQFLVVKEALLKTLKQAVGD -KWTDQLSTALELAYDELAAAIKKAYA -> LGBA_SOYBN -VAFTEKQDALVSSSFEAFKANIPQYSVVFYTSILEKAPAAKDLFSFLANGVDPTNPKLT -GHAEKLFALVRDSAGQLKASGTVVADAALGSVHAQKAVTDPQFVVVKEALLKTIKAAVGD -KWSDELSRAWEVAYDELAAAIKKA -> LGB_PSOTE -MGGFTEKQEALVNSSYEAFKANVPQYSVVFYTSILEKAPAAKDLFPFLANGVDPTNPKL -IGHAEKLFGLVHDSAAQLRAKGAVVADAALGSLHAQKGVTDPQFVVVKEALLKTVKEAVG 
-DKWSDELSNAWEVAYNELAAALKKAF -> MYG_ALLMI -MELSDQEWKHVLDIWTKVESKLPEHGHEVIIRLLQEHPETQERFEKFKHMKTADEMKSS -EKMKQHGNTVFTALGNILKQKGNHAEVLKPLAKSHALEHKIPVKYLEFISEIIVKVIAEK -YPADFGADSQAAMRKALELFRNDMASKYKEFGYQG -> MYG_AOTTR -GLSDGEWQLVLNVWGKVEADVPSHGQEVLISLFKGHPETLEKFDKFKHLKSEDEMKASE -ELKKHGVTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPVKYLEFISDAIVHVLQKKH -PGDFGADAQGAMKKALELFRNDMAAKYKELGFQG -> MYG_APTFO -GLNDQEWQQVLTMWGKVESDLAGHGHAVLMRLFKSHPETMDRFDKFRGLKTPDEMRGSE -DMKKHGVTVLTLGQILKKKGHHEAELKPLSQTHATKHKVPVKYLEFISEAIMKVIAQKHA -SNFGADAQEAMKKALELFRNDMASKYKEFGFQG -> MYG_BALAC -VLSDAEWHLVLNIWAKVEADVAGHGQDILIRLFKGHPETLEKFDKFKHLKTEAEMKASE -DLKKHGNTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISDAIIHVLHSRH -PAEFGADAQAAMNKALELFRKDIAAKYKELGFQG -> MYG_BALPH -VLTDAEWHLVLNIWAKVEADVAGHGQDILISLFKGHPETLEKFDKFKHLKTEAEMKASE -DLKKHGNTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISDAIIHVLHSRH -PADFGADAQAAMNKALELFRKDIAAKYKELGFQG -> MYG_BOVIN -GLSDGEWQLVLNAWGKVEADVAGHGQEVLIRLFTGHPETLEKFDKFKHLKTEAEMKASE -DLKKHGNTVLTALGGILKKKGHHEAEVKHLAESHANKHKIPVKYLEFISDAIIHVLHAKH -PSDFGADAQAAMSKALELFRNDMAAQYKVLGFHG -> MYG_CALJA -GLSDGEWQLVLNVWGKVEADIPSHGQEVLISLFKGHPETLEKFDKFKHLKSEDEMKASE -ELKKHGVTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPVKYLEFISDAIVHVLQKKH -PGDFGADAQGAMKKALELFRNDMAAKYKELGFQG -> MYG_CANFA -GLSDGEWQIVLNIWGKVETDLAGHGQEVLIRLFKNHPETLDKFDKFKHLKTEDEMKGSE -DLKKHGNTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPVKYLEFISDAIIQVLQSKH -SGDFHADTEAAMKKALELFRNDIAAKYKELGFQG -> MYG_CASFI -GLSDGEWQLVLHVWGKVEADLAGHGQEVLIRLFKGHPETLEKFNKFKHIKSEDEMKASE -DLKKHGVTVLTALGGVLKKKGHHEAEIKPLAQSHATKHKIPIKYLEFISEAIIHVLQSKH -PGXFGADAXGAMNKALELFRKDIAAKYKELGFQG -> MYG_CEBAP -GLSDGEWQLVLNVWGKVEADIPSHGQEVLISLFKGHPETLEKFDKFKHLKSEDEMKASE -ELKKHGATVLTALGGILKKKGQHEAELKPLAQSHATKHKIPVKYLEFISDAIVHVLQKKH -PGDFGADAQGAMKKALELFRNDMAAKYKELGFQG -> MYG_CEREL -GLSDGEWQLVLNAWGKVEADVAGHGQEVLIRLFTGHPETLEKFDKFKHLKTEAEMKASE -DLKKHGNTVLTALGGILKKKGHHEAEVKHLAESHANKHKIPVKYLEFISDAIIHVLHAKH -PSNFGADAQGAMSKALELFRNDMAAQYKVLGFQG -> MYG_CHEMY -GLSDDEWNHVLGIWAKVEPDLTAHGQEVIIRLFQLHPETQERFAKFKNLTTIDALKSSE 
-EVKKHGTTVLTALGRILKQKNNHEQELKPLAESHATKHKIPVKYLEFICEIIVKVIAEKH -PSDFGADSQAAMKKALELFRNDMASKYKEFGFLG -> MYG_CHICK -GLSDQEWQQVLTIWGKVEADIAGHGHEVLMRLFHDHPETLDRFDKFKGLKTPNEMKGSE -DLKKHGATVLTQLGKILKQKGQHESDLKPLAQTHATKHKIPVKYLEFISEVIIKVIAEKH -AADFGADSQAAMKKALELFRNDMASKYKEFGFQG -> MYG_CTEGU -GLSDGEWQLVLNAWGKVETDIGGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGTTVLTALGNILKKKGQHEAELAPLAQSHATKHKIPVKYLEFISEAIIQVLESKH -PGDFGADAQGAMSKALELFRNDIAAKYKELGFQG -> MYG_CYPCA -HDAELVLKCWGGVEADFEGTGGEVLTRLFKQHPETQKLFPKFVGIASNELAGNAAVKAH -GATVLKKLGELLKARGDHAAILKPLATTHANTHKIALNNFRLITEVLVKVMAEKAGLDAG -GQSALRRVMDVVIGDIDTYYKEIGFAG -> MYG_DIDMA -GLSDGEWQLVLNAWGKVEADIPGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGATVLTALGNILKKKGNHEAELKPLAQSHATKHKISVQFLEFISEAIIQVIQSKH -PGDFGGDAQAAMGKALELFRNDMAAKYKELGFQG -> MYG_ELEMA -GLSDGEWELVLKTWGKVEADIPGHGEFVLVRLFTGHPETLEKFDKFKHLKTEGEMKASE -DLKKQGVTVLTALGGILKKKGHHEAEIQPLAQSHATKHKIPIKYLEFISDAIIHVLQSKH -PAEFGADAQGAMKKALELFRNDIAAKYKELGFQG -> MYG_ERIEU -GLSDGEWQLVLNVWGKVEADIPGHGQEVLIRLFKDHPETLEKFDKFKHLKSEDEMKSSE -DLKKHGTTVLTALGGILKKKGQHEAQLAPLAQSHANKHKIPVKYLEFISEAIIQVLKSKH -AGDFGADAQGAMSKALELFRNDIAAKYKELGFQG -> MYG_ESCGI -VLSDAEWQLVLNIWAKVEADVAGHGQDILIRLFKGHPETLEKFDKFKHLKTEAEMKASE -DLKKHGNTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISDAIIHVLHSRH -PGDFGADAQAAMNKALELFRKDIAAKYKELGFQG -> MYG_GALAU -ADWDKVNSVWSAMEANITAVGQNILLRLFEQYPESQSYFPKLKNKSLGELKDTADIKAQ -ADTVLKALGNIVKKKGNHSQPVKALAATHITTHKIPPHYFTKITTIAVGVLSEMYPSEMN -AQAQEAFSGAFKSICSDIEKEYKAANFQG -> MYG_GALCR -GLSDGEWQLVLKIWGKVEADLAGHGQDVLIRLFTAHPETLEKFDKFKNLKTADEMKASE -DLKKHGVTVLTALGGILKKKGQHEAEIKPLAQSHATKHKIPVKYLEFISEAIIHVLQNKH -SGDFGTDVQGAMSKALELFRNDIAAKYKELGFQG -> MYG_GALJA -AXWDKVNSVWSAVEQNITAIGQNILLRLFEQYPESEDYFPKLKNKSLGELKDTADIKAQ -ADTVLRALGNIVKKKGDHSQPVKALAATHITTHKIPPHYFTKITTIAVGVLSEMYPSEMN -AQAQAAFSGAFKNICSDIEKEYKAANFQG -> MYG_GLOME -GLSDGEWQLVLNVWGKVEADLAGHGQDILIRLFKGHPETLEKFDKFKHLKTEADMKASE -DLKKHGNTVLTALGAILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISEAIIHVLHSRH -PAEFGADAQGAMNKALELFRKDIAAKYKELGFHG -> MYG_GORBE 
-GLSDGEWQLVLNVWGKVEADISGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGATVLTALGGILKKKGHHEAEIKPLAQSHATKHKIPVKYLEFISECIIQVLQSKH -PGDFGADAQGAMNKALELFRKDMASNYKELGFQG -> MYG_GRAGE -GLSDDEWHHVLGIWAKVEPDLSAHGQEVIIRLFQVHPETQERFAKFKNLKTIDELRSSE -EVKKHGTTVLTALGRILKLKNNHEPELKPLAESHATKHKIPVKYLEFICEIIVKVIAEKH -PSDFGADSQAAMRKALELFRNDMASKYKEFGFQG -> MYG_HALGR -GLSDGEWHLVLNVWGKVETDLAGHGQEVLIRLFKSHPETLEKFDKFKHLKSEDDMRRSE -DLRKHGNTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISEAIIHVLHSKH -PAEFGADAQAAMKKALELFRNDIAAKYKELGFHG -> MYG_HETPO -TEWEHVNKVWAVVEPDIPAVGLAILLRLFKEHKETKDLFPKFKEIPVQQLGNNEDLRKH -GVTVLRALGNILKQKGKHSTNVKELADTHINKHKIPPKNFVLITNIAVKVLTEMYPSDMT -GPMQESFSKVFTVICSDLETLYKEANFQG -> MYG_HORSE -GLSDGEWQQVLNVWGKVEADIAGHGQEVLIRLFTGHPETLEKFDKFKHLKTEAEMKASE -DLKKHGTVVLTALGGILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISDAIIHVLHSKH -PGNFGADAQGAMTKALELFRNDIAAKYKELGFQG -> MYG_HUMAN -GLSDGEWQLVLNVWGKVEADIPGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGATVLTALGGILKKKGHHEAEIKPLAQSHATKHKIPVKYLEFISECIIQVLQSKH -PGDFGADAQGAMNKALELFRKDMASNYKELGFQG -> MYG_HYLAG -GLSDGEWQLVLNVWGKVEADIPSHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGATVLTALGGILKKKGHHEAEIKPLAQSHATKHKIPVKYLEFISECIIQVLQSKH -PGDFGADAQGAMNKALELFRKDMASNYKELGFQG -> MYG_INIGE -GLSDGEWQLVLNIWGKVEADLAGHGQDVLIRLFKGHPETLEKFDKFKHLKTEAEMKASE -DLKKHGNTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISEAIIHVLHSRH -PGDFGADAQAAMNKALELFRKDIAAKYKELGFHG -> MYG_KOGSI -VLSEGEWQLVLHVWAKVEADIAGHGQDILIRLFKHHPETLEKFDRFKHLKSEAEMKASE -DLKKHGVTVLTALGAILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISEAIIHVLHSRH -PADFGADAQGAMSKALELFRKDIAAKYKELGYQG -> MYG_LAGLA -GLSDGEWQLVLNIWGKVEADIPSHGQEVLISLFKGHPETLEKFDKFKHLKSEDEMKASE -ELKKHGVTVLTALGGILKKKGQHEAELKPLAQSHATKHKIPVKYLEFISDAIIHALQKKH -PGDFGADAQGAMKKALELFRNDMAAKYKELGFQG -> MYG_LAGMA -GLSDGEWQLVLNVWGKVEADLGGHGQEVLIRLFKGHPETLEKFDKFKHLKAEDEMRASE -DLKKHGTTVLTALGGILKKRGQHAAELAPLAQSHATKHKIPVKYLEFISEAIIQVLQSKH -PGDFGADAQAAMSKALELFRNDIAAKYKELGFQG -> MYG_LEPMU -GLSDGEWQLVLNVWGKVEADVGGHGQEVLIRLFTGHPETLEKFDKFKHLKTADEMKASE 
-DLKKHGTTVLTALGGILKKKGQHEAELKPLAQSHATKHKIPIKYLEFISDAIVHVLHSKH -PAEFGADAQAAMKKALELFRNDIAAKYKELGFQG -> MYG_LOXAF -GLSDGEWELVLKTWGKVEADIPGHGEFVLVRLFTGHPETLEKFDKFKHLKTEGEMKASE -DLKKQGVTVLTALGGILKKKGHHEAEIQPLAQSHATKHKIPIKYLEFISDAIIHVLQSKH -PAEFGADAQAAMKKALELFRNDIAAKYKELGFQG -> MYG_LUTLU -GLSDGEWQLVLNVWGKVEADLAGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKGSE -DLKKHGNTVLTALGGILKKKGKHEAELKPLAQSHATKHKIPIKYLEFISEAIIQVLQSKH -PGXFGADAQGAMKRALELFRNDIAAKYKELGFQG -> MYG_LYCPI -GLSDGEWQIVLNIWGKVETDLAGHGQEVLIRLFKNHPETLDKFDKFKHLKTEDEMKGSE -DLKKHGNTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPVKYLEFISDAIIQVLQNKH -SGDFHADTEAAMKKALELFRNDIAAKYKELGFQG -> MYG_MACFA -GLSDGEWQLVLNVWGKVEADIPSHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGVTVLTALGGILKKKGHHEAEIKPLAQSHATKHKIPVKYLELISESIIQVLQSKH -PGDFGADAQGAMNKALELFRNDMAAKYKELGFQG -> MYG_MACRU -GLSDGEWQLVLNIWGKVETDEGGHGKDVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGITVLTALGNILKKKGHHEAELKPLAQSHATKHKIPVQFLEFISDAIIQVIQSKH -AGNFGADAQAAMKKALELFRHDMAAKYKEFGFQG -> MYG_MEGNO -VLSDAEWQLVLNIWAKVEADVAGHGQDILIRLFKGHPETLEKFDKFKHLKTEAEMKASE -DLKKHGNTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISDAIIHVLHSRH -PADFGADAQAAMNKALELFRKDIAAKYKELGFQG -> MYG_MELME -GLSDGEWQLVLNVWGKVEADLAGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKGSE -DLKKHGNTVLTALGGILKKKGHQEAELKPLAQSHATKHKIPVKYLEFISDAIAQVLQSKH -PGNFAAEAQGAMKKALELFRNDIAAKYKELGFQG -> MYG_MESCA -GLSEAEWQLVLHVWAKVEADLSGHGQEILIRLFKGHPETLEKFDKFKHLKSEAEMKASE -DLKKHGHTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISDAIIHVLHSKH -PSDFGADAQGAMTKALELFRKDIAAKYKELGFHG -> MYG_MOUSE -GLSDGEWQLVLNVWGKVEADLAGHGQEVLIGLFKTHPETLDKFDKFKNLKSEEDMKGSE -DLKKHGCTVLTALGTILKKKGQHAAEIQPLAQSHATKHKIPVKYLEFISEIIIEVLKKRH -SGDFGADAQGAMSKALELFRNDIAAKYKELGFQG -> MYG_MUSAN -VDWEKVNSVWSAVESDLTAIGQNILLRLFEQYPESQNHFPKFKNKSLGELKDTADIKAQ -ADTVLSALGNIVKKKGSHSQPVKALAATHITTHKIPPHYFTKITTIAVDVLSEMYPSEMN -AQVQAAFSGAFKIICSDIEKEYKAANFQG -> MYG_NYCCO -GLSDGEWQSVLNVWGKVEADLAGHGQEILIRLFTAHPETLEKFDKFKNLKTPDEMKASE -DLKKHGVTVLTALGGILKKKGQHEAEIKPLAQSHATKHKIPVKYLEFISGAIIHVLQSKH -PGDFGADAQGAMSKALELFRNDIAAKYKELGFQG -> MYG_OCHPR 
-GLSDGEWQLVLNVWGKVEADLAGHGQEVLIRLFKNHPETLEKFDKFKNLKSEDEMKGSD -DLKKHGNTVLSALGGILKKKGQHEAELKPLAQSHATKHKIPVKYLEFISEAIIQVLQSKH -PGDFGADAQGAMSKALELFRNDMAAKYKELGFQG -> MYG_ORCOR -GLSDGEWQLVLNVWGKVEADLAGHGQDILIRLFKGHPETLEKFDKFKHLKTEADMKASE -DLKKHGNTVLTALGAILKKKGHHDAELKPLAQSHATKHKIPIKYLEFISEAIIHVLHSRH -PAEFGADAQGAMNKALELFRKDIAAKYKELGFHG -> MYG_ORNAN -GLSDGEWQLVLKVWGKVEGDLPGHGQEVLIRLFKTHPETLEKFDKFKGLKTEDEMKASA -DLKKHGGTVLTALGNILKKKGQHEAELKPLAQSHATKHKISIKFLEYISEAIIHVLQSKH -SADFGADAQAAMGKALELFRNDMAAKYKEFGFQG -> MYG_ORYAF -GLSDAEWQLVLNVWGKVEADIPGHGQDVLIRLFKGHPETLEKFDRFKHLKTEDEMKASE -DLKKHGTTVLTALGGILKKKGQHEAEIQPLAQSHATKHKIPVKYLEFISEAIIQVIQSKH -SGDFGADAQGAMSKALELFRNDIAAKYKELGFQG -> MYG_PANTR -GLSDGEWQLVLNVWGKVEADIPGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGATVLTALGGILKKKGHHEAEIKPLAQSHATKHKIPVKYLEFISECIIQVLHSKH -PGDFGADAQGAMNKALELFRKDMASNYKELGFQG -> MYG_PAPAN -GLSDGEWQLVLNVWGKVEADIPSHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGATVLTALGGILKKKGHHEAEIKPLAQSHATKHKIPVKYLELISESIIQVLQSKH -PGDFGADAQGAMNKALELFRNDMAAKYKELGFQG -> MYG_PERPO -GLSDGEWQSVLNVWGKVEADLAGHGQEILIRLFTAHPETLEKFDKFKNLKTPDEMKASE -DLKKHGVTVLTALGGILKKKGHHEAEIKPLAQSHATKHKIPVKYLEFISEAIIHVLQSKH -PGDFGADAQGAMNKALELFRNDIAAKYKELGFQG -> MYG_PHOPH -GLSEGEWQLVLNVWGKVEADLAGHGQDVLIRLFKGHPETLEKFDKFKHLKTEAEMKASE -DLKKHGNTVLTALGGILKKKGHHDAELKPLAQSHATKHKIPIKYLEFISEAIIHVLHSRH -PAEFGADAQGAMNKALELFRKDIATKYKELGFHG -> MYG_PHYCA -VLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHLKTEAEMKASE -DLKKHGVTVLTALGAILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISEAIIHVLHSRH -PGDFGADAQGAMNKALELFRKDIAAKYKELGYQG -> MYG_PIG -GLSDGEWQLVLNVWGKVEADVAGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGNTVLTALGGILKKKGHHEAELTPLAQSHATKHKIPVKYLEFISEAIIQVLQSKH -PGDFGADAQGAMSKALELFRNDMAAKYKELGFQG -> MYG_PONPY -GLSDGEWQLVLNVWGKVEADIPSHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGATVLTALGGILKKKGHHEAEIKPLAQSHATKHKIPVKYLEFISESIIQVLQSKH -PGDFGADAQGAMNKALELFRKDMASNYKELGFQG -> MYG_PROGU -GLSDGEWQLVLNVWGKVEGDLSGHGQEVLIRLFKGHPETLEKFDKFKHLKAEDEMRASE 
-ELKKHGTTVLTALGGILKKKGQHAAELAPLAQSHATKHKIPVKYLEFISEAIIQVLQSKH -PGDFGADAQGAMSKALELFRNDIAAKYKELGFQG -> MYG_RABIT -GLSDAEWQLVLNVWGKVEADLAGHGQEVLIRLFHTHPETLEKFDKFKHLKSEDEMKASE -DLKKHGNTVLTALGAILKKKGHHEAEIKPLAQSHATKHKIPVKYLEFISEAIIHVLHSKH -PGDFGADAQAAMSKALELFRNDIAAQYKELGFQG -> MYG_ROUAE -GLSDGEWQLVLNVWGKVEADIPGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASE -DLKKHGATVLTALGGILKKKGQHEAQLKPLAQSHATKHKIPVKYLEFISEVIIQVLQSKH -PGDFGADAQGAMGKALELFRNDIAAKYKELGFQG -> MYG_SAISC -GLSDGEWQLVLNIWGKVEADIPSHGQEVLISLFKGHPETLEKFDKFKHLKSEDEMKASE -ELKKHGTTVLTALGGILKKKGQHEAELKPLAQSHATKHKIPVKYLELISDAIVHVLQKKH -PGDFGADAQGAMKKALELFRNDMAAKYKELGFQG -> MYG_SHEEP -GLSDGEWQLVLNAWGKVEADVAGHGQEVLIRLFTGHPETLEKFDKFKHLKTEAEMKASE -DLKKHGNTVLTALGGILKKKGHHEAEVKHLAESHANKHKIPVKYLEFISDAIIHVLHAKH -PSNFGADAQGAMSKALELFRNDMAAEYKVLGFQG -> MYG_SPAEH -GLSDGEWQLVLNVWGKVEGDLAGHGQEVLIKLFKNHPETLEKFDKFKHLKSEDEMKGSE -DLKKHGNTVLTALGGILKKKGQHAAEIQPLAQSHATKHKIPIKYLEFISEAIIQVLQSKH -PGDFGADAQGAMSKALELFRNDIAAKYKELGFQG -> MYG_TACAC -GLSDGEWQLVLKVWGKVETDITGHGQDVLIRLFKTHPETLEKFDKFKHLKTEDEMKASA -DLKKHGGVVLTALGSILKKKGQHEAELKPLAQSHATKHKISIKFLEFISEAIIHVLQSKH -SADFGADAQAAMGKALELFRNDMATKYKEFGFQG -> MYG_THUAL -ADFDAVLKCWGPVEADYTTMGGLVLTRLFKEHPETQKLFPKFAGIAQADIAGNAAISAH -GATVLKKLGELLKAKGSHAAILKPLANSHATKHKIPINNFKLISEVLVKVMHEKAGLDAG -GQTALRNVMGIIIADLEANYKELGFSG -> MYG_TUPGL -GLSDGEWQLVLNVWGKVEADVAGHGQEVLIRLFKGHPETLEKFDKFKHLKTEDEMKASE -DLKKHGNTVLSALGGILKKKGQHEAEIKPLAQSHATKHKIPVKYLEFISEAIIQVLQSKH -PGDFGADAQAAMSKALELFRNDIAAKYKELGFQG -> MYG_TURTR -GLSDGEWQLVLNVWGKVEADLAGHGQDVLIRLFKGHPETLEKFDKFKHLKTEADMKASE -DLKKHGNTVLTALGAILKKKGHHDAELKPLAQSHATKHKIPIKYLEFISEAIIHVLHSRH -PAEFGADAQGAMNKALELFRKDIAAKYKELGFHG -> MYG_VARVA -GLSDEEWKKVVDIWGKVEPDLPSHGQEVIIRMFQNHPETQDRFAKFKNLKTLDEMKNSE -DLKKHGTTVLTALGRILKQKGHHEAEIAPLAQTHANTHKIPIKYLEFICEVIVGVIAEKH -SADFGADSQEAMRKALELFRNDMASRYKELGFQG -> MYG_VULCH -GLSDGEWQLVLNIWGKVETDLAGHGQEVLIRLFKNHPETLDKFDKFKHLKTEDEMKGSE -DLKKHGNTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPVKYLEFISDAIIQVLQSKH -SGDFHADTEAAMKKALELFRNDIAAKYKELGFQG -> MYG_ZALCA 
-GLSDGEWQLVLNIWGKVEADLVGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKRSE -DLKKHGKTVLTALGGILKKKGHHDAELKPLAQSHATKHKIPIKYLEFISEAIIHVLQSKH -PGDFGADTHAAMKKALELFRNDIAAKYRELGFQG -> MYG_ZIPCA -GLSEAEWQLVLHVWAKVEADLSGHGQEILIRLFKGHPETLEKFDKFKHLKSEAEMKASE -DLKKHGHTVLTALGGILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISDAIIHVLHSRH -PSDFGADAQAAMTKALELFRKDIAAKYKELGFHG diff --git a/forester/archive/RIO/others/hmmer/tutorial/nucleic.null b/forester/archive/RIO/others/hmmer/tutorial/nucleic.null deleted file mode 100644 index 654e5e3..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/nucleic.null +++ /dev/null @@ -1,15 +0,0 @@ -# nucleic.null -# -# Example of a null model file for DNA/RNA sequences. -# The values in this file are the HMMER 2 default -# settings. - -Nucleic - -0.25 # A -0.25 # C -0.25 # G -0.25 # T - -0.999001 # p1 - diff --git a/forester/archive/RIO/others/hmmer/tutorial/nucleic.pri b/forester/archive/RIO/others/hmmer/tutorial/nucleic.pri deleted file mode 100644 index c1aec46..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/nucleic.pri +++ /dev/null @@ -1,27 +0,0 @@ -# nucleic.pri -# -# Example of a prior file for DNA/RNA models. -# The values in this file are the HMMER 2 default settings. - -Dirichlet # Strategy (mixture Dirichlet) -Nucleic # type of prior (Amino or Nucleic) - -# Transitions -1 # Single component -1.0 # with probability = 1.0 -0.7939 0.0278 0.0135 # m->m, m->i, m->d alpha's -0.1551 0.1331 # i->m, i->i alpha's -0.9002 0.5630 # d->m, d->d alpha's - -# Match emissions -# The use of 1.0 for alpha's here makes a simple Laplace "plus-one" prior. 
-# -1 # single component -1.0 # with probability = 1.0 -1.0 1.0 1.0 1.0 - -# Insert emissions -# -1 # Single component -1.0 # with probability 1.0 -1.0 1.0 1.0 1.0 diff --git a/forester/archive/RIO/others/hmmer/tutorial/pkinase.slx b/forester/archive/RIO/others/hmmer/tutorial/pkinase.slx deleted file mode 100644 index 9f9b39c..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/pkinase.slx +++ /dev/null @@ -1,712 +0,0 @@ -# ID pkinase -# AC PF00069 -# DE Eukaryotic protein kinase domain -# AU Sonnhammer ELL -# AL Clustalw -# AM hmma -qR -# SE Unknown -# DR PROSITE; PDOC00100; -# DR PROSITE; PDOC00212; -# DR PROSITE; PDOC00213; -# DR PROSITE; PDOC00629; -# GA Bic_raw 25 hmmfs 20 -# NC hmmfs 17.60 YPKA_YERPS/2-292 -# CC hmmfs breaks up some members too much, e.g. KS61_MOUSE, which -# CC hmmls does not. However, hmmls inserts too many unwelcome insertions. -# ** hmmls fails badly: NINL_DROME gets the whole myosin domain inserted. -# ** Remaking SEED from more seqs only made things worse. -# DR SCOP; 1apm; sf; -# DR URL; http://www.sdsc.edu/Kinases/pk_home.html; -# RN [1] -# RM 92065863 -# RA S.K. Hanks, A.M. Quinn; -# RL Methods Enzymol. 200: 38-62 (1991) -# RN [2] -# RM 95285959 -# RA S.K. Hanks, T. Hunter; -# RL FASEB J. 9: 576-596 (1995) -# RN [3] -# RM 97172697 -# RA T. Hunter, G.D. Plowman; -# RL Trends Biochem. Sci. 22: 18-22 (1997) -# SQ 67 -7LES_DROME/2209-2481 LKLLRFLGSGAFGEVYEGQLKTED....SEEPQRVAIKSLRKG....... -ABL1_CAEEL/296-547 IIMHNKLGGGQYGDVYEGYWKR........HDCTIAVKALK......... -ARK1_BOVIN/191-453 FSVHRIIGRGGFGEVYGCRKA........DTGKMYAMKCLD......... -AVR2_HUMAN/192-479 LQLLEVKARGRFGCVWKAQLL..........NEYVAVKIFP......... -BFR2_HUMAN/367-643 LTLGKPLGEGCFGQVVMAEAVGIDK.DKPKEAVTVAVKMLKDDAT..... -BYR1_SCHPO/66-320 LEVVRHLGEGNGGAVSLVKHR..........NIFMARKTVYVG......S -BYR2_SCHPO/394-658 WIRGALIGSGSFGQVYLGMNAS........SGELMAVKQVILD....... 
-CC15_YEAST/25-272 YHLKQVIGRGSYGVVYKAINK........HTDQVVAIKEVVYE......N -CC21_MEDSA/1-284 GENVEKIGEGTYGVVYKARDR........VTNETIALKKIR........L -CC5_YEAST/82-337 YHRGHFLGEGGFARCFQIKDD.........SGEIFAAKTVA......... -CDPK_SOYBN/34-292 YEVGRKLGQGQFGTTFECTRR........ASGGKFACKSIP......... -CDR1_SCHPO/12-258 WRLGKTLGTGSTSCVRLAKHA........KTGDLAAIKIIP......... -CHK1_SCHPO/10-272 YHIGREIGTGAFASVRLCYDD.........NAKIYAVKFVN........K -CLK1_MOUSE/160-476 YEIVDTLGEGAFGKVVECIDHK.......VGGRRVAVKIVKN.......V -CTK1_YEAST/183-469 YLRIMQVGEGTYGKVYKAKNTN........TEKLVALKKLRLQ....... -ERK1_CANAL/68-371 YQILEIVGEGAYGIVCSAIHK........PSQQKVAIKKIEP.......F -ERK3_HUMAN/20-312 FVDFQPLGFGVNGLVLSAVDS........RACRKVAVKKIALS......D -FUSE_DROME/4-254 YAVSSLVGQGSFGCVYKATRK........DDSKVVAIKVIS......... -HR25_YEAST/9-273 FRIGRKIGSGSFGDIYHGTNL........ISGEEVAIKLES......... -JAK1_HUMAN/571-833 LVQGEHLGRGTRTHIYSGTLMDYKD..DEGTSEEKKIKVIL......... -JAK1_HUMAN/864-1137 LKRIRDLGEGHFGKVELCRYDPED.....NTGEQVAVKSLK......... -KAB7_YEAST/1096-1354 FVSLQKMGEGAYGKVNLCIHK........KNRYIVVIKMIFK.......E -KAKT_MLVAT/171-429 FEYLKLLGKGTFGKVILVKEK........ATGRYYAMKILK......... -KC21_CHICK/39-324 YQLVRKLGRGKYSEVFEAINIT........NNEKVVVKILK......... -KCC4_MOUSE/42-296 FEVESELGRGATSIVYRCKQK........GTQKPYALKVL.......... -KCR8_YEAST/316-590 GRCQEVLGKGAFGVVRICQKKNVSSQDGNKSEKLYAVKEFKR........ -KG3A_RAT/119-403 YTDIKVIGNGSFGVVYQARLA........ETRELVAIKKVLQ........ -KGP1_DROME/457-717 LEVVSTLGIGGFGRVELVKAHH......QDRVDIFALKCLK......... -KI28_YEAST/7-290 YTKEKKVGEGTYAVVYLGCQHS........TGRKIAIKEIK......... -KI82_YEAST/324-602 FEKIRLLGQGDVGKVYLVRER........DTNQIFALKVLN......... -KIN1_SCHPO/125-395 YVLGKTIGAGSMGKVKDAHHL........KTGEQFAIKIVTRLHPDITKA -KIR1_HUMAN/208-495 ITLLECVGKGRYGEVWRGSWQ..........GENVAVKIFS......... -KKIA_HUMAN/5-288 YEKIGKIGEGSYGVVFKCRNR........DTGQIVAIKKFLE........ 
-KKL6_YEAST/192-508 WKKVRPIGSGNFSTVLLYELMDQS....NPKLKQVAVKRLKYPEELSNVE -KMIL_AVIMH/82-339 VLLSTRIGSGSFGTVYKGKWHG........DVAVKILKVVDP.......T -KML2_CHICK/1453-1708 YNIEERLGSGKFGQVFRLVEK........KTGKVWAGKFFK......... -KMOS_CERAE/60-338 VCLLQRLGAGGFGSVYKATYH..........GVPVAIKQVN......... -KPBH_RAT/24-291 YDPKDIIGRGVSSVVRRCVHRA......TGDEFAVKIMEVS......... -KPIM_HUMAN/38-290 YQVGPLLGSGGFGSVYSGIRV........SDNLPVAIKHVE......... -KPK2_PLAFK/111-364 YVLNKKIGKGSFSTAYIGTNI........LYGNRVVVKEVD......... -KPRO_MAIZE/534-812 RKFKVELGRGESGTVYKGVLE.........DDRHVAVKKLEN........ -KR1_HSV11/191-478 FTIHGALTPGSEGCVFDSSHP.........DYPQRVIVKA.......... -KR1_PRVKA/53-332 FEVLQPLQSGSEGRVFVARRP.........GEADTVVLKV.......... -KS61_MOUSE/407-664 YVVKETIGVGSYSVCKRCVHK........ATNMEYAVKVID......... -KYK1_DICDI/1289-1559 LEFGQTIGKGFFGEVKRGYWR..........ETDVAIKIIY......... -KYK2_DICDI/108-364 IQFIQKVGEGAFSEVWEGWWK..........GIHVAIKKLKIIG.....D -MAK_RAT/4-284 YTTMRQLGDGTYGSVLMGKSN........ESGELVAIKRMK......... -MEK1_YEAST/162-444 EITNRIVGNGTFGHVLITHNSKERDEDVCYHPENYAVKIIK......... -MET_HUMAN/1078-1337 VHFNEVIGRGHFGCVYHGTLLDND.....GKKIHCAVKSLN......... -MKK1_YEAST/221-488 IETLGILGEGAGGSVSKCKLK........NGSKIFALKVIN......... -MLK1_HUMAN/3-262 LTLEEIIGIGGFGKVYRAFWI..........GDEVAVKAARHD......P -NINL_DROME/16-282 FEIYEEIAQGVNAKVFRAKELD........NDRIVALKIQHYD......E -NPR1_YEAST/438-742 IKTGADLGAGAGGSVKLAQRIS........DNKIFAVKEFR........T -PHY_CERPU/1004-1282 IQITGSLGSGSSATVEKAVWL..........GTPVAKKTFYG........ -PKD1_DICDI/36-291 FNFYGSLGSGSFGTAKLCRHR........GSGLFFCSKTLR......... -PKN1_MYXXA/59-321 FRLVRRLGRGGMGAVYLGEHVS........IGSRVAVKVLH......... -RAN1_SCHPO/18-295 LRFVSIIGAGAYGVVYKAEDIY........DGTLYAVKALC......... -RYK_HUMAN/327-593 ITLKDVLQEGTFGRIFHGILIDEKD...PNKEKQAFVKTVKD.......Q -SGV1_YEAST/60-366 YREDEKLGQGTFGEVYKGIHL........ETQRQVAMKKIIVS......V -SPK1_YEAST/198-466 SIIDEVVGQGAFATVKKAIER........TTGKTFAVKIIS......... -ST20_YEAST/620-871 YANLVKIGQGASGGVYTAYEIG........TNVSVAIKQMNLE....... 
-STE7_YEAST/191-466 LVQLGKIGAGNSGTVVKALHVP........DSKIVAKKTIP........V -SYK_PIG/364-619 TLEDKELGSGNFGTVKKGYYQMK......KVVKTVAVKILKN........ -TOP_DROME/938-1194 LRKGGVLGMGAFGRVYKGVWVPEG....ENVKIPVAIKELLKSTG..... -TRKA_HUMAN/504-775 IVLKWELGEGAFGKVFLAECHNLLP...EQDKMLVAVKALK......... -TTK_HUMAN/509-775 YSILKQIGSGGSSKVFQVLNE.........KKQIYAIKYVN........L -WEE1_HUMAN/299-569 FHELEKIGSGEFGSVFKCVKR........LDGCIYAIKRS.......... - -7LES_DROME/2209-2481 ASEFAE............LLQEAQLMSNFK......HENIVRLVGICF.. -ABL1_CAEEL/296-547 EDAMPLH..........EFLAEAAIMKDLH......HKNLVRLLGVCT.. -ARK1_BOVIN/191-453 KKRIKMKQGE......TLALNERIMLSLVSTG...DCPFIVCMSYAFH.. -AVR2_HUMAN/192-479 IQDKQS..........WQNEYEVYSLPGMK......HENILQFIGAEKRG -BFR2_HUMAN/367-643 EKDLSD............LVSEMEMMKMIG.....KHKNIINLLGACTQ. -BYR1_SCHPO/66-320 DSKLQK...........QILRELGVLHHCR......SPYIVGFYGAFQ.. -BYR2_SCHPO/394-658 SVSESKDRHAKLL...DALAGEIALLQELS......HEHIVQYLGSNL.. -CC15_YEAST/25-272 DEELN............DIMAEISLLKNLN......HNNIVKYHGFIR.. -CC21_MEDSA/1-284 EQEDEG.....VP...STAIREISLLKEMQ......HRNIVRLQDVVH.. -CC5_YEAST/82-337 KASIK...SEKTR...KKLLSEIQIHKSMS......HPNIVQFIDCFE.. -CDPK_SOYBN/34-292 KRKLLCKEDYED......VWREIQIMHHLSE.....HANVVRIEGTYE.. -CDR1_SCHPO/12-258 IRYAS.............IGMEILMMRLLR......HPNILRLYDVWT.. -CHK1_SCHPO/10-272 KHATSCMNAGVWA...RRMASEIQLHKLCNG.....HKNIIHFYNTAE.. -CLK1_MOUSE/160-476 DRYCEA............AQSEIQVLEHLNTTDP..HSTFRCVQMLEWF. -CTK1_YEAST/183-469 GEREG......FP...ITSIREIKLLQSFD......HPNVSTIKEIMVE. -ERK1_CANAL/68-371 ERSMLCLR..........TLRELKLLKHFN......HENIISILAIQRPI -ERK3_HUMAN/20-312 ARSMKH............ALREIKIIRRLD......HDNIVKVYEVLGPK -FUSE_DROME/4-254 KRGRATKELKN.......LRRECDIQARLK......HPHVIEMIESFE.. -HR25_YEAST/9-273 IRSRHP...........QLDYESRVYRYLS......GGVGIPFIRWFGR. -JAK1_HUMAN/571-833 KVLDPSHRDIS.....LAFFEAASMMRQVS......HKHIVYLYGVCV.. -JAK1_HUMAN/864-1137 PESGGN........HIADLKKEIEILRNLY......HENIVKYKGICTED -KAB7_YEAST/1096-1354 RILVDTWVRDRKL...GTIPSEIQIMATLNKK...PHENILRLLDFFE.. 
-KAKT_MLVAT/171-429 KEVIVAKDEVAH......TLTENRVLQNSR......HPFLTALKYSFQ.. -KC21_CHICK/39-324 PVKKKK............IKREIKILENLRG.....GPNIITLADIVKDP -KCC4_MOUSE/42-296 KKTVD....KKI......VRTEIGVLLRLS......HPNIIKLKEIFE.. -KCR8_YEAST/316-590 RTSESAEKYSKR......LTSEFCISSSLH......HTNIVTTLDLFQD. -KG3A_RAT/119-403 DKRFK..............NRELQIMRKLD......HCNIVRLRYFFYSS -KGP1_DROME/457-717 KRHIVDTKQEE......HIFSERHIMLSSR......SPFICRLYRTFR.. -KI28_YEAST/7-290 TSEFKDGLDMS.......AIREVKYLQEMQ......HPNVIELIDIFM.. -KI82_YEAST/324-602 KHEMIK...RKKI...KRVLTEQEILATSD......HPFIVTLYHSFQ.. -KIN1_SCHPO/125-395 KAAASAEATKAAQ...SEKNKEIRTVREAALSTLLRHPYICEARDVYI.. -KIR1_HUMAN/208-495 SRDEKS............WFRETELYNTVMLR....HENILGFIASDMTS -KKIA_HUMAN/5-288 SEDDP.....VIK...KIALREIRMLKQLK......HPNLVNLLEVFR.. -KKL6_YEAST/192-508 QINTSLRYKETLSRLENSLTRELQVLKSLN......HPCIVKLLGINNPI -KMIL_AVIMH/82-339 PEQFQA............FRNEVAVLRKTR......HVNILLFMGYMT.. -KML2_CHICK/1453-1708 AYSAK...EKEN......IRDEISIMNCLH......HPKLVQCVDAFE.. -KMOS_CERAE/60-338 KCTKNRLASRR.......SFWAELNVARLR......HDNIVRVVAASTRT -KPBH_RAT/24-291 AERLSLEQLEEVR...DATRREMHILRQVAG.....HPHIITLIDSYE.. -KPIM_HUMAN/38-290 KDRISDWGELPNG...TRVPMEVVLLKKVSSG....FSGVIRLLDWFE.. -KPK2_PLAFK/111-364 KSKVK....ESN......VYTEIEVLRKVM......HKYIIKLISAYE.. -KPRO_MAIZE/534-812 VRQGKE...........VFQAELSVIGRIN......HMNLVRIWGFCS.. -KR1_HSV11/191-478 GWYTS.............TSHEARLLRRLD......HPAILPLLDLHV.. -KR1_PRVKA/53-332 GQKPS.............TLMEGMLLQRLS......HDNVMRMKQMLA.. -KS61_MOUSE/407-664 KSKRD.............PSEEIEILLRYGQ.....HPNIITLKDVYD.. -KYK1_DICDI/1289-1559 RDQFKT...KSSL...VMFQNEVGILSKLR......HPNVVQFLGACTAG -KYK2_DICDI/108-364 EEQFKER...........FIREVQNLKKGN......HQNIVMFIGACY.. -MAK_RAT/4-284 RKFYSWDECMN........LREVKSLKKLN......HANVIKLKEVIR.. -MEK1_YEAST/162-444 LKPNK.............FDKEARILLRLD......HPNIIKVYHTFCD. -MET_HUMAN/1078-1337 RITDIGEVS........QFLTEGIIMKDFS......HPNVLSLLGICLR. 
-MKK1_YEAST/221-488 TLNTDPEYQKQ.......IFRELQFNRSFQ......SEYIVRYYGMFTDD -MLK1_HUMAN/3-262 DEDISQTIEN........VRQEAKLFAMLK......HPNIIALRGVCL.. -NINL_DROME/16-282 EHQVS.............IEEEYRTLRDYCD.....HPNLPEFYGVYKLS -NPR1_YEAST/438-742 KFENES..KRDYV...KKITSEYCIGTTLN......HPNIIETIEIVY.. -PHY_CERPU/1004-1282 RNNED.............FKREVEILAELC......HPNITSMFCSPL.. -PKD1_DICDI/36-291 RETIVHEKHKEH......VNNEINIMLNIS......HPYIVKTYSTFN.. -PKN1_MYXXA/59-321 AHLTMYPELVQR......FHAEARAVNLIG......HENIVSIFDMDA.. -RAN1_SCHPO/18-295 KDGLNEKQKK.......LQARELALHARVSS.....HPYIITLHRVLE.. -RYK_HUMAN/327-593 ASEIQVT..........MMLTESCKLRGLH......HRNLLPITHVCIE. -SGV1_YEAST/60-366 EKDLFP..........ITAQREITILKRLN......HKNIIKLIEMVYDH -SPK1_YEAST/198-466 KRKVIGNMDG........VTRELEVLQKLN......HPRIVRLKGFYE.. -ST20_YEAST/620-871 KQPKKE...........LIINEILVMKGSK......HPNIVNFIDSYV.. -STE7_YEAST/191-466 EQNNS.....TII...NQLVRELSIVKNVKP.....HENIITFYGAYYN. -SYK_PIG/364-619 EANDPALKD........ELLAEANVMQQLD......NPYIVRMIGICE.. -TOP_DROME/938-1194 AESSEE............FLREAYIMASEE......HVNLLKLLAVCM.. -TRKA_HUMAN/504-775 EASESAR.........QDFQREAELLTMLQ......HQHIVRFFGVCTE. -TTK_HUMAN/509-775 EEADNQTLDS........YRNEIAYLNKLQQH....SDKIIRLYDYEI.. -WEE1_HUMAN/299-569 KKPLAGSVDEQN......ALREVYAHAVLGQ.....HSHVVRYFSAWA.. - -7LES_DROME/2209-2481 .................DTESISLIMEHMEAG......DLLSYLRAARAT -ABL1_CAEEL/296-547 .................HEAPFYIITEFMCNG......NLLEYLRRTDKS -ARK1_BOVIN/191-453 .................TPDKLSFILDLMNGG......DLHYHLSQHG.. -AVR2_HUMAN/192-479 ...............TSVDVDLWLITAFHEKG......SLSDFLKANV.. -BFR2_HUMAN/367-643 ..................DGPLYVIVEYASKG......NLREYLRARRPP -BYR1_SCHPO/66-320 .................YKNNISLCMEYMDCG......SLDAILREGG.. -BYR2_SCHPO/394-658 .................NSDHLNIFLEYVPGG......SVAGLLTMYG.. -CC15_YEAST/25-272 .................KSYELYILLEYCANG......SLRRLISRSS.. -CC21_MEDSA/1-284 .................SDKRLYLVFEYLDL.......DLKKHMDSSPE. -CC5_YEAST/82-337 .................DDSNVYILLEICPNG......SLMELLKRRK.. 
-CDPK_SOYBN/34-292 .................DSTAVHLVMELCEGG......ELFDRIVQKG.. -CDR1_SCHPO/12-258 .................DHQHMYLALEYVPDG......ELFHYIRKHG.. -CHK1_SCHPO/10-272 .................NPQWRWVVLEFAQGG......DLFDKIEPDVG. -CLK1_MOUSE/160-476 ................EHRGHICIVFELLGLS.......TYDFIKENS.. -CTK1_YEAST/183-469 .................SQKTVYMIFEYADN.......DLSGLLLNKEV. -ERK1_CANAL/68-371 N..............YESFNEIYLIQELMET.......DLHRVIRTQN.. -ERK3_HUMAN/20-312 G........TDLQGELFKFSVAYIVQEYMET.......DLARLLEQGT.. -FUSE_DROME/4-254 .................SKTDLFVVTEFALM.......DLHRYLSYNG.. -HR25_YEAST/9-273 .................EGEYNAMVIDLLGP.......SLEDLFNYCHR. -JAK1_HUMAN/571-833 .................RDVENIMVEEFVEGG......PLDLFMHRKSD. -JAK1_HUMAN/864-1137 .................GGNGIKLIMEFLPSG......SLKEYLPKNKN. -KAB7_YEAST/1096-1354 .................DDDYYYIETPVHGETGC...IDLFDLIEFKT.. -KAKT_MLVAT/171-429 .................THDRLCFVMEYANGG......ELFFHLSRER.. -KC21_CHICK/39-324 .................VSRTPALVFEHVNNT......DFKQLYQTLT.. -KCC4_MOUSE/42-296 .................TPTEISLVLELVTGG......ELFDRIVEKG.. -KCR8_YEAST/316-590 .................AKGEYCEVMEYCAGG......DLFTLVVAAG.. -KG3A_RAT/119-403 G.............EKKDELYLNLVLEYVPET....VYRVARHFTKAK.. -KGP1_DROME/457-717 .................DEKYVYMLLEACMGG......EIWTMLRDRG.. -KI28_YEAST/7-290 .................AYDNLNLVLEFLPT.......DLEVVIKDKS.. -KI82_YEAST/324-602 .................TKDYLYLCMEYCMGG......EFFRALQTRKS. -KIN1_SCHPO/125-395 .................TNSHYYMVFEFVDGG......QMLDYIISHG.. -KIR1_HUMAN/208-495 R...............HSSTQLWLITHYHEMG......SLYDYLQLTT.. -KKIA_HUMAN/5-288 .................RKRRLHLVFEYCDHT......VLHELDRYQR.. -KKL6_YEAST/192-508 FVTSKKPLCDLIIKTPRALPPCDMIMSYCPAG......DLLAAVMARNG. -KMIL_AVIMH/82-339 ..................KDNLAIVTQWCEGS......SLYKHLHVQET. -KML2_CHICK/1453-1708 .................EKANIVMVLEMVSGG......ELFERIIDED.. -KMOS_CERAE/60-338 PAG..............SNSLGTIIMEFGGNV......TLHQVIYGAASH -KPBH_RAT/24-291 .................SSSFMFLVFDLMRKG......ELFDYLTEKV.. 
-KPIM_HUMAN/38-290 .................RPDSFVLILERPEPV.....QDLFDFITERG.. -KPK2_PLAFK/111-364 .................QEGFVYLVLEYLKGG......ELFEYLNNNG.. -KPRO_MAIZE/534-812 .................EGSHRLLVSEYVENG......SLANILFSEGG. -KR1_HSV11/191-478 .................VSGVTCLVLPKYQA.......DLYTYLSRRLN. -KR1_PRVKA/53-332 .................RGPATCLVLPHFRC.......DLYSYLTMRD.. -KS61_MOUSE/407-664 .................DGKHVYLVTELMRGG......ELLDKILRQK.. -KYK1_DICDI/1289-1559 .................GEDHHCIVTEWMGGG......SLRQFLTDHFN. -KYK2_DICDI/108-364 ...................KPACIITEYMAGG......SLYNILHNPNS. -MAK_RAT/4-284 .................ENDHLYFIFEYMKEN......LYQLMKDRNK.. -MEK1_YEAST/162-444 .................RNNHLYIFQDLIPGG......DLFSYLAKGDCL -MET_HUMAN/1078-1337 .................SEGSPLVVLPYMKHG......DLRNFIRNETH. -MKK1_YEAST/221-488 .................ENSSIYIAMEYMGGRSL...DAIYKNLLERGG. -MLK1_HUMAN/3-262 .................KEPNLCLVMEFARGG......PLNRVLSGKR.. -NINL_DROME/16-282 KPN..............GPDEIWFVMEYCAGGTA...VDMVNKLLKLDR. -NPR1_YEAST/438-742 .................ENDRILQVMEYCEY.......DLFAIVMSNK.. -PHY_CERPU/1004-1282 .................YRRKCSIIMELMDG.......DLLALMQRRLDR -PKD1_DICDI/36-291 .................TPTKIHFIMEYAGKK......DLFHHLRANK.. -PKN1_MYXXA/59-321 .................TPPRPYLIMEFLDG.......APLSAWVGTP.. -RAN1_SCHPO/18-295 .................TEDAIYVVLQYCPNG......DLFTYITEKKVY -RYK_HUMAN/327-593 .................EGEKPMVILPYMNWG......NLKLFLRQCKLV -SGV1_YEAST/60-366 SPDITN......AASSNLHKSFYMILPYMVA.......DLSGVLHNPR.. -SPK1_YEAST/198-466 .................DTESYYMVMEFVSGG......DLMDFVAAHG.. -ST20_YEAST/620-871 .................LKGDLWVIMEYMEGG......SLTDVVTHCI.. -STE7_YEAST/191-466 ...............QHINNEIIILMEYSDCGSLDKILSVYKRFVQRGTV -SYK_PIG/364-619 ..................AESWMLVMEMAELG......PLNKYLQQNR.. -TOP_DROME/938-1194 ..................SSQMMLITQLMPLG......CLLDYVRNNRD. -TRKA_HUMAN/504-775 ..................GRPLLMVFEYMRHG......DLNRFLRSHGPD -TTK_HUMAN/509-775 .................TDQYIYMVMECGNI.......DLNSWLKKKK.. 
-WEE1_HUMAN/299-569 .................EDDHMLIQNEYCNGG......SLADAISENYRI - -7LES_DROME/2209-2481 STQEP.....QPTAGLSLSELLAMCIDVANGCSYLEDMH..........F -ABL1_CAEEL/296-547 ..............LLPPIILVQMASQIASGMSYLEARH..........F -ARK1_BOVIN/191-453 ..............VFSEADMRFYAAEIILGLEHMHNRF..........V -AVR2_HUMAN/192-479 ...............VSWNELCHIAETMARGLAYLHEDIPGLKDGHKPAI -BFR2_HUMAN/367-643 GMEYSYDINRVPEEQMTFKDLVSCTYQLARGMEYLASQK..........C -BYR1_SCHPO/66-320 ..............PIPLDILGKIINSMVKGLIYLYNVLH.........I -BYR2_SCHPO/394-658 ..............SFEETLVKNFIKQTLKGLEYLHSRG..........I -CC15_YEAST/25-272 .T............GLSENESKTYVTQTLLGLKYLHGEG..........V -CC21_MEDSA/1-284 .............FIKDPRQVKMFLYQMLCGIAYCHSHR..........V -CC5_YEAST/82-337 ..............VLTEPEVRFFTTQICGAIKYMHSRR..........V -CDPK_SOYBN/34-292 ..............HYSERQAARLIKTIVEVVEACHSLG..........V -CDR1_SCHPO/12-258 ..............PLSEREAAHYLSQILDAVAHCHRFR..........F -CHK1_SCHPO/10-272 ..............ID.EDVAQFYFAQLMEGISFMHSKG..........V -CLK1_MOUSE/160-476 ............FLPFRMDHIRKMAYQICKSVNFLHSNK..........L -CTK1_YEAST/183-469 ..............QISHSQCKHLFKQLLLGMEYLHDNK..........I -ERK1_CANAL/68-371 ...............LSDDHIQYFIYQTLRALKAMHSAN..........V -ERK3_HUMAN/20-312 ...............LAEEHAKLFMYQLLRGLKYIHSAN..........V -FUSE_DROME/4-254 ..............AMGEEPARRVTGHLVSALYYLHSNR..........I -HR25_YEAST/9-273 ..............RFSFKTVIMLALQMFCRIQYIHGRS..........F -JAK1_HUMAN/571-833 ..............VLTTPWKFKVAKQLASALSYLEDKD..........L -JAK1_HUMAN/864-1137 ..............KINLKQQLKYAVQICKGMDYLGSRQ..........Y -KAB7_YEAST/1096-1354 ..............NMTEFEAKLIFKQVVAGIKHLHDQG..........I -KAKT_MLVAT/171-429 ..............VFSEDRARFYGAEIVSALDYLHSEKN.........V -KC21_CHICK/39-324 .................DYDIRFYMYEILKALDYCHSMG..........I -KCC4_MOUSE/42-296 ..............YYSERDARDAVKQILEAVAYLHENG..........I -KCR8_YEAST/316-590 ..............KLEYMEADCFFKQLIRGVVYMHEMG..........V -KG3A_RAT/119-403 .............LIIPIIYVKVYMYQLFRSLAYIHSQG..........V -KGP1_DROME/457-717 
..............SFEDNAAQFIIGCVLQAFEYLHARG..........I -KI28_YEAST/7-290 .............ILFTPADIKAWMLMTLRGVYHCHRNF..........I -KI82_YEAST/324-602 .K............CIAEEDAKFYASEVVAALEYLHLLG..........F -KIN1_SCHPO/125-395 ..............KLKEKQARKFERQIGSALSYLHQNS..........V -KIR1_HUMAN/208-495 ...............LDTVSCLRIVLSIASGLAHLHIEIFGTQGK..PAI -KKIA_HUMAN/5-288 ..............GVPEHLVKSITWQTLQAVNFCHKHN..........C -KKL6_YEAST/192-508 ..............RLEAWLIQRIFTEVVLAVKYLHENS..........I -KMIL_AVIMH/82-339 ..............KFQMFQLIDIARQTAQGMDYLHAKN..........I -KML2_CHICK/1453-1708 .F............ELTERECIKYMRQISEGVEYIHKQG..........I -KMOS_CERAE/60-338 PEGDAGEPHCSTGGPLTLGKCLKYSLDVVNGLLFLHSQS..........I -KPBH_RAT/24-291 ..............ALSEKETRSIMRSLLEAVNFLHVNN..........I -KPIM_HUMAN/38-290 ..............ALQEELARSFFWQVLEAVRHCHNCG..........V -KPK2_PLAFK/111-364 ..............PYTEQVAKKAMKRVLIALEALHSNG..........V -KPRO_MAIZE/534-812 ............NILLDWEGRFNIALGVAKGLAYLHHECL.......EWV -KR1_HSV11/191-478 ..............PLGRPQIAAVSRQLLSAVDYIHRQG..........I -KR1_PRVKA/53-332 G.............PLDMRDAGCVIRAVLRGLAYLHGMR..........I -KS61_MOUSE/407-664 ..............FFSEREASFVLHTISKTVEYLHSQG..........V -KYK1_DICDI/1289-1559 ............LLEQNPHIRLKLALDIAKGMNYLHGWTP.......P.I -KYK2_DICDI/108-364 ..S.......TPKVKYSFPLVLKMATDMALGLLHLHSIT..........I -MAK_RAT/4-284 ..............LFPESVIRNIMYQILQGLAFIHKHG..........F -MEK1_YEAST/162-444 T.............SMSETESLLIVFQILQALNYLHDQD..........I -MET_HUMAN/1078-1337 ..............NPTVKDLIGFGLQVAKGMKYLASKK..........F -MKK1_YEAST/221-488 ..............RISEKVLGKIAEAVLRGLSYLHEKK..........V -MLK1_HUMAN/3-262 ...............IPPDILVNWAVQIARGMNYLHDEAI.......VPI -NINL_DROME/16-282 ..............RMREEHIAYIIRETCRAAIELNRNH..........V -NPR1_YEAST/438-742 ...............MSYEEICCCFKQILTGVQYLHSIG..........L -PHY_CERPU/1004-1282 NED.......HDSPPFSILEVVDIILQTSEGMNYLHEKG..........I -PKD1_DICDI/36-291 ..............CFTEQTTKLIVAEIVLAIEYLHAEN..........I -PKN1_MYXXA/59-321 
...............LAAGAVVSVLSQVCDALQAAHARG..........I -RAN1_SCHPO/18-295 ..............QGNSHLIKTVFLQLISAVEHCHSVG..........I -RYK_HUMAN/327-593 EAN........NPQAISQQDLVHMAIQIACGMSYLARRE..........V -SGV1_YEAST/60-366 .............INLEMCDIKNMMLQILEGLNYIHCAK..........F -SPK1_YEAST/198-466 ..............AVGEDAGREISRQILTAIKYIHSMG..........I -ST20_YEAST/620-871 ..............LT.EGQIGAVCRETLSGLEFLHSKG..........V -STE7_YEAST/191-466 SSK.......K..TWFNELTISKIAYGVLNGLDHLYRQYK.........I -SYK_PIG/364-619 ..............HVKDKNIIELVHQVSMGMKYLEECN..........F -TOP_DROME/938-1194 ..............KIGSKALLNWSTQIAKGMSYLEEKR..........L -TRKA_HUMAN/504-775 AKLLAGGED.VAPGPLGLGQLLAVASQVAAGMVYLAGLH..........F -TTK_HUMAN/509-775 ..............SIDPWERKSYWKNMLEAVHTIHQHG..........I -WEE1_HUMAN/299-569 MS............YFKEAELKDLLLQVGRGLRYIHSMS..........L - -7LES_DROME/2209-2481 VHRDLACRNCLVTESTGSTD............RRRTVKIGDFGLARDIYK -ABL1_CAEEL/296-547 IHRDLAARNCLVSEH...................NIVKIADFGLARFMKE -ARK1_BOVIN/191-453 VYRDLKPANILLDEH...................GHVRISDLGLACDFS. -AVR2_HUMAN/192-479 SHRDIKSKNVLLKNN...................LTACIADFGLALKFEA -BFR2_HUMAN/367-643 IHRDLAARNVLVTEN...................NVMKIADFGLARDINN -BYR1_SCHPO/66-320 IHRDLKPSNVVVNSR...................GEIKLCDFGVSGELVN -BYR2_SCHPO/394-658 VHRDIKGANILVDNK...................GKIKISDFGISKKLEL -CC15_YEAST/25-272 IHRDIKAANILLSAD...................NTVKLADFGVSTIVN. -CC21_MEDSA/1-284 LHRDLKPQNLLIDRR..................TNSLKLADFGLARAFG. -CC5_YEAST/82-337 IHRDLKLGNIFFDSN...................YNLKIGDFGLAAVLAN -CDPK_SOYBN/34-292 MHRDLKPENFLFDTID.....E...........DAKLKATDFGLSVFYK. -CDR1_SCHPO/12-258 RHRDLKLENILIKVN..................EQQIKIADFGMATVEP. -CHK1_SCHPO/10-272 AHRDLKPENILLDYN...................GNLKISDFGFASLFSY -CLK1_MOUSE/160-476 THTDLKPENILFVKSDYTEAYNPKMKRDERTIVNPDIKVVDFGSATYDD. -CTK1_YEAST/183-469 LHRDVKGSNILIDNQ...................GNLKITDFGLARKMN. 
-ERK1_CANAL/68-371 LHRDLKPSNLLLNSN...................CDLKICDFGLARSIAS -ERK3_HUMAN/20-312 LHRDLKPANIFISTED..................LVLKIGDFGLARIVDQ -FUSE_DROME/4-254 LHRDLKPQNVLLDKN...................MHAKLCDFGLARNMT. -HR25_YEAST/9-273 IHRDIKPDNFLMGVGRR................GSTVHVIDFGLSKKYRD -JAK1_HUMAN/571-833 VHGNVCTKNLLLAREG..................IDSECGPFIKLSDPG. -JAK1_HUMAN/864-1137 VHRDLAARNVLVESE...................HQVKIGDFGLTKAIET -KAB7_YEAST/1096-1354 VHRDIKDENVIVDSK...................GFVKIIDFGSAAYVK. -KAKT_MLVAT/171-429 VYRDLKLENLMLDKD...................GHIKITDFGLCKEGIK -KC21_CHICK/39-324 MHRDVKPHNVMIDHEH..................RKLRLIDWGLAEFYHP -KCC4_MOUSE/42-296 VHRDLKPENLLYATP.A....P...........DAPLKIADFGLSKIV.. -KCR8_YEAST/316-590 CHRDLKPENLLLTHD...................GVLKITDFGNSECFKM -KG3A_RAT/119-403 CHRDIKPQNLLVDPDT..................AVLKLCDFGSAKQLVR -KGP1_DROME/457-717 IYRDLKPENLMLDER...................GYVKIVDFGFAKQIG. -KI28_YEAST/7-290 LHRDLKPNNLLFSPD...................GQIKVADFGLARAIP. -KI82_YEAST/324-602 IYRDLKPENILLHQS...................GHVMLSDFDLSIQATG -KIN1_SCHPO/125-395 VHRDLKIENILISKT...................GDIKIIDFGLSNLYR. -KIR1_HUMAN/208-495 AHRDLKSKNILVKKN...................GQCCIADLGLAVMHSQ -KKIA_HUMAN/5-288 IHRDVKPENILITKH...................SVIKLCDFGFARLLTG -KKL6_YEAST/192-508 IHRDLKLENILLKYSFDDINSFRDSPIY...CKQNFIELADFGLCKKIE. -KMIL_AVIMH/82-339 IHRDMKSNNIFLHGG...................LTVKIGDFGLATVKSR -KML2_CHICK/1453-1708 VHLDLKPENIMCVNKTG.................TSIKLIDFGLARRLE. -KMOS_CERAE/60-338 VHLDLKPANILISEQ...................DVCKISDFGCSEKLED -KPBH_RAT/24-291 VHRDLKPENILLDDN...................MQIRLSDFGFSCHLE. -KPIM_HUMAN/38-290 LHRDIKDENILIDLN..................RGELKLIDFGSGALLK. -KPK2_PLAFK/111-364 VHRDLKMENLMLENPN.....D...........PSSLKIIDFGLASFLN. -KPRO_MAIZE/534-812 IHCDVKPENILLDQA...................FEPKITDFGLVKLLNR -KR1_HSV11/191-478 IHRDIKTENIFINTP...................EDICLGDFGAACFVQG -KR1_PRVKA/53-332 MHRDVKAENIFLEDV...................DTVCLGDLGAARCN.. 
-KS61_MOUSE/407-664 VHRDLKPSNILYVDESG....N...........PECLRICDFGFAKQLR. -KYK1_DICDI/1289-1559 LHRDLSSRNILLDHNIDPKNPVVSS......RQDIKCKISDFGLSRLKKE -KYK2_DICDI/108-364 VHRDLTSQNILLDEL...................GNIKISDFGLSAEKSR -MAK_RAT/4-284 FHRDMKPENLLCMGP...................ELVKIADFGLARELR. -MEK1_YEAST/162-444 VHRDLKLDNILLCTP.E....P...........CTRIVLADFGIAKDLN. -MET_HUMAN/1078-1337 VHRDLAARNCMLDEK...................FTVKVADFGLARDMYD -MKK1_YEAST/221-488 IHRDIKPQNILLNEN...................GQVKLCDFGVSGEAV. -MLK1_HUMAN/3-262 IHRDLKSSNILILQKVENGDLS...........NKILKITDFGLAREWH. -NINL_DROME/16-282 LHRDIRGDNILLTKN...................GRVKLCDFGLSRQVDS -NPR1_YEAST/438-742 AHRDLKLDNCVINEK...................GIVKLIDFGAAVVFSY -PHY_CERPU/1004-1282 IHRDLKSMNILVKSVKVTKSEIG..........YVHVKVADFGLSKTKDS -PKD1_DICDI/36-291 IYRDLKPENILIDEK...................GHIKLTDFGFSKKTVG -PKN1_MYXXA/59-321 VHRDLKPDNIFLVRRNGN...............APFVKVLDFGIAKLADA -RAN1_SCHPO/18-295 YHRDLKPENIMVGNDG..................NTVYLADFGLATTEPY -RYK_HUMAN/327-593 IHKDLAARNCVIDDT...................LQVKITDNALSRDLFP -SGV1_YEAST/60-366 MHRDIKTANILIDHN...................GVLKLADFGLARLYYG -SPK1_YEAST/198-466 SHRDLKPDNILIEQD......D...........PVLVKITDFGLAKVQG. -ST20_YEAST/620-871 LHRDIKSDNILLSME...................GDIKLTDFGFCAQIN. -STE7_YEAST/191-466 IHRDIKPSNVLINSK...................GQIKLCDFGVSKKLIN -SYK_PIG/364-619 VHRDLAARNVLLVTQ...................HYAKISDFGLSKALRA -TOP_DROME/938-1194 VHRDLAARNVLVQTP...................SLVKITDFGLAKLLSS -TRKA_HUMAN/504-775 VHRDLATRNCLVGQG...................LVVKIGDFGMSRDIYS -TTK_HUMAN/509-775 VHSDLKPANFLIVDG....................MLKLIDFGIANQMQP -WEE1_HUMAN/299-569 VHMDIKPSNIFISRTSI....P...........NAASEEGDEDDWASN.. - -7LES_DROME/2209-2481 S.................DYYRKEGEGLLPVRWMSPES............ -ABL1_CAEEL/296-547 D.................TYTAHAGAKFPIKWT.APEG............ -ARK1_BOVIN/191-453 ...................KKKPHASVGTHGYM.APEV............ -AVR2_HUMAN/192-479 G................KSAGDTHGQVGTRRYM.APEV............ 
-BFR2_HUMAN/367-643 .................IDYYKKTTNGRLPVKWMAPEA............ -BYR1_SCHPO/66-320 ....................SVAQTFVGTSTYM.SPER............ -BYR2_SCHPO/394-658 N............STSTKTGGARPSFQGSSFWM.APEV............ -CC15_YEAST/25-272 ....................SSALTLAGTLNWM.APEI............ -CC21_MEDSA/1-284 .................IPVRTFTHEVVTLWYR.APEIL........... -CC5_YEAST/82-337 ..................ESERKYTICGTPNYI.APEV............ -CDPK_SOYBN/34-292 ..................PGESFCDVVGSPYYV.APEV............ -CDR1_SCHPO/12-258 ..................NDSCLENYCGSLHYL.APEI............ -CHK1_SCHPO/10-272 K................GKSRLLNSPVGSPPYA.APEI............ -CLK1_MOUSE/160-476 ....................EHHSTLVSTRHYR.APEV............ -CTK1_YEAST/183-469 ..................SRADYTNRVITLWYR.PPEL............ -ERK1_CANAL/68-371 Q...............EDNYGFMTEYVATRWYR.APEI............ -ERK3_HUMAN/20-312 HYS...............HKGYLSEGLVTKWYR.SPRL............ -FUSE_DROME/4-254 .................LGTHVLTSIKGTPLYM.APEL............ -HR25_YEAST/9-273 FN...........THRHIPYRENKSLTGTARYA.SVNT............ -JAK1_HUMAN/571-833 ................IPITVLSRQECIERIPWIAPEC............ -JAK1_HUMAN/864-1137 D................KEYYTVKDDRDSPVFWYAPEC............ -KAB7_YEAST/1096-1354 ...................SGPFDVFVGTIDYA.APEV............ -KAKT_MLVAT/171-429 ..................DGATMKTFCGTPEYL.APEV............ -KC21_CHICK/39-324 ...................GQEYNVRVASRYFK.GPEL............ -KCC4_MOUSE/42-296 .................EHQVLMKTVCGTPGYC.APEI............ -KCR8_YEAST/316-590 AWEK..............NIHLSGGVCGSSPYI.APEEY........... -KG3A_RAT/119-403 ...................GEPNVSYICSRYYR.APEL............ -KGP1_DROME/457-717 ..................TSSKTWTFCGTPEYV.APEI............ -KI28_YEAST/7-290 .................APHEILTSNVVTRWYR.APEL............ -KI82_YEAST/324-602 SKKPTMKDSTYLDTKICSDGFRTNSFVGTEEYL.APEV............ -KIN1_SCHPO/125-395 ..................RQSRLRTFCGSLYFA.APEL............ -KIR1_HUMAN/208-495 STN..............QLDVGNNPRVGTKRYM.APEV............ 
-KKIA_HUMAN/5-288 ..................PSDYYTDYVATRWYR.SPEL............ -KKL6_YEAST/192-508 ..................NNEMCTARCGSEDYV.SPEI............ -KMIL_AVIMH/82-339 WS................GSQQVEQPTGSILWM.APEVIR.......... -KML2_CHICK/1453-1708 ..................SAGSLKVLFGTPEFV.APEV............ -KMOS_CERAE/60-338 ...............LLCFQTPLYPLGGTYTHR.APEL............ -KPBH_RAT/24-291 ..................PGEKLRELCGTPGYL.APEI............ -KPIM_HUMAN/38-290 ...................DTVYTDFDGTRVYS.PPEW............ -KPK2_PLAFK/111-364 ...................SPSMNMRCGSPGYV.APEI............ -KPRO_MAIZE/534-812 G................GSTQNVSHVRGTLGYI.APEW............ -KR1_HSV11/191-478 S................RSSPFPYGIAGTIDTN.APEV............ -KR1_PRVKA/53-332 .................VAAPNFYGLAGTIETN.APEV............ -KS61_MOUSE/407-664 .................AENGLLMTPCYTANFV.APEV............ -KYK1_DICDI/1289-1559 ..................QASQMTQSVGCIPYM.APEV............ -KYK2_DICDI/108-364 E................GSMTMTNGGICNPRWR.PPEL............ -MAK_RAT/4-284 ..................SQPPYTDYVSTRWYR.APEVL........... -MEK1_YEAST/162-444 .................SNKERMHTVVGTPEYC.APEVGFRANRKAYQSF -MET_HUMAN/1078-1337 K...............EYYSVHNKTGAKLPVKWMALES............ -MKK1_YEAST/221-488 ...................NSLATTFTGTSFYM.APER............ -MLK1_HUMAN/3-262 ...................RTTKMSAAGTYAWM.APEV............ -NINL_DROME/16-282 ..................TLGKRGTCIGSPCWM.APEVVS.......... -NPR1_YEAST/438-742 P..............FSKNLVEASGIVGSDPYL.APEVC........... -PHY_CERPU/1004-1282 S................TRYSNQTWNRGTNRWM.APEVINLG........ -PKD1_DICDI/36-291 ...................GKNTSSVCGTFDYM.APEI............ -PKN1_MYXXA/59-321 H................MPQTHAGIIVGTPEYM.APEQ............ -RAN1_SCHPO/18-295 SSDFGCG..SLFYMSPECQREVKKLSSLSDMLPVTPEP............ -RYK_HUMAN/327-593 .................MDYHCLGDNENRPVRWMALES............ -SGV1_YEAST/60-366 CPPNLKYPG......GAGSGAKYTSVVVTRWYR.APELV........... -SPK1_YEAST/198-466 ..................NGSFMKTFCGTLAYV.APEVIR.......... 
-ST20_YEAST/620-871 .................ELNLKRTTMVGTPYWM.APEV............ -STE7_YEAST/191-466 ....................SIADTFVGTSTYM.SPER............ -SYK_PIG/364-619 D................ENYYKAQTHGKWPVKWYAPEC............ -TOP_DROME/938-1194 D.................SNEYKAAGGKMPIKWLALEC............ -TRKA_HUMAN/504-775 .................TDYYRVGGRTMLPIRWMPPES............ -TTK_HUMAN/509-775 ................DTTSVVKDSQVGTVNYM.PPEAIKDMS....... -WEE1_HUMAN/299-569 ..................KVMFKIGDLGHVTRISSPQV............ - -7LES_DROME/2209-2481 .....LVD......GLFTTQSDVWAFGVLCWEILTLG............. -ABL1_CAEEL/296-547 .....LAF......NTFSSKSDVWAFGVLLWEIATYG............. -ARK1_BOVIN/191-453 .....LQKG.....VAYDSSADWFSLGCMLFKLLRG.............. -AVR2_HUMAN/192-479 .....LEGAIN.FQRDAFLRIDMYAMGLVLWELASRCTAADG........ -BFR2_HUMAN/367-643 .....LFD......RVYTHQSDVWSFGVLMWEIFTLG............. -BYR1_SCHPO/66-320 .....IRG......GKYTVKSDIWSLGISIIELATQ.............. -BYR2_SCHPO/394-658 .....VKQ......TMHTEKTDIWSLGCLVIEMLTS.............. -CC15_YEAST/25-272 .....LGN......RGASTLSDIWSLGATVVEMLTK.............. -CC21_MEDSA/1-284 .....LGS......RHYSTPVDVWSVGCIFAEMANRRP............ -CC5_YEAST/82-337 .....LMG....KHSGHSFEVDIWSLGVMLYALLIG.............. -CDPK_SOYBN/34-292 .....LR.......KLYGPESDVWSAGVILYILLS............... -CDR1_SCHPO/12-258 .....VSHK.....PYRGAPADVWSCGVILYSLLSN.............. -CHK1_SCHPO/10-272 ......TQ......QYDGSKVDVWSCGIILFALLLG.............. -CLK1_MOUSE/160-476 .....ILA......LGWSQPCDVWSIGCILIEYYLGFTVFPTHD...... -CTK1_YEAST/183-469 .....LLG.....TTNYGTEVDMWGCGCLLVELFNKTAIFQ......... -ERK1_CANAL/68-371 .....MLT.....FQEYTTAIDVWSVGCILAEMLSGRPLFPGRDYHNQLW -ERK3_HUMAN/20-312 .....LLSP.....NNYTKAIDMWAAGCILAEMLTG.............. -FUSE_DROME/4-254 .....LAD......EPYDHHADMWSLGCIAYESMAG.............. -HR25_YEAST/9-273 .....HLG......IEQSRRDDLESLGYVLIYFCKG.............. -JAK1_HUMAN/571-833 .....VEDS.....KNLSVAADKWSFGTTLWEICYNG............. -JAK1_HUMAN/864-1137 .....LMQ......SKFYIASDVWSFGVTLHELLTYCD............ 
-KAB7_YEAST/1096-1354 .....LGGN.....PYEGQPQDIWAIGILLYTVVFK.............. -KAKT_MLVAT/171-429 .....LED......NDYGRAVDWWGLGVVMYEMMCG.............. -KC21_CHICK/39-324 .....LVDY.....QMYDYSLDMWSLGCMLASMIFRKEP........... -KCC4_MOUSE/42-296 .....LRG......CAYGPEVDMWSVGIITYILL................ -KCR8_YEAST/316-590 .....IKE......EFDPRPVDIWACGVIYMAMRTG.............. -KG3A_RAT/119-403 .....IFG.....ATDYTSSIDVWSAGCVLAELLLGQPIFP......... -KGP1_DROME/457-717 .....ILN......KGHDRAVDYWALGILIHELLNG.............. -KI28_YEAST/7-290 .....LFG.....AKHYTSAIDIWSVGVIFAELMLRIPYLP......... -KI82_YEAST/324-602 .....IRG......NGHTAAVDWWTLGILIYEMLFG.............. -KIN1_SCHPO/125-395 .....LNAQ.....PYIGPEVDVWSFGIVLYVLVCG.............. -KIR1_HUMAN/208-495 .....LDETIQVDCFDSYKRVDIWAFGLVLWEVARRMVSNG......... -KKIA_HUMAN/5-288 .....LVG.....DTQYGPPVDVWAIGCVFAELLSGVPLWP......... -KKL6_YEAST/192-508 .....LMG.....VPYDGHLSDTWALGVILYSLFED.............. -KMIL_AVIMH/82-339 .....MQDS.....NPFSFQSDVYSYGIVLYELMTG.............. -KML2_CHICK/1453-1708 .....INY......EPIGYETDMWSIGVICYILVSG.............. -KMOS_CERAE/60-338 .....LKG......EGVTPKADIYSFAITLWQMTTK.............. -KPBH_RAT/24-291 .....LKCSMDETHPGYGKEVDLWACGVILFTLLAG.............. -KPIM_HUMAN/38-290 .....IRYH.....RYHGRSAAVWSLGILLYDMVCG.............. -KPK2_PLAFK/111-364 .....LKC......ASYGTKVDIFSLGVILFNIL................ -KPRO_MAIZE/534-812 .....VSS......LPITAKVDVYSYGVVLLELLTG.............. -KR1_HSV11/191-478 .....LAG......DPYTTTVDIWSAGLVIFETAVHNASLFSAPRGP... -KR1_PRVKA/53-332 .....LAR......DRYDTKVDVWGAGVVLFETLAYPKTITGG.DEP... -KS61_MOUSE/407-664 .....LKR......QGYDEGCDIWSLGILLYTMLAG.............. -KYK1_DICDI/1289-1559 .....FKG......DSNSEKSDVYSYGMVLFELLTS.............. -KYK2_DICDI/108-364 .....TKN.....LGHYSEKVDVYCFSLVVWEILTG.............. -MAK_RAT/4-284 .....LRS......SVYSSPIDVWAVGSIMAELYTFRPLFPG........ -MEK1_YEAST/162-444 SRAATLEQ......RGYDSKCDLWSLGVITHIMLTG.............. -MET_HUMAN/1078-1337 .....LQT......QKFTTKSDVWSFGVVLWELMTRG............. 
-MKK1_YEAST/221-488 .....IQG......QPYSVTSDVWSLGLTILEVANG.............. -MLK1_HUMAN/3-262 .....IRA......SMFSKGSDVWSYGVLLWELLTG.............. -NINL_DROME/16-282 ....AMESR....EPDITVRADVWALGITTIELADG.............. -NPR1_YEAST/438-742 .....IFA......KYDPRPVDIWSSAIIFACMILKKFPWKIPKLRDNSF -PHY_CERPU/1004-1282 YESTEGEISFDGKVPKYPLKSDVYSFGMVCYEVLTG.............. -PKD1_DICDI/36-291 .....LNSS.....NGHGKPVDWWALGVVVYELVTG.............. -PKN1_MYXXA/59-321 .....SLG......RGVDGRADLYALGVIAYQLLTG.............. -RAN1_SCHPO/18-295 .....IESQ...SSSFATAPNDVWALGIILINLCCK.............. -RYK_HUMAN/327-593 .....LVN......NEFSSASDVWAFGVTLWELMTLG............. -SGV1_YEAST/60-366 .....LGD......KQYTTAVDIWGVGCVFAEFFEKKP............ -SPK1_YEAST/198-466 GKDTSVSPDEYEERNEYSSLVDMWSMGCLVYVILTG.............. -ST20_YEAST/620-871 .....VSR......KEYGPKVDIWSLGIMIIEMIEG.............. -STE7_YEAST/191-466 .....IQG......NVYSIKGDVWSLGLMIIELVTG.............. -SYK_PIG/364-619 .....INY......YKFSSKSDVWSFGVLMWEAFSYG............. -TOP_DROME/938-1194 .....IRN......RVFTSKSDVWAFGVTIWELLTFG............. -TRKA_HUMAN/504-775 .....ILY......RKFTTESDVWSFGVVLWEIFTYG............. -TTK_HUMAN/509-775 .....SSRENGKSKSKISPKSDVWSLGCILYYMTYG.............. -WEE1_HUMAN/299-569 .....EEG......DSRFLANEVLQENYTHLPKA....DIFAL....... - -7LES_DROME/2209-2481 .....................QQPYAAR.......NNFEVLAHVKEG... -ABL1_CAEEL/296-547 .....................MAPYPG........VELSNVYGLLENG.. -ARK1_BOVIN/191-453 .....................HSPFRQ........HKTKDKHEIDRMT.. -AVR2_HUMAN/192-479 ................PVDEYMLPFEEEIGQHPSLEDMQEVVVHKKKR.. -BFR2_HUMAN/367-643 .....................GSPYPG........IPVEELFKLLKEG.. -BYR1_SCHPO/66-320 .....................ELPWSFSNI.DDSIGILDLLHCIVQE... -BYR2_SCHPO/394-658 .....................KHPYPN.......CDQMQAIFRIGEN... -CC15_YEAST/25-272 .....................NPPYHN........LTDANIYYAVEN... -CC21_MEDSA/1-284 ...LSPGDSEIDELFKIFRILGTPNED......TWPGVTSLPDFKSTFPR -CC5_YEAST/82-337 .....................KPPFQAR.......DVNTIYERIKCR... 
-CDPK_SOYBN/34-292 ....GVPP...........FWAESEP.........GIFRQILLGKLD... -CDR1_SCHPO/12-258 .....................KLPFGG........QNTDVIYNKIRHG.. -CHK1_SCHPO/10-272 .....................NTPWDEA......ISNTGDYLLYKKQCE. -CLK1_MOUSE/160-476 ..........SREHLAMMERILGPLPKHMIQKTRKRRYFHHDRLDWDEHS -CTK1_YEAST/183-469 ......GSNELEQIESIFKIMGTPTINSWP.TLYDMPWFFMIMPQQTTKY -ERK1_CANAL/68-371 LIMEVLGTPNMEDYYNIKSKRAREYIRSLP.FCKKIPFSELFANTNNN.. -ERK3_HUMAN/20-312 ...RMLFAGAHELEQMQLILETIPVIRE....EDKDELLRVMPSFVSSTW -FUSE_DROME/4-254 .....................QPPFCA........SSILHLVKMIKH... -HR25_YEAST/9-273 .....................SLPWQG....LKATTKKQKYDRIMEKK.. -JAK1_HUMAN/571-833 .....................EIPLKD........KTLIEKERFYES... -JAK1_HUMAN/864-1137 ...................SDSSPMALFLKMIGPTHGQMTVTRLVNTLK. -KAB7_YEAST/1096-1354 .....................ENPFY..........NIDEILEGDLK... -KAKT_MLVAT/171-429 .....................RLPFYN........QDHEKLFELILM... -KC21_CHICK/39-324 ..FFHGHDNYDQLVRIAKVLGTEDLYDYID.KYNIELDPRFNDILGRHSR -KCC4_MOUSE/42-296 ......CG.............FEPFYD........ERGDQFMFRRILNC. -KCR8_YEAST/316-590 .....................RQLWSSAE...KDDPFYMNYLKGRKEK.. -KG3A_RAT/119-403 ......GDSGVDQLVEIIKVLGTPTREQIR.EMNPNYTEFKFPQIKAHP. -KGP1_DROME/457-717 .....................TPPFSAP.......DPMQTYNLILKG... -KI28_YEAST/7-290 ......GQNDVDQMEVTFRALGTPTDRDWP.EVSSFMTYNKLQIYPPPS. -KI82_YEAST/324-602 .....................CTPFKG........DNSNETFSNILT... -KIN1_SCHPO/125-395 .....................KVPFDD........QNMSALHAKIKK... -KIR1_HUMAN/208-495 ................IVEDYKPPFYDVVPNDPSFEDMRKVVCVDQQ... -KKIA_HUMAN/5-288 ....GKSDVDQLYLIRKTLGDLIPRHQ......QVFSTNQYFSGVKIPDP -KKL6_YEAST/192-508 .....................RLPFDPP.....PNASARQRSRATSHR.. -KMIL_AVIMH/82-339 .....................ELPYSHIN...NRDQIIFMVGRGYASP.. -KML2_CHICK/1453-1708 .....................LSPFMG....DNDNETLANVTSATWD... -KMOS_CERAE/60-338 .....................QAPYSGE.....RQHILYAVVAYDLRP.. -KPBH_RAT/24-291 ......SP...............PFWHR....RQILMLRMIMEGQYQ... 
-KPIM_HUMAN/38-290 .....................DIPFEH..........DEEIIRGQVF... -KPK2_PLAFK/111-364 ......CG.............YPPFRG........NNVKEIFKKNMR.C. -KPRO_MAIZE/534-812 ............TRVSELVGGTDEVHSMLR.KLVRMLSAKLEGEEQSWID -KR1_HSV11/191-478 ..KRGPCDSQITRIIRQAQVHVDEFSPHPE.SRLTSRYRSRAAGNNRPP. -KR1_PRVKA/53-332 .AINGEMH..LIDLIRALGVHPEEFP..PD.TRLRSEFVRYAGTHRQP.. -KS61_MOUSE/407-664 ..YTPFAN............GPSDTP........EEILTRIGSGKFT... -KYK1_DICDI/1289-1559 .....................DEPQQD.......MKPMKMAHLAAYES.. -KYK2_DICDI/108-364 .....................EIPFSD.......LDGSQRSAQVAYAG.. -MAK_RAT/4-284 .......TSEVDEIFKICQVLGTPKKSD.....WPEGYQLASSMNFRFPQ -MEK1_YEAST/162-444 .....................ISPFYG........DGSERSIIQNAKIG. -MET_HUMAN/1078-1337 .....................APPYPD.......VNTFDITVYLLQG... -MKK1_YEAST/221-488 .....................KFPCSSEKM.AANIAPFELLMWILTFTP. -MLK1_HUMAN/3-262 .....................EVPFRG.......IDGLRVAYGVAMN... -NINL_DROME/16-282 .....................KPPFAD.......MHPTRAMFQIIRNP.. -NPR1_YEAST/438-742 KLFCSGRD.CDSLSSLVTRTPDPPSYDESH.STEKKKPESSSNNVSDPNN -PHY_CERPU/1004-1282 .....................DVPFPE.......EKNPNNVKRMVLEG.. -PKD1_DICDI/36-291 .....................KLPFS.........NSKESLLNRKAD... -PKN1_MYXXA/59-321 .....................RLPFND.......EGLAAQLVAHQLRPP. -RAN1_SCHPO/18-295 .....................RNPWKR......ACSQTDGTYRSYVHN.. -RYK_HUMAN/327-593 .....................QTPYVD.......IDPFEMAAYLKDG... -SGV1_YEAST/60-366 ...ILQGKTDIDQGHVIFKLLGTPTEEDWA.VARYLPGAELTTTNYKP.. -SPK1_YEAST/198-466 .....................HLPFSG....STQDQLYKQIGRGSYH... -ST20_YEAST/620-871 .....................EPPYLN.......ETPLRALYLIATNG.. -STE7_YEAST/191-466 .....................EFPLGG......HNDTPDGILDLLQRIVN -SYK_PIG/364-619 .....................QKPYRG........MKGSEVSAMLEKG.. -TOP_DROME/938-1194 .....................QRPHEN........IPAKDIPDLIEVG.. -TRKA_HUMAN/504-775 .....................KQPWYQ.......LSNTEAIDCITQG... -TTK_HUMAN/509-775 .....................KTPFQQ......IINQISKLHAIIDPN.. 
-WEE1_HUMAN/299-569 .ALTVVCA...........AGAEPLP.........RNGDQWHEIRQG... - -7LES_DROME/2209-2481 ..........GRLQQPPMCTEK....LYSLLLLCWRTDPWERP.....SF -ABL1_CAEEL/296-547 ..........FRMDGPQGCPPS....VYRLMLQCWNWSPSDRP.....RF -ARK1_BOVIN/191-453 ........LTMAVELPDSFSPE....LRSLLEGLLQRDVNRRLGCLGRGA -AVR2_HUMAN/192-479 .........PVLRDYWQKHAGMA..MLCETIEECWDHDAEARL.....SA -BFR2_HUMAN/367-643 ..........HRMDKPANCTNE....LYMMMRDCWHAVPSQRP.....TF -BYR1_SCHPO/66-320 ..........EPPRLPSSFPED....LRLFVDACLHKDPTLRAS.....P -BYR2_SCHPO/394-658 ..........ILPEFPSNISSS....AIDFLEKTFAIDCNLRP.....TA -CC15_YEAST/25-272 ..........DTYYPPSSFSEP....LKDFLSKCFVKNMYKRP.....TA -CC21_MEDSA/1-284 W......PSKDLATVVPNLEPA....GLDLLNSMLCLDPTKRI.....TA -CC5_YEAST/82-337 .........DFSFPRDKPISDE....GKILIRDILSLDPIERP.....SL -CDPK_SOYBN/34-292 ..........FHSEPWPSISDS....AKDLIRKMLDQNPKTRL.....TA -CDR1_SCHPO/12-258 ...........AYDLPSSISSA....AQDLLHRMLDVNPSTRI.....TI -CHK1_SCHPO/10-272 .........RPSYHPWNLLSPG....AYSIITGMLRSDPFKRY.....SV -CLK1_MOUSE/160-476 SAGRYVSRRCKPLKEFMLSQDAEHEFLFDLVGKILEYDPAKRI.....TL -CTK1_YEAST/183-469 V......NNFSEKFKSVLPSSK....CLQLAINLLCYDQTKRF.....SA -ERK1_CANAL/68-371 .......TSTSNTGGRTNINPL....ALDLLEKLLIFNPAKRI.....TV -ERK3_HUMAN/20-312 E......VKRPLRKLLPEVNSE....AIDFLEKILTFNPMDRL.....TA -FUSE_DROME/4-254 ..........EDVKWPSTLTCE....CRSFLQGLLEKDPGLRI.....SW -HR25_YEAST/9-273 ..........LNVSVETLCSGLP.LEFQEYMAYCKNLKFDEKP......D -JAK1_HUMAN/571-833 ..........RCRPVTPSCKE.....LADLMTRCMNYDPNQRP.....FF -JAK1_HUMAN/864-1137 ........EGKRLPCPPNCPDE....VYQLMRKCWEFQPSNRT.....SF -KAB7_YEAST/1096-1354 ............FNNAEEVSED....CIELIKSILNRCVPKRP.....TI -KAKT_MLVAT/171-429 ..........EEIRFPRTLGPE....AKSLLSGLLKKDPTQRLGGGSEDA -KC21_CHICK/39-324 KR....WERFVHSENQHLVSPE....ALDFLDKLLRYDHQSRL.....TA -KCC4_MOUSE/42-296 .......EYYFISPWWDEVSLN....AKDLVKKLIVLDPKKRL.....TT -KCR8_YEAST/316-590 .........GGYEPIESLKRAR....CRNVIYSMLDPVPYRRI.....NG -KG3A_RAT/119-403 ..........WTKVFKSRTPPE....AIALCSSLLEYTPSSRL.....SP -KGP1_DROME/457-717 
.........IDMIAFPKHISRW....AVQLIKRLCRDVPSERLGYQTGGI -KI28_YEAST/7-290 ........RDELRKRFIAASEY....ALDFMCGMLTMNPQKRW.....TA -KI82_YEAST/324-602 ........KDVKFPHDKEVSKN....CKDLIKKLLNKNEAKRLGSKS.GA -KIN1_SCHPO/125-395 ..........GTVEYPSYLSSD....CKGLLSRMLVTDPLKRA.....TL -KIR1_HUMAN/208-495 ..........RPNIPNRWFSDPTLTSLAKLMKECWYQNPSARL.....TA -KKIA_HUMAN/5-288 E......DMEPLELKFPNISYP....ALGLLKGCLHMDPTERL.....TC -KKL6_YEAST/192-508 ........IARFDWRWYRLSDY....KTNVG.KQIVENTLTRKN.QRWSI -KMIL_AVIMH/82-339 ..........DLSKLYKNCPKA....MKRLVADCLKKVREERP.....LF -KML2_CHICK/1453-1708 ..........FDDEAFDEISDD....AKDFISNLLKKDMKSRL.....NC -KMOS_CERAE/60-338 ........SLSAAVFQDSLPGQ...RLGDVIRRCWRPSAAQRP.....SA -KPBH_RAT/24-291 ..........FSSPEWDDRSNT....VKDLIAKLLQVDPNARL.....TA -KPIM_HUMAN/38-290 ..............FRQRVSSE....CQHLIRWCLALRPSDRP.....TF -KPK2_PLAFK/111-364 .......HISFNTKHWINKSES....VKEIILWMCCKNPDDRC.....TA -KPRO_MAIZE/534-812 GY....LDSKLNRPVNYVQART....LIKLAVSCLEEDRSKRP.....TM -KR1_HSV11/191-478 .......YTRPAWTRYYKMDID....VEYLVCKALTFDGALRP.....SA -KR1_PRVKA/53-332 .......YTQYARVARLGLPET....GAFLIYKMLTFDPVRRP.....SA -KS61_MOUSE/407-664 ..........LSGGNWNTVSET....AKDLVSKMLHVDPHQRL.....TA -KYK1_DICDI/1289-1559 ..........YRPPIPLTTSSK....WKEILTQCWDSNPDSRP.....TF -KYK2_DICDI/108-364 ..........LRPPIPEYCDPE....LKLLLTQCWEADPNDRP.....PF -MAK_RAT/4-284 C......IPINLKTLIPNASSE....AIQLMTEMLNWDPKKRP.....TA -MEK1_YEAST/162-444 .......KLNFKLKQWDIVSDN....AKSFVKDLLQTDVVKRL.....NS -MET_HUMAN/1078-1337 ..........RRLLQPEYCPDP....LYEVMLKCWHPKAEMRP.....SF -MKK1_YEAST/221-488 .......ELKDEPESNIIWSPS....FKSFIDYCLKKDSRERP.....SP -MLK1_HUMAN/3-262 .........KLALPIPSTCPEP....FAKLMEDCWNPDPHSRP.....SF -NINL_DROME/16-282 .........PPTLMRPTNWSKQ....INDFISESLEKNAENRP.....MM -NPR1_YEAST/438-742 .......VNIGPQRLLHSLPEE....TQHIVGRMIDLAPACRG.....NI -PHY_CERPU/1004-1282 ..........VRPDLPAHCPIE....LKALITDCWNQDPLKRP......S -PKD1_DICDI/36-291 .........FQLIFQNSYLSDE....IKDFIFQLLSVDPSKRLG..TFDS -PKN1_MYXXA/59-321 
.........PPPSSVYPAVSAA....LEHVILRALAKKPEDRY....ASI -RAN1_SCHPO/18-295 .........PSTLLSILPISRE....LNSLLNRIFDRNPKTRI.....TL -RYK_HUMAN/327-593 ..........YRIAQPINCPDE....LFAVMACCWALDPEERP.....KF -SGV1_YEAST/60-366 .........TLRERFGKYLSET....GLDFLGQLLALDPYKRL.....TA -SPK1_YEAST/198-466 ..........EGPLKDFRISEE....ARDFIDSLLQVDPNNRS.....TA -ST20_YEAST/620-871 .........TPKLKEPENLSSS....LKKFLDWCLCVEPEDRA.....SA -STE7_YEAST/191-466 .......EPSPRLPKDRIYSKE....MTDFVNRCCIKNERERS.....SI -SYK_PIG/364-619 ..........ERMGCPPGCPRE....MYELMTLCWTYDVENRP.....GF -TOP_DROME/938-1194 ..........LKLEQPEICSLD....IYCTLLSCWHLDAAMRP.....TF -TRKA_HUMAN/504-775 ..........RELERPRACPPE....VYAIMRGCWQREPQQRH.....SI -TTK_HUMAN/509-775 ..........HEIEFPDIPEKD....LQDVLKCCLKRDPKQRI.....SI -WEE1_HUMAN/299-569 .........RLPRIPQVLSQE.....FTELLKVMIHPDPERRP.....SA - -7LES_DROME/2209-2481 RRCYNTL... -ABL1_CAEEL/296-547 RDIHFNL... -ARK1_BOVIN/191-453 QEVKESPFF. -AVR2_HUMAN/192-479 GCVGERI... -BFR2_HUMAN/367-643 KQLVEDL... -BYR1_SCHPO/66-320 QQLCAMPYF. -BYR2_SCHPO/394-658 SELLSHPFV. -CC15_YEAST/25-272 DQLLKHVWI. -CC21_MEDSA/1-284 RSAVEHEYF. -CC5_YEAST/82-337 TEIMDYVWF. -CDPK_SOYBN/34-292 HEVLRHPWI. -CDR1_SCHPO/12-258 PEFFSHPFL. -CHK1_SCHPO/10-272 KHVVQHPWL. -CLK1_MOUSE/160-476 KEALKHPFF. -CTK1_YEAST/183-469 TEALQSDYF. -ERK1_CANAL/68-371 EDALKHPYL. -ERK3_HUMAN/20-312 EMGLQHPYM. -FUSE_DROME/4-254 TQLLCHPFV. -HR25_YEAST/9-273 YLFLARLF.. -JAK1_HUMAN/571-833 RAIMRDI... -JAK1_HUMAN/864-1137 QNLIEGF... -KAB7_YEAST/1096-1354 DDINNDKWL. -KAKT_MLVAT/171-429 KEIMQHRFF. -KC21_CHICK/39-324 REAMEHPYF. -KCC4_MOUSE/42-296 FQALQHPWV. -KCR8_YEAST/316-590 KQILNSEWG. -KG3A_RAT/119-403 LEACAHSFF. -KGP1_DROME/457-717 QDIKKHKWF. -KI28_YEAST/7-290 VQCLESDYF. -KI82_YEAST/324-602 ADIKRHPFF. -KIN1_SCHPO/125-395 EEVLNHPWM. -KIR1_HUMAN/208-495 LRIKKTL... -KKIA_HUMAN/5-288 EQLLHHPYF. -KKL6_YEAST/192-508 NEIYESPFV. -KMIL_AVIMH/82-339 PQILSSI... -KML2_CHICK/1453-1708 TQCLQHPWL. -KMOS_CERAE/60-338 RPLLVDL... -KPBH_RAT/24-291 EQALQHPFF. 
-KPIM_HUMAN/38-290 EEIQNHPWM. -KPK2_PLAFK/111-364 LQALGHQWF. -KPRO_MAIZE/534-812 EHAVQTL... -KR1_HSV11/191-478 AELLCLPLF. -KR1_PRVKA/53-332 DEILNFGMW. -KS61_MOUSE/407-664 KQVLQHPWI. -KYK1_DICDI/1289-1559 KQIIVHL... -KYK2_DICDI/108-364 TYIVNKL... -MAK_RAT/4-284 SQALKHPYF. -MEK1_YEAST/162-444 KQGLKHIWI. -MET_HUMAN/1078-1337 SELVSRI... -MKK1_YEAST/221-488 RQMINHPWI. -MLK1_HUMAN/3-262 TNILDQL... -NINL_DROME/16-282 VEMVEHPFL. -NPR1_YEAST/438-742 EEIMEDPWI. -PHY_CERPU/1004-1282 FAVICQKL.. -PKD1_DICDI/36-291 CSIRNHKWF. -PKN1_MYXXA/59-321 AAFRNALQVA -RAN1_SCHPO/18-295 PELSTLV... -RYK_HUMAN/327-593 QQLVQCL... -SGV1_YEAST/60-366 MSAKHHPWF. -SPK1_YEAST/198-466 AKALNHPWI. -ST20_YEAST/620-871 TELLHDEYI. -STE7_YEAST/191-466 HELLHHDLI. -SYK_PIG/364-619 VAVELRL... -TOP_DROME/938-1194 KQLTTVF... -TRKA_HUMAN/504-775 KDVHARL... -TTK_HUMAN/509-775 PELLAHPYV. -WEE1_HUMAN/299-569 MALVKHSVL. - diff --git a/forester/archive/RIO/others/hmmer/tutorial/rrm.hmm b/forester/archive/RIO/others/hmmer/tutorial/rrm.hmm deleted file mode 100644 index 509d6d6..0000000 --- a/forester/archive/RIO/others/hmmer/tutorial/rrm.hmm +++ /dev/null @@ -1,237 +0,0 @@ -HMMER2.0 -NAME rrm -DESC -LENG 72 -ALPH Amino -RF no -CS no -MAP yes -COM ../src/hmmbuild -F rrm.hmm rrm.slx -COM ../src/hmmcalibrate rrm.hmm -NSEQ 70 -DATE Wed Jul 8 08:13:25 1998 -CKSUM 2768 -XT -8455 -4 -1000 -1000 -8455 -4 -8455 -4 -NULT -4 -8455 -NULE 595 -1558 85 338 -294 453 -1158 197 249 902 -1085 -142 -21 -313 45 531 201 384 -1998 -644 -EVD -53.840649 0.214434 -HMM A C D E F G H I K L M N P Q R S T V W Y - m->m m->i m->d i->m i->i d->m d->d b->m m->e - -21 * -6129 - 1 -1234 -371 -8214 -7849 -5304 -8003 -7706 2384 -7769 2261 -681 -7660 -7694 -7521 -7816 -7346 -5543 1527 -6974 -6639 1 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12326 -894 -1115 -701 -1378 -21 * - 2 -3634 -3460 -5973 -5340 3521 -2129 -4036 -831 -2054 -1257 -2663 -4822 -5229 -4557 -4735 -1979 -1569 -1476 -3893 3439 
2 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12326 -894 -1115 -701 -1378 * * - 3 -5570 838 -8268 -7958 -5637 -8152 -8243 2427 -7947 -461 -539 -7805 -7843 -7878 -8124 -7550 -5559 3130 -7481 -7000 3 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12326 -894 -1115 -701 -1378 * * - 4 -1146 -4797 -1564 -2630 -1480 2769 -2963 -1850 992 -4812 -3887 737 -4397 -120 793 -205 -1019 -4418 -4981 -1059 4 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12326 -894 -1115 -701 -1378 * * - 5 -5242 -7035 445 -3538 -7284 1773 -4583 -7166 -4676 -7046 -6312 3633 -1651 -1262 -849 -1278 -5287 -6650 -7228 -291 5 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12326 -894 -1115 -701 -1378 * * - 6 -6898 -6238 -9292 -8703 -410 -9176 -7772 820 -8535 3071 -753 -8917 -8033 -7171 -7955 -8614 -6722 5 -6136 -6414 6 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 278 394 45 96 359 117 -369 -294 -249 - - -33 -6025 -12326 -153 -3315 -701 -1378 * * - 7 -5 -5297 178 -2982 -5685 -2278 -528 -5452 -1615 -5394 -4488 1396 3136 -3022 -3659 780 976 -4981 -5565 -4854 8 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -11 -11284 -12327 -894 -1115 -701 -1378 * * - 8 -3329 -4799 -805 543 789 -4303 572 -4868 140 -1087 -3888 -603 1691 530 183 -162 293 -2124 2317 2037 9 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11284 -12327 -894 -1115 -701 -1378 * * - 9 -373 -4801 2182 1353 -1426 44 -407 -1928 -366 -4817 -3891 1263 -4395 -1080 -666 295 50 -1947 -4985 397 10 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 10 450 1883 -5953 -5317 -1256 -1301 -4027 1322 -1847 -283 1542 -4802 -5206 -1502 -4713 
-4241 2143 1615 -3893 -3551 11 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 11 -1786 -4835 1027 -807 -5155 -1278 -2989 -4907 -410 -4850 -3924 957 -4421 -943 -250 670 3048 -4456 -5017 -4333 12 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 12 -3329 -4802 1324 2670 -5123 -4302 -2961 -4874 732 -2424 -3891 -457 -262 553 250 -694 -989 -4424 1772 -1014 13 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 13 -325 -4802 1515 2286 -5123 -2017 868 -4874 260 -2865 -1087 -2938 -4395 2006 -810 492 -1754 -4424 -4985 -4302 14 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 14 -337 -4801 2075 1854 -5121 -723 -567 -1924 73 -634 -194 -1227 -4396 1588 -3049 -212 -414 -4422 -5 -4302 15 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 15 -6843 -6192 -9252 -8675 -481 -9132 -7773 1557 -8511 2856 467 -8869 -8024 -7180 -7953 -8566 -6676 459 -6154 -6421 16 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 16 5 -4654 -1525 936 444 -4347 -3013 -1809 2193 -441 -3760 -441 -4438 -2577 1775 -91 -3285 -1104 180 -259 17 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 17 -97 -4802 2341 1548 -5123 -2042 -2961 -4874 -347 -2479 -194 -5 -726 1566 807 -1858 42 -4424 -4985 -4302 18 - - -146 -501 232 42 -381 398 105 -627 210 -463 -721 275 393 44 95 361 116 -370 -295 -242 - - -45 -5457 -12327 -1928 -440 -701 -1378 * * - 18 358 -3435 -5945 -1175 1490 -5154 1309 1157 -1944 1759 
-387 -4797 -5204 -4530 -1684 -4238 -376 166 -3893 1330 23 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12327 -894 -1115 -701 -1378 * * - 19 -2191 733 -7910 -7364 4360 -7323 -5649 -1557 -7016 -750 -407 -6877 -7039 -6263 -6681 -6482 -5572 -4211 -4950 -1019 24 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -12 -11285 -12328 -894 -1115 -701 -1378 * * - 20 -83 -4801 -3176 698 -5121 1566 -2961 -1977 942 -4817 -3890 -239 -4396 582 256 1807 -874 -1745 -4984 -1334 25 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 21 -1216 -4802 -289 1083 -1452 -655 -584 -4874 1345 -4818 -3891 964 1488 2130 -3049 -310 107 -2012 -4985 -1334 26 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 22 -45 1344 -1667 -843 2933 -2146 400 582 -4479 -1948 -2709 -506 -5117 -436 -1764 -4119 -3523 -96 215 2616 27 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 23 -556 -4294 -4426 -1796 -273 3377 -4149 -4100 -4273 -2279 -3695 -562 298 -4067 -4575 -1940 -3954 -3921 -4866 -77 28 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 24 -376 -4801 -143 1004 -1426 805 279 -1771 821 -1486 -3890 -527 2002 126 45 -287 -1679 -617 -4985 -4302 29 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 25 -3608 -178 -1585 -1970 660 -5154 -4024 2773 -894 -985 -386 -4796 -1707 -4528 -4707 -609 -1823 2145 -3893 -1100 30 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 26 -673 -173 -3429 
1042 -4598 -2161 -3110 535 1570 9 283 -508 -4517 -255 382 -1924 313 1407 -4706 -4127 31 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 27 -1211 -4799 1518 768 -5119 -1218 -441 -945 -1312 -2414 -587 909 -4396 -1010 534 1815 78 -487 -4983 -128 32 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 28 1271 2236 -5933 -5299 810 -2278 -651 1901 -1970 -221 -2639 -1497 -5203 -4524 -629 -638 -1577 1521 -3894 -1008 33 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12328 -894 -1115 -701 -1378 * * - 29 -1909 -4796 153 441 -1513 -4304 -599 -1894 1709 25 -3886 689 -1498 243 1438 -189 -879 380 -126 -255 34 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -13 -11286 -12329 -894 -1115 -701 -1378 * * - 30 -1277 -3441 -5893 -1776 -1155 -5147 -513 1829 -1993 1189 1888 -1484 -703 -4503 -1652 -1974 -3546 2209 -3898 -3554 35 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -14 -11287 -12329 -894 -1115 -701 -1378 * * - 31 -1299 746 -5893 -1992 -1190 -5147 -524 1691 424 -60 2330 -4774 111 -4503 -132 248 -1571 1419 -3898 -19 36 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -14 -11287 -12329 -894 -1115 -701 -1378 * * - 32 -3370 -4477 -3387 50 -560 -1979 -449 -51 1375 -681 233 1068 701 -1040 1343 -1845 543 -480 -10 1246 37 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -240 -11287 -2797 -894 -1115 -701 -1378 * * - 33 -3122 -4595 3395 -593 -4916 -1399 589 -1433 360 -4611 -290 780 -1313 35 -1369 -1782 -3061 -1712 -4778 -4095 38 - - -151 -504 236 42 -380 396 122 -618 211 -468 -714 274 392 45 98 355 123 -373 -299 -248 - - -841 -2976 -1709 -1966 -426 -3668 -118 * * - 34 -452 
-4116 -568 -735 -4435 -1350 -2280 -1270 1458 -4131 792 -2257 1620 415 1996 479 -765 -1327 -4300 -538 48 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -10529 -11571 -894 -1115 -1180 -840 * * - 35 272 -4448 -1054 1495 -1086 -283 -2616 -726 380 -1231 -3538 1286 -4050 1395 -988 154 68 50 -4633 -876 49 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -14 -10906 -11948 -894 -1115 -2229 -346 * * - 36 -3050 -4521 457 -2349 -4841 -1681 65 -1545 404 -2305 -3610 996 -1241 -714 -1055 -351 3167 -4143 -4705 -4022 50 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -14 -10981 -12024 -894 -1115 -2036 -403 * * - 37 -943 -4583 277 -486 -4904 2690 -181 -1421 829 -2551 -758 866 -4177 -751 11 -804 -1361 -4205 -4766 -4084 51 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11049 -12091 -894 -1115 -2632 -254 * * - 38 -1544 -4606 -1206 -627 -1238 -1111 -220 -4677 1841 -1463 -537 -311 146 1310 2236 252 -1424 -1820 -4789 -1025 52 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11074 -12116 -894 -1115 -1795 -490 * * - 39 -871 902 -3255 -2704 -1212 -2110 605 -4156 -647 -1293 101 192 1442 -2552 91 2587 -171 -3858 -4584 -3996 53 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11128 -12170 -894 -1115 -1064 -938 * * - 40 -3251 -4717 -597 -2552 -1539 -1882 45 -4784 2499 -1083 -3807 -1125 -312 -892 2672 -1497 -649 -1932 -4902 -1040 54 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11202 -12244 -894 -1115 -158 -3269 * * - 41 -4425 -5751 -1160 -3492 -6118 3496 -552 -1896 -1318 -2596 -4883 -434 -258 -3375 -548 -4283 -4348 -5409 -5833 -5262 55 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11288 -12330 -894 
-1115 -701 -1378 * * - 42 -3608 -96 -1795 -5308 3204 -5154 498 -1086 -989 -1857 1406 -4797 -5204 -807 -4709 -4238 -268 -366 187 3035 56 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11288 -12330 -894 -1115 -701 -1378 * * - 43 2573 2359 -7700 -8052 -7623 2634 -6965 -7447 -7655 -7712 -6731 -6019 -5985 -7072 -7238 -2014 -4755 -2203 -7845 -7842 57 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11288 -12330 -894 -1115 -701 -1378 * * - 44 -1896 -3552 -6072 -5447 4093 -5277 -4115 -1389 -5044 -1849 -2748 -4920 -5327 -4660 -4842 -2020 -787 -772 -3948 1996 58 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -15 -11288 -12330 -894 -1115 -701 -1378 * * - 45 -2123 1258 -8228 -7927 -5768 -8106 -8270 1951 -7921 -982 -4434 -7761 -7830 -7926 -8131 -7503 -5516 3355 -7605 -7039 59 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11288 -12331 -894 -1115 -701 -1378 * * - 46 -1158 -4801 136 2359 -5122 -4302 -508 -644 437 -2559 -3890 628 -4395 -213 172 18 1464 -2067 -4985 -1086 60 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11289 -12331 -894 -1115 -701 -1378 * * - 47 -7925 -6836 -8294 -8655 4067 -8176 -4357 -6786 -8211 -6080 795 -6785 -8028 -6925 -7569 -7427 -7774 -6956 -3603 3066 61 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11289 -12331 -894 -1115 -701 -1378 * * - 48 -633 -4801 851 2019 -1639 -2148 879 -1118 1178 -2414 -3891 -481 -71 241 -1485 -232 744 -569 -4985 -4302 62 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11289 -12331 -894 -1115 -701 -1378 * * - 49 -3331 -4805 2054 434 -5126 -1882 -432 -4877 377 -4821 -3894 2009 -4398 -269 -1336 1291 1198 -1970 -4988 -4305 63 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 
394 45 96 359 117 -369 -294 -249 - - -16 -11289 -12331 -894 -1115 -701 -1378 * * - 50 -638 -4800 -1786 1796 -5120 -1884 1628 -1952 812 -444 -621 -1191 1228 530 -672 8 -873 45 -4983 -276 64 - - -149 -500 232 43 -381 398 105 -627 210 -466 -721 277 393 45 95 359 119 -370 -295 -239 - - -38 -6076 -12331 -1893 -453 -701 -1378 * * - 51 243 -4801 1218 2315 -5122 -1551 -485 -1640 -795 -2479 -783 -420 -685 -1027 1035 415 -3268 -631 -23 -4302 69 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -16 -11289 -12332 -894 -1115 -701 -1378 * * - 52 415 694 2467 1155 -1401 -4334 -490 -1800 -2599 -4689 -637 -384 -1759 -12 -3098 1144 -834 -569 -4907 -271 70 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -17 -11290 -12332 -894 -1115 -701 -1378 * * - 53 2846 -3442 -1698 -5254 -979 -5146 -4014 -750 -4864 -773 1875 -4771 -5197 -1456 -1779 -127 -329 428 -3898 -3555 71 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -17 -11290 -12332 -894 -1115 -701 -1378 * * - 54 581 -4801 1239 1462 -5122 -1606 -432 -367 1251 -1623 -3891 335 -4395 1283 -110 -3209 753 -1920 -4985 -4302 72 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -17 -11290 -12332 -894 -1115 -701 -1378 * * - 55 686 -4798 937 304 -1378 -4303 -437 -1924 2219 -1669 -621 828 -4396 -1012 742 0 -1608 -1126 -4982 -1015 73 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -17 -11290 -12332 -894 -1115 -701 -1378 * * - 56 3420 863 -7680 -7410 -5526 -6323 -6681 -57 -7168 -2455 -4425 -6591 -6708 -6875 -7058 -2256 -4981 -4 -6573 -6193 74 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -17 -11290 -12332 -894 -1115 -701 -1378 * * - 57 -2038 -3436 -5943 -5308 -1145 -5154 -4025 2255 423 1498 1203 -4797 -1707 -478 -1267 -2117 -3548 1450 -3893 -931 75 - - -149 -500 233 43 -381 399 106 
-626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -18 -11291 -12333 -894 -1115 -701 -1378 * * - 58 622 -4802 1764 1486 -5123 -4302 -2961 -1060 334 -4818 -3891 -420 -4396 1293 1148 487 -3268 -1087 -4985 -429 76 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -102 -11291 -4156 -894 -1115 -701 -1378 * * - 59 1265 -231 -1498 1351 -5045 -262 -355 -4796 922 -1073 -3813 778 -4318 877 -34 53 386 -2030 289 -4225 77 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -18 -11207 -12249 -894 -1115 -160 -3250 * * - 60 -684 813 -5723 -473 532 -2124 -3981 -2958 -121 2114 2840 -1421 -5174 -4409 -926 -4196 -1685 -376 -3915 497 78 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -18 -11291 -12333 -894 -1115 -701 -1378 * * - 61 -1812 -4803 1626 -749 -515 -1133 -415 -4875 -1294 -4819 -3892 3181 -793 1470 -1377 -246 -3268 -4425 -4986 -193 79 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -18 -11291 -12333 -894 -1115 -701 -1378 * * - 62 -1812 -4808 -1465 33 -1509 2998 1583 -4879 122 -4823 -3897 972 -4400 -1078 -3055 -1613 -682 -4429 -4991 -1114 80 - - -149 -500 232 43 -378 398 105 -627 212 -466 -721 275 393 45 98 359 117 -367 -295 -250 - - -98 -4229 -12334 -49 -4901 -701 -1378 * * - 63 -676 -4701 -742 -1422 825 -589 -545 255 1702 -2571 812 -2986 -4424 796 418 -221 1302 -1179 -4912 1028 82 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -19 -11292 -12334 -894 -1115 -701 -1378 * * - 64 -3341 -4695 350 1378 -1551 -1973 -2998 477 1265 78 273 -1163 21 504 -1507 -1108 282 114 -19 473 83 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -19 -11292 -12334 -894 -1115 -701 -1378 * * - 65 -3605 -3444 -949 -2090 2356 -1177 -4010 1410 -1703 1341 -404 -1673 -747 -4487 -4679 -2139 -1048 1197 -3900 411 84 - - -149 -500 233 43 
-381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -19 -11292 -12334 -894 -1115 -701 -1378 * * - 66 -655 -539 1179 279 -1324 1202 -2962 -1895 147 -682 1298 1427 -2056 608 756 -1119 -1893 -4419 -4982 140 85 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -19 -11292 -12335 -894 -1115 -701 -1378 * * - 67 -1814 -4814 166 -2636 -5135 2921 -568 -4885 -1333 -2415 -3903 1495 -4406 -312 -619 602 -1672 -4436 -4997 -4314 86 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -20 -11293 -12335 -894 -1115 -701 -1378 * * - 68 -3329 1217 -624 -797 -1594 -4303 1580 -4872 2069 -2414 -3890 617 -4396 283 2449 -560 -267 -2067 -4984 -1334 87 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -20 -11293 -12335 -894 -1115 -701 -1378 * * - 69 108 566 -1460 747 -1608 -4306 -2965 -30 1407 -2607 -3878 346 1033 -336 863 -1038 745 617 -4975 -4296 88 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -20 -11293 -12335 -894 -1115 -701 -1378 * * - 70 -1318 -3465 -283 -172 -3423 -2053 -3974 1957 -4721 1761 1425 -4678 -1762 -4391 -1578 -1974 -1561 1341 -3918 -3570 89 - - -149 -500 233 43 -381 399 106 -626 210 -466 -720 275 394 45 96 359 117 -369 -294 -249 - - -20 -11293 -12336 -894 -1115 -701 -1378 * * - 71 -1165 -4790 -240 -275 -5105 -4306 1035 -2009 1665 -395 707 -1334 -218 -188 1891 -1077 -383 404 110 348 90 - - -149 -500 233 43 -381 398 106 -626 210 -464 -720 275 394 45 96 359 117 -369 -294 -249 - - -43 -6001 -12336 -150 -3342 -701 -1378 * * - 72 -1929 1218 -1535 -1647 -3990 -4677 -3410 1725 207 -1481 -3117 -3608 -810 -1118 -743 -1942 428 2687 -4325 -3869 92 - - * * * * * * * * * * * * * * * * * * * * - - * * * * * * * * 0 -// diff --git a/forester/archive/RIO/others/hmmer/tutorial/rrm.slx b/forester/archive/RIO/others/hmmer/tutorial/rrm.slx deleted file mode 100644 index ea84b12..0000000 --- 
a/forester/archive/RIO/others/hmmer/tutorial/rrm.slx +++ /dev/null @@ -1,167 +0,0 @@ -#=ID rrm -#=AC PF00076 -#=DE RNA recognition motif. (aka RRM, RBD, or RNP domain) -# AU Sean Eddy -# GA HMM_iterative_training -# GA Bic_raw 25 hmmls 10 -# CC There is no separation between signal and noise. -# AL HMM_simulated_annealing -# AM hmma -qR -# SE Published_alignment -# RN [1] -# RM 94119674 -# RA Birney E., Kumar S., Krainer A.R. -# RL NAR 21:5803-5816(1993). -# DR PROSITE; PDOC00030; -# DR SCOP; 1SXL; sf; -# CC -!- The RRM motif is probably diagnostic of an RNA binding protein. -# CC -!- RRMs are found in a variety of RNA binding proteins, including -# CC various hnRNP proteins, proteins implicated in regulation -# CC of alternative splicing, and protein components -# CC of snRNPs. The motif also appears in a few single stranded -# CC DNA binding proteins. -# CC -!- The RRM structure consists of four strands -# CC and two helices arranged in an alpha/beta sandwich. -# SQ 70 -CABA_MOUSE/77-148 MFVGGL.SWDTSKKDLKD....YFTKFGEVVDCTIKMD.........PNT -CABA_MOUSE/161-232 IFVGGL.NPEATEEKIRE....YFGQFGEIEAIELPID.........PKL -CST2_HUMAN/18-89 VFVGNI.PYEATEEQLKD....IFSEVGPVVSFRLVYD.........RET -ELAV_DROME/250-322 LYVSGL.PKTMTQQELEA....IFAPFGAIITSRILQNa........GND -ELAV_DROME/404-475 IFIYNL.APETEEAALWQ....LFGPFGAVQSVKIVKD.........PTT -EWS_HUMAN/363-442 IYVQGL.NDSVTLDDLAD....FFKQCGVVKMNKRTGQpmih.iyldKET -GBP2_YEAST/124-193 IFVRNL.TFDCTPEDLKE....LFGTVGEVVEADIIT...........SK -GBP2_YEAST/221-291 VFIINL.PYSMNWQSLKD....MFKECGHVLRADVELD..........FN -GBP2_YEAST/351-421 IYCSNL.PFSTARSDLFD....LFGPIGKINNAELKPQ..........EN -GR10_BRANA/8-79 CFVGGL.AWATGDAELER....TFSQFGEVIDSKIIND.........RET -HUD_HUMAN/48-119 LIVNYL.PQNMTQEEFRS....LFGSIGEIESCKLVRD.........KIT -IF4B_HUMAN/98-169 AFLGNL.PYDVTEESIKE....FFRGLNISAVRLPREP.........SNP -MSSP_HUMAN/31-102 LYIRGL.PPHTTDQDLVK....LCQPYGKIVSTKAILD.........KTT -NAM8_YEAST/165-237 IFVGDL.APNVTESQLFE....LFiNRYASTSHAKIVHD........QVT -NOP3_YEAST/127-190 
LFVRPF.PLDVQESELNE....IFGPFGPMKEVKILN............. -NOP3_YEAST/202-270 ITMKNL.PEGCSWQDLKD....LARENSLETTFSSVN............T -NOP4_YEAST/28-98 LFVRSI.PQDVTDEQLAD....FFSNFAPIKHAVVVKD..........TN -NOP4_YEAST/292-367 VFVRNV.PYDATEESLAP....HFSKFGSVKYALPVID.........KST -NSR1_YEAST/170-241 IFVGRL.SWSIDDEWLKK....EFEHIGGVIGARVIYE.........RGT -NSR1_YEAST/269-340 LFLGNL.SFNADRDAIFE....LFAKHGEVVSVRIPTH.........PET -NUCL_CHICK/283-353 LFVKNL.TPTKDYEELRT....AIKEFFGKKNLQVSEV..........RI -NUCL_CHICK/373-440 LFVKNL.PYRVTEDEMKN....VFENALEVRLVLNKE............. -PABP_DROME/4-75 LYVGDL.PQDVNESGLFD....KFSSAGPVLSIRVCRD.........VIT -PABP_DROME/92-162 VFIKNL.DRAIDNKAIYD....TFSAFGNILSCKVATD..........EK -PABP_DROME/183-254 VYVKNF.TEDFDDEKLKE....FFEPYGKITSYKVMSK..........ED -PABP_SCHPO/249-319 VYIKNL.DTEITEQEFSD....LFGQFGEITSLSLVKD..........QN -PES4_YEAST/93-164 LFIGDL.HETVTEETLKG....IFKKYPSFVSAKVCLD.........SVT -PES4_YEAST/305-374 IFIKNL.PTITTRDDILN....FFSEVGPIKSIYLSN...........AT -PSF_HUMAN/373-443 LSVRNL.SPYVSNELLEE....AFSQFGPIERAVVIVD..........DR -PUB1_YEAST/76-146 LYVGNL.DKAITEDILKQ....YFQVGGPIANIKIMID..........KN -PUB1_YEAST/163-234 LFVGDL.NVNVDDETLRN....AFKDFPSYLSGHVMWD.........MQT -PUB1_YEAST/342-407 AYIGNI.PHFATEADLIP....LFQNFGFILDFKHYPE............ -RB97_DROME/34-105 LFIGGL.APYTTEENLKL....FYGQWGKVVDVVVMRD.........AAT -RN15_YEAST/20-91 VYLGSI.PYDQTEEQILD....LCSNVGPVINLKMMFD.........PQT -RNP1_YEAST/37-109 LYVGNL.PKNCRKQDLRD....LFEPNYGKITINMLKKk........PLK -RO28_NICSY/99-170 LFVGNL.PYDIDSEGLAQ....LFQQAGVVEIAEVIYN.........RET -RO33_NICSY/116-187 LYVGNL.PFSMTSSQLSE....IFAEAGTVANVEIVYD.........RVT -RO33_NICSY/219-290 LYVANL.SWALTSQGLRD....AFADQPGFMSAKVIYD.........RSS -ROA1_BOVIN/106-177 IFVGGI.KEDTEEHHLRD....YFEQYGKIEVIEIMTD.........RGS -ROC_HUMAN/18-82 VFIGNLnTLVVKKSDVEA....IFSKYGKIVGCSVHK............. 
-ROG_HUMAN/10-81 LFIGGL.NTETNEKALEA....VFGKYGRIVEVLLMKD.........RET -RT19_ARATH/33-104 LYIGGL.SPGTDEHSLKD....AFSSFNGVTEARVMTN.........KVT -RU17_DROME/104-175 LFIARI.NYDTSESKLRR....EFEFYGPIKKIVLIHD.........QES -RU1A_HUMAN/12-84 IYINNL.NEKIKKDELKkslyAIFSQFGQILDILVSRS............ -RU1A_HUMAN/210-276 LFLTNL.PEETNELMLSM....LFNQFPGFKEVRLVPG............ -RU1A_YEAST/229-293 LLIQNL.PSGTTEQLLSQ....ILGNEALVEIRLVSV............. -RU2B_HUMAN/9-81 IYINNM.NDKIKKEELKRslyaLFSQFGHVVDIVALK............T -RU2B_HUMAN/153-220 LFLNNL.PEETNEMMLSM....LFNQFPGFKEVRLVPG............ -SC35_CHICK/16-87 LKVDNL.TYRTSPDTLRR....VFEKYGRVGDVYIPRD.........RYT -SP33_HUMAN/17-85 IYVGNL.PPDIRTKDIED....VFYKYGAIRDIDLKNR............ -SP33_HUMAN/122-186 VVVSGL.PPSGSWQDLKD....HMREAGDVCYADVYRD............ -SQD_DROME/58-128 LFVGGL.SWETTEKELRD....HFGKYGEIESINVKTD.........PQT -SQD_DROME/138-208 IFVGGL.TTEISDEEIKT....YFGQFGNIVEVEMPLD.........KQK -SR55_DROME/5-68 VYVGGL.PYGVRERDLER....FFKGYGRTRDILIKN............. -SSB1_YEAST/39-114 IFIGNV.AHECTEDDLKQ....LFvEEFGDEVSVEIPIKeh.....tDGH -SSB1_YEAST/188-268 LYINNV.PFKATKEEVAE....FFGTDADSISLPMRKMrdqhtgrifTSD -SXLF_DROME/127-198 LIVNYL.PQDMTDRELYA....LFRAIGPINTCRIMRD.........YKT -SXLF_DROME/213-285 LYVTNL.PRTITDDQLDT....IFGKYGSIVQKNILRD.........KLT -TIA1_HUMAN/9-78 LYVGNL.SRDVTEALILQ....LFSQIGPCKNCKMIMD...........T -TIA1_HUMAN/97-168 VFVGDL.SPQITTEDIKA....AFAPFGRISDARVVKD.........MAT -TIA1_HUMAN/205-270 VYCGGV.TSGLTEQLMRQ....TFSPFGQIMEIRVFPD............ -TRA2_DROME/99-170 IGVFGL.NTNTSQHKVRE....LFNKYGPIERIQMVID.........AQT -U2AF_HUMAN/261-332 LFIGGL.PNYLNDDQVKE....LLTSFGPLKAFNLVKD.........SAT -U2AF_SCHPO/312-383 IYISNL.PLNLGEDQVVE....LLKPFGDLLSFQLIKN.........IAD -WHI3_YEAST/540-615 LYVGNL.PSDATEQELRQ....LFSGQEGFRRLSFRNKnt......tSNG -X16_HUMAN/12-78 VYVGNL.GNNGNKTELER....AFGYYGPLRSVWVARN............ -YHC4_YEAST/348-415 IFVGQL.DKETTREELNR....RFSTHGKIQDINLIFK............ 
-YHH5_YEAST/315-384 ILVKNL.PSDTTQEEVLD....YFSTIGPIKSVFISE...........KQ -YIS1_YEAST/66-136 IFVGNI.TPDVTPEQIED....HFKDCGQIKRITLLYD.........RNT -YIS5_YEAST/33-104 IYIGNL.NRELTEGDILT....VFSEYGVPVDVILSRD.........ENT - -CABA_MOUSE/77-148 GRSRGFGFILFKDS....SSVEKVLDQKEH.RLDGRVIDP.K -CABA_MOUSE/161-232 NKRRGFVFITFKEE....DPVKKVLEKKFH.TVSGSKCEI.K -CST2_HUMAN/18-89 GKPKGYGFCEYQDQ....ETALSAMRNLNG.REFSGRALR.V -ELAV_DROME/250-322 TQTKGVGFIRFDKR....EEATRAIIALNG.TTPSSCTDP.I -ELAV_DROME/404-475 NQCKGYGFVSMTNY....DEAAMAIRALNG.YTMGNRVLQ.V -EWS_HUMAN/363-442 GKPKGDATVSYEDP....PTAKAAVEWFDG.KDFQGSKLK.V -GBP2_YEAST/124-193 GHHRGMGTVEFTKN....ESVQDAISKFDG.ALFMDRKLM.V -GBP2_YEAST/221-291 GFSRGFGSVIYPTE....DEMIRAIDTFNG.MEVEGRVLE.V -GBP2_YEAST/351-421 GQPTGVAVVEYENL....VDADFCIQKLNN.YNYGGCSLQ.I -GR10_BRANA/8-79 GRSRGFGFVTFKDE....KSMKDAIDEMNG.KELDGRTIT.V -HUD_HUMAN/48-119 GQSLGYGFVNYIDP....KDAEKAINTLNG.LRLQTKTIK.V -IF4B_HUMAN/98-169 ERLKGFGYAEFEDL....DSLLSALSLNEE.SLGNRRIRV.D -MSSP_HUMAN/31-102 NKCKGYGFVDFDSP....AAAQKAVSALKA.SGVQAQKAK.Q -NAM8_YEAST/165-237 GMSKGYGFVKFTNS....DEQQLALSEMQG.VFLNGRAIK.V -NOP3_YEAST/127-190 ....GFAFVEFEEA....ESAAKAIEEVHG.KSFANQPLE.V -NOP3_YEAST/202-270 RDFDGTGALEFPSE....EILVEALERLNN.IEFRGSVIT.V -NOP4_YEAST/28-98 KRSRGFGFVSFAVE....DDTKEALAKARK.TKFNGHILR.V -NOP4_YEAST/292-367 GLAKGTAFVAFKDQytynECIKNAPAAGST.SLLIGDDVM.P -NSR1_YEAST/170-241 DRSRGYGYVDFENK....SYAEKAIQEMQG.KEIDGRPIN.C -NSR1_YEAST/269-340 EQPKGFGYVQFSNM....EDAKKALDALQG.EYIDNRPVR.L -NUCL_CHICK/283-353 GSSKRFGYVDFLSA....EDMDKALQ.LNG.KKLMGLEIKlE -NUCL_CHICK/373-440 GSSKGMAYIEFKTE....AEAEKALEEKQG.TEVDGRAMV.I -PABP_DROME/4-75 RRSLGYAYVNFQQP....ADAERALDTMNF.DLVRNKPIR.I -PABP_DROME/92-162 GNSKGYGFVHFETE....EAANTSIDKVNG.MLLNGKKVY.V -PABP_DROME/183-254 GKSKGFGFVAFETT....EAAEAAVQALNGkDMGEGKSLY.V -PABP_SCHPO/249-319 DKPRGFGFVNYANH....ECAQKAVDELND.KEYKGKKLY.V -PES4_YEAST/93-164 KKSLGHGYLNFEDK....EEAEKAMEELNY.TKVNGKEIR.I -PES4_YEAST/305-374 KVKYLWAFVTYKNS....SDSEKAIKRYNN.FYFRGKKLL.V -PSF_HUMAN/373-443 
GRSTGKGIVEFASK....PAARKAFERCSE.GVFLLTTTP.R -PUB1_YEAST/76-146 NKNVNYAFVEYHQS....HDANIALQTLNG.KQIENNIVK.I -PUB1_YEAST/163-234 GSSRGYGFVSFTSQ....DDAQNAMDSMQG.QDLNGRPLR.I -PUB1_YEAST/342-407 ...KGCCFIKYDTH....EQAAVCIVALAN.FPFQGRNLR.T -RB97_DROME/34-105 KRSRGFGFITYTKS....LMVDRAQENRPH.IIDGKTVEA.K -RN15_YEAST/20-91 GRSKGYAFIEFRDL....ESSASAVRNLNG.YQLGSRFLK.C -RNP1_YEAST/37-109 KPLKRFAFIEFQEG....VNLKKVKEKMNG.KIFMNEKIV.I -RO28_NICSY/99-170 DRSRGFGFVTMSTV....EEADKAVELYSQ.YDLNGRLLT.V -RO33_NICSY/116-187 DRSRGFAFVTMGSV....EEAKEAIRLFDG.SQVGGRTVK.V -RO33_NICSY/219-290 GRSRGFGFITFSSA....EAMNSALDTMNE.VELEGRPLR.L -ROA1_BOVIN/106-177 GKKRGFAFVTFDDH....DSVDKIVIQKYH.TVNGHNCEV.R -ROC_HUMAN/18-82 ....GFAFVQYVNE....RNARAAVAGEDG.RMIAGQVLD.I -ROG_HUMAN/10-81 NKSRGFAFVTFESP....ADAKDAARDMNG.KSLDGKAIK.V -RT19_ARATH/33-104 GRSRGYGFVNFISE....DSANSAISAMNG.QELNGFNIS.V -RU17_DROME/104-175 GKPKGYAFIEYEHE....RDMHAAYKHADG.KKIDSKRVL.V -RU1A_HUMAN/12-84 LKMRGQAFVIFKEV....SSATNALRSMQG.FPFYDKPMR.I -RU1A_HUMAN/210-276 ..RHDIAFVEFDNE....VQAGAARDALQG.FKITQNNAM.K -RU1A_YEAST/229-293 ...RNLAFVEYETV....ADATKIKNQLGS.TYKLQNNDV.T -RU2B_HUMAN/9-81 MKMRGQAFVIFKEL....GSSTNALRQLQG.FPFYGKPMR.I -RU2B_HUMAN/153-220 ..RHDIAFVEFEND....GQAGAARDALQGfKITPSHAMK.I -SC35_CHICK/16-87 KESRGFAFVRFHDK....RDAEDAMDAMDG.AVLDGRELR.V -SP33_HUMAN/17-85 RGGPPFAFVEFEDP....RDAEDAVYGRDG.YDYDGYRLR.V -SP33_HUMAN/122-186 ....GTGVVEFVRK....EDMTYAVRKLDN.TKFRSHEGE.T -SQD_DROME/58-128 GRSRGFAFIVFTNT....EAIDKVSA.ADE.HIINSKKVD.P -SQD_DROME/138-208 SQRKGFCFITFDSE....QVVTDLLK.TPK.QKIAGKEVD.V -SR55_DROME/5-68 ....GYGFVEFEDY....RDADDAVYELNG.KELLGERVV.V -SSB1_YEAST/39-114 IPASKHALVKFPTK....IDFDNIKENYDT.KVVKDREIH.I -SSB1_YEAST/188-268 SANRGMAFVTFSGE....NVDIEAKAEEFK.GKVFGDREL.T -SXLF_DROME/127-198 GYSFGYAFVDFTSE....MDSQRAIKVLNG.ITVRNKRLK.V -SXLF_DROME/213-285 GRPRGVAFVRYNKR....EEAQEAISALNNvIPEGGSQPL.S -TIA1_HUMAN/9-78 AGNDPYCFVEFHEH....RHAAAALAAMNG.RKIMGKEVK.V -TIA1_HUMAN/97-168 GKSKGYGFVSFFNK....WDAENAIQQMGG.QWLGGRQIR.T -TIA1_HUMAN/205-270 
...KGYSFVRFNSH....ESAAHAIVSVNG.TTIEGHVVK.C -TRA2_DROME/99-170 QRSRGFCFIYFEKL....SDARAAKDSCSG.IEVDGRRIR.V -U2AF_HUMAN/261-332 GLSKGYAFCEYVDI....NVTDQAIAGLNG.MQLGDKKLL.V -U2AF_SCHPO/312-383 GSSKGFCFCEFKNP....SDAEVAISGLDG.KDTYGNKLH.A -WHI3_YEAST/540-615 HSHGPMCFVEFDDV....SFATRALAELYGrQLPRSTVSS.K -X16_HUMAN/12-78 ..PPGFAFVEFEDP....RDAADAVRELDG.RTLCGCRVR.V -YHC4_YEAST/348-415 .PTNIFAFIKYETE....EAAAAALESENH.AIFLNKTMH.V -YHH5_YEAST/315-384 ANTPHKAFVTYKNE....EESKKAQKCLNK.TIFKNHTIW.V -YIS1_YEAST/66-136 GTPKGYGYIEFESP....AYREKALQ.LNG.GELKGKKIA.V -YIS5_YEAST/33-104 GESQGFAYLKYEDQ....RSTILAVDNLNG.FKIGGRALK.I - diff --git a/forester/archive/RIO/others/phylip_mod/IMPORTANT_NOTICE b/forester/archive/RIO/others/phylip_mod/IMPORTANT_NOTICE deleted file mode 100644 index 6d2a698..0000000 --- a/forester/archive/RIO/others/phylip_mod/IMPORTANT_NOTICE +++ /dev/null @@ -1,48 +0,0 @@ -RIO - Phylogenomic Protein Function Analysis ----------------------------------------------------------------- - - -RIO contains modified versions of programs written by others: - -1. TREE-PUZZLE - (Strimmer, K., and A. von Haeseler. 1996. Quartet puzzling: A quartet maximum - likelihood method for reconstructing tree topologies. Mol. Biol. Evol. 13: 964-969.) - - -2. PHYLIP - (Felsenstein, J. 1993. PHYLIP (Phylogeny Inference Package) version 3.5c. - Distributed by the author. - Department of Genetics, University of Washington, Seattle.) - - -Please note: ------------- - -1. RIO uses modifications of these programs, the original versions were - written by others: - - TREE-PUZZLE: Heiko A. Schmidt, Korbinian Strimmer, Martin Vingron, Arndt von Haeseler - - PHYLIP: Joseph Felsenstein, see also http://evolution.genetics.washington.edu/phylip/credits.html - - -2. The programs in the RIO distribution have been modified specifically - to work within RIO and cannot be used for any other purpose. - - -3. I am responsible for any accidentally introduced errors. - - -4. 
The original can be downloaded from the following sites: - TREE-PUZZLE: http://www.tree-puzzle.de/ - PHYLIP: http://evolution.genetics.washington.edu/phylip.html - - -RIO also contains hmmer (version 2.2g). -hmmer can be downloaded at: http://hmmer.wustl.edu/ - - - -Christian Zmasek, 07/28/2006 - - diff --git a/forester/archive/RIO/others/phylip_mod/src/CHANGES b/forester/archive/RIO/others/phylip_mod/src/CHANGES deleted file mode 100644 index 3c03411..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/CHANGES +++ /dev/null @@ -1,29 +0,0 @@ -Based on Phylip 3.65 - -phylip.h --------- - -#define MAXNCH 20 -> #define MAXNCH 26 -#define nmlngth 10 -> #define nmlngth 26 - - - -seq.h ------ - -#define MAXNCH 20 -> #define MAXNCH 26 - - - -protdist.c ----------- - -#define nmlngth 10 -> #define nmlngth 26 - - - -Makefile --------- - -Commented out instructions for programs I currently don't need. - diff --git a/forester/archive/RIO/others/phylip_mod/src/Makefile b/forester/archive/RIO/others/phylip_mod/src/Makefile deleted file mode 100644 index 9aed148..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/Makefile +++ /dev/null @@ -1,451 +0,0 @@ -# Modified by Christian Zmasek. Use at your own risk. -# -# Generic Linux/Unix Makefile for PHYLIP 3.6. -# -# You should not need to change anything, though if you want you could -# change the first (noncomment) statement to some directory location -# that might be more useful. -# The compressed tar archive phylip.tar.Z when uncompressed and extracted -# puts the source code into a directory ./src, and also makes two other -# directories ./exe and ./doc for the final executables and the documentation -# files. Only change the EXEDIR settings if you want something different -# from that structure. -# If it causes trouble in compiling, the CFLAGS statement below may also need -# to be changed. 
-# -# To use the PHYLIP v3.6 Makefile, type -# make install to compile the whole package and install -# the executables in $(EXEDIR), and then -# remove the object files to save space -# make all to compile the whole package but not install it -# or remove the object files -# make put to move the executables into $(EXEDIR) -# make clean to remove all object files and executables from the -# current directory -# make dnaml to compile and link one program, (in this example, -# DnaML) and leave the executable and object files -# in the current directory (where the source code is). -# You will have to move the executable in to the -# executables directory (e.g. "mv dnaml ../exe") -# Note that the program name should be lower case. -# -# ---------------------------------------------------------------------------- -# (Starting here is the section where you may want to change things) -# ---------------------------------------------------------------------------- -# -# these are the statements we have been talking about: -# one of the reasons for changing them would be to put the executables -# on a different file system. -# The default configuration is to have within the overall PHYLIP -# directory three subdirectories: "src" for source code, "exe" for the -# executables, and "doc" for the documentation files. -# -# the following specifies the directory where the executables will be placed -EXEDIR = ../exe -# -# ---------------------------------------------------------------------------- -# -# In the following statements (the ones that set CFLAGS, DFLAGS, LIBS -# and DLIBS, CC and DC) you should make sure each is set properly. -# Usually this will simply involve making sure that the proper statement -# has no "#" as its first character and that all other possibilities -# have "#" for their first character. 
-# -# ---------------------------------------------------------------------------- -# -# This is the CFLAGS statement: -# -# if these statements say "-g" and that causes trouble, remove the "-g" -# if the "-lX11" does not work (X compiling not working), you may want to -# remove that switch or fix it. -# -# Here are some possible CFLAGS statements: -# -# -#A minimal one -#CFLAGS = -# -# A basic one for debugging -#CFLAGS = -g -# -# An optimized one for gcc -CFLAGS = -O3 -fomit-frame-pointer -# -# For some serious debugging using Gnu gcc -# -#CFLAGS=-g -Wall -Wmain -Wmissing-prototypes -Wreturn-type -Wstrict-prototypes -Wunused -Werror -Wredundant-decls -Waggregate-return -Wcast-align -Wcomment -# -# For Digital Alpha systems with Compaq Tru64 Unix -# (however, be aware that this may cause floating-point problems in programs -# like Dnaml owing to not using IEEE floating point standards). -#CFLAGS = -fast -# -# ---------------------------------------------------------------------------- -# -# and here are some possible DFLAGS statements: -# -# A minimal one -#DFLAGS = -DX $(CFLAGS) -# -# A basic one for debugging -#DFLAGS = -g -DX -# -# -# For Gnu C++ for runs -#DFLAGS = -I/usr/X11R6/include -O3 -DX -fomit-frame-pointer -# -# For Digital Alpha systems with Compaq Tru64 Unix -#DFLAGS = -DX -fast -# -# for Linux with X Windows development packages installed -# or for MacOS X with X Windows installed -DFLAGS = $(CFLAGS) -DX -I/usr/X11R6/include -# -# ---------------------------------------------------------------------------- -# -# These are the libraries for the CC and DC compiles, respectively -# -LIBS = -lm -# -# if the Xlib library for the X windowing system is somewhere -# unexpected, you may have to change the path /usr/X11R6/lib in this one -# -# For gcc for Linux with X windows development packages installed -# or for MacOS X with X windows installed -DLIBS= -L/usr/X11R6/lib/ -lX11 -lXaw -lXt -# -# 
---------------------------------------------------------------------------- -# -# The next two assignments are the invocations of the compiler for the -# ordinary compiles and the tree-drawing programs, CC and DC -# -# This one specifies the "cc" C compiler -CC = cc $(CFLAGS) -# -# To use GCC instead, if it is not the compiler that "cc" invokes -#CC = gcc $(CFLAGS) -# -# This one specifies the "cc" C compiler for the Draw programs -DC = cc $(DFLAGS) -# -# To use GCC instead, if it is not the compiler that "cc" invokes -#DC = gcc $(DFLAGS) -# -# ---------------------------------------------------------------------------- -# (After this point there should not be any reason to change anything) -# ---------------------------------------------------------------------------- -# -# -# the list of programs -# -#PROGS = clique consense contml contrast dnacomp dnadist \ -# dnainvar dnaml dnamlk dnamove dnapars dnapenny \ -# dolmove dollop dolpenny factor fitch gendist kitsch \ -# mix move neighbor pars penny proml promlk protdist \ -# protpars restdist restml retree seqboot treedist \ -# drawgram drawtree - -PROGS = consense fitch neighbor proml promlk protdist protpars seqboot - -# -# general commands -# - -# -# The first uses a symbol you are unlikely to type. It is the one that -# is executed if you just type "make". It tells you how to use the -# Makefile. -# -a1b2c3d4: - @echo "" - @echo " To use the PHYLIP v3.6 Makefile, type" - @echo " make all to compile the whole package but not install it" - @echo " or remove the object files" - @echo " " - -introduce: - @echo "Building PHYLIP based on version 3.6 - Modified by Christian Zmasek. Use at your own risk." - -all: introduce $(PROGS) - @echo "Finished compiling." - @echo "" - -#install: all put clean -# @echo "Done." 
-# @echo "" - -#put: -# @echo "Installing PHYLIP v3.6 binaries in $(EXEDIR)" -# @mkdir -p $(EXEDIR) -# @cp $(PROGS) $(EXEDIR) -# @echo "Installing font files in $(EXEDIR)" -# @cp font* $(EXEDIR) -# @echo "Finished installation." -# @echo "" - -#clean: -# @echo "Removing object files to save space" -# @rm -f *.o -# @echo "Finished removing object files. Now will remove" -# @echo "executable files from the current directory, but not from the" -# @echo "executables directory. (If some are not here, the makefile" -# @echo "will terminate with an error message but this is not a problem)" -# @echo "" -# @echo "Removing executables from this directory" -# @rm -f $(PROGS) -# @echo "Finished cleanup." -# @echo "" - -# -# compile the shared stuff -# - -phylip.o: phylip.c phylip.h - $(CC) -c phylip.c - -seq.o: seq.c phylip.h seq.h - $(CC) -c seq.c - -#disc.o: disc.c phylip.h disc.h -# $(CC) -c disc.c - -#discrete.o: discrete.c discrete.h phylip.h -# $(CC) -c discrete.c - -#dollo.o: dollo.c phylip.h dollo.h -# $(CC) -c dollo.c - -#wagner.o: wagner.c phylip.h wagner.h -# $(CC) -c wagner.c - -dist.o: dist.c phylip.h dist.h - $(CC) -c dist.c - -#cont.o: cont.c cont.h phylip.h -# $(CC) -c cont.c - -#moves.o: moves.c phylip.h moves.h -# $(CC) -c moves.c - -# -# compile the individual programs -# - -#clique.o: clique.c disc.h phylip.h -# $(CC) -c clique.c - -#clique: clique.o clique.c disc.o disc.c phylip.o phylip.c disc.h phylip.h -# $(CC) clique.o disc.o phylip.o $(LIBS) -o clique - -cons.o: cons.c cons.h phylip.h - $(CC) -c cons.c - -consense.o: consense.c cons.h phylip.h - $(CC) -c consense.c - -consense: consense.o consense.c phylip.o phylip.c cons.o cons.c cons.h phylip.h - $(CC) consense.o cons.o phylip.o $(LIBS) -o consense - -#contml.o: contml.c cont.h phylip.h -# $(CC) -c contml.c - -#contml: contml.o contml.c cont.o cont.c phylip.o phylip.c cont.h phylip.h -# $(CC) contml.o cont.o phylip.o $(LIBS) -o contml - -#contrast.o: contrast.c cont.h phylip.h -# $(CC) -c contrast.c 
- -#contrast: contrast.o contrast.c cont.o cont.c phylip.o phylip.c cont.h phylip.h -# $(CC) contrast.o cont.o phylip.o $(LIBS) -o contrast - -#dnacomp.o: dnacomp.c seq.h phylip.h -# $(CC) -c dnacomp.c - -#dnacomp: dnacomp.o seq.o phylip.o dnacomp.c seq.c phylip.c seq.h phylip.h -# $(CC) dnacomp.o seq.o phylip.o $(LIBS) -o dnacomp - -#dnadist.o: dnadist.c seq.h phylip.h -# $(CC) -c dnadist.c - -#dnadist: dnadist.o seq.o phylip.o dnadist.c seq.c phylip.c seq.h phylip.h -# $(CC) dnadist.o seq.o phylip.o $(LIBS) -o dnadist - -#dnainvar.o: dnainvar.c seq.h phylip.h -# $(CC) -c dnainvar.c - -#dnainvar: dnainvar.o seq.o phylip.o dnainvar.c seq.c phylip.c seq.h phylip.h -# $(CC) dnainvar.o seq.o phylip.o $(LIBS) -o dnainvar - -#dnaml.o: dnaml.c seq.h phylip.h -# $(CC) -c dnaml.c - -#dnaml: dnaml.o seq.o phylip.o dnaml.c seq.c phylip.c seq.h phylip.h -# $(CC) dnaml.o seq.o phylip.o $(LIBS) -o dnaml - -#dnamlk.o: dnamlk.c seq.h phylip.h -# $(CC) -c dnamlk.c - -#dnamlk: dnamlk.o seq.o phylip.o dnamlk.c seq.c phylip.c -# $(CC) dnamlk.o seq.o phylip.o $(LIBS) -o dnamlk - -#dnamove.o: dnamove.c seq.h moves.h phylip.h -# $(CC) -c dnamove.c - -#dnamove: dnamove.o seq.o moves.o phylip.o dnamove.c seq.c phylip.c seq.h phylip.h -# $(CC) dnamove.o seq.o moves.o phylip.o $(LIBS) -o dnamove - -#dnapenny.o: dnapenny.c seq.h phylip.h -# $(CC) -c dnapenny.c - -#dnapenny: dnapenny.o seq.o phylip.o dnapenny.c seq.c phylip.c seq.h phylip.h -# $(CC) dnapenny.o seq.o phylip.o $(LIBS) -o dnapenny - -#dnapars.o: dnapars.c seq.h phylip.h -# $(CC) -c dnapars.c - -#dnapars: dnapars.o seq.o phylip.o dnapars.c seq.c phylip.c seq.h phylip.h -# $(CC) dnapars.o seq.o phylip.o $(LIBS) -o dnapars - -#dolmove.o: dolmove.c disc.h moves.h dollo.h phylip.h -# $(CC) -c dolmove.c - -#dolmove: dolmove.o disc.o moves.o dollo.o phylip.o dolmove.c disc.c moves.c dollo.c phylip.c disc.h moves.h dollo.h phylip.h -# $(CC) dolmove.o disc.o moves.o dollo.o phylip.o $(LIBS) -o dolmove - -#dollop.o: dollop.c disc.h 
dollo.h phylip.h -# $(CC) -c dollop.c - -#dollop: dollop.o disc.o dollo.o phylip.o dollop.c disc.c dollo.c phylip.c disc.h dollo.h phylip.h -# $(CC) dollop.o disc.o dollo.o phylip.o $(LIBS) -o dollop - -#dolpenny.o: dolpenny.c disc.h dollo.h phylip.h -# $(CC) -c dolpenny.c - -#dolpenny: dolpenny.o disc.o dollo.o phylip.o dolpenny.c disc.c dollo.c phylip.c disc.h dollo.h phylip.h -# $(CC) dolpenny.o disc.o dollo.o phylip.o $(LIBS) -o dolpenny - -#draw.o: draw.c draw.h phylip.h -# $(DC) -c draw.c - -#draw2.o: draw2.c draw.h phylip.h -# $(DC) -c draw2.c - -#drawgram.o: drawgram.c draw.h phylip.h -# $(DC) -c drawgram.c - -#drawgram: drawgram.o draw.o draw2.o phylip.o drawgram.c draw.c draw2.c draw.h phylip.h -# $(DC) $(DLIBS) draw.o draw2.o drawgram.o phylip.o $(LIBS) -o drawgram - -#drawtree.o: drawtree.c draw.h phylip.h -# $(DC) -c drawtree.c - -#drawtree: drawtree.o draw.o draw2.o phylip.o drawtree.c draw.c draw2.c draw.h phylip.h -# $(DC) $(DLIBS) draw.o draw2.o drawtree.o phylip.o $(LIBS) -o drawtree - -#factor.o: factor.c phylip.h -# $(CC) -c factor.c - -#factor: factor.o phylip.o factor.c phylip.c phylip.h -# $(CC) factor.o phylip.o $(LIBS) -o factor - -fitch.o: fitch.c dist.h phylip.h - $(CC) -c fitch.c - -fitch: fitch.o dist.o phylip.o fitch.c dist.c phylip.c dist.h phylip.h - $(CC) fitch.o dist.o phylip.o $(LIBS) -o fitch - -#gendist.o: gendist.c phylip.h -# $(CC) -c gendist.c - -#gendist: gendist.o phylip.o gendist.c phylip.c phylip.h -# $(CC) gendist.o phylip.o $(LIBS) -o gendist - -#kitsch.o: kitsch.c dist.h phylip.h -# $(CC) -c kitsch.c - -#kitsch: kitsch.o dist.o phylip.o kitsch.c dist.c phylip.c dist.h phylip.h -# $(CC) kitsch.o dist.o phylip.o $(LIBS) -o kitsch - -#mix.o: mix.c disc.h wagner.h phylip.h -# $(CC) -c mix.c - -#mix: mix.o disc.o wagner.o phylip.o mix.c disc.c wagner.c phylip.c disc.h wagner.h phylip.h -# $(CC) mix.o disc.o wagner.o phylip.o $(LIBS) -o mix - -#move.o: move.c disc.h moves.h wagner.h phylip.h -# $(CC) -c move.c - -#move: 
move.o disc.o moves.o wagner.o phylip.o move.c disc.c moves.c wagner.c phylip.c disc.h moves.h wagner.h phylip.h -# $(CC) move.o disc.o moves.o wagner.o phylip.o $(LIBS) -o move - -neighbor.o: neighbor.c dist.h phylip.h - $(CC) -c neighbor.c - -neighbor: neighbor.o dist.o phylip.o neighbor.c dist.c phylip.c dist.h phylip.h - $(CC) neighbor.o dist.o phylip.o $(LIBS) -o neighbor - -#pars.o: pars.c discrete.h phylip.h -# $(CC) -c pars.c - -#pars: pars.o pars.c discrete.o discrete.c phylip.o phylip.c discrete.h phylip.h -# $(CC) pars.o discrete.o phylip.o $(LIBS) -o pars - -#penny.o: penny.c disc.h wagner.h phylip.h -# $(CC) -c penny.c - -#penny: penny.o disc.o wagner.o phylip.o penny.c disc.c wagner.c disc.h wagner.h phylip.h -# $(CC) penny.o disc.o wagner.o phylip.o $(LIBS) -o penny - -proml.o: proml.c seq.h phylip.h - $(CC) -c proml.c - -proml: proml.o seq.o phylip.o proml.c seq.c phylip.c seq.h phylip.h - $(CC) proml.o seq.o phylip.o $(LIBS) -o proml - -promlk.o: promlk.c seq.h phylip.h - $(CC) -c promlk.c - -promlk: promlk.o seq.o phylip.o promlk.c seq.c phylip.c - $(CC) promlk.o seq.o phylip.o $(LIBS) -o promlk - -protdist.o: protdist.c seq.h phylip.h - $(CC) -c protdist.c - -protdist: protdist.o seq.o phylip.o protdist.c seq.c phylip.c seq.h phylip.h - $(CC) protdist.o seq.o phylip.o $(LIBS) -o protdist - -protpars.o: protpars.c seq.h phylip.h - $(CC) -c protpars.c - -protpars: protpars.o seq.o phylip.o protpars.c seq.c phylip.c seq.h phylip.h - $(CC) protpars.o seq.o phylip.o $(LIBS) -o protpars - -#restdist.o: restdist.c seq.h phylip.h -# $(CC) -c restdist.c - -#restdist: restdist.o seq.o phylip.o restdist.c seq.c phylip.c seq.h phylip.h -# $(CC) restdist.o seq.o phylip.o $(LIBS) -o restdist - -#restml.o: restml.c seq.h phylip.h -# $(CC) -c restml.c - -#restml: restml.o seq.o phylip.o restml.c seq.c phylip.c seq.h phylip.h -# $(CC) restml.o seq.o phylip.o $(LIBS) -o restml - -#retree.o: retree.c moves.h phylip.h -# $(CC) -c retree.c - -#retree: retree.o 
moves.o phylip.o retree.c moves.c phylip.c moves.h phylip.h -# $(CC) retree.o moves.o phylip.o $(LIBS) -o retree - -seqboot.o: seqboot.c phylip.h - $(CC) -c seqboot.c - -seqboot: seqboot.o seq.o phylip.o seqboot.c seq.c phylip.c seq.h phylip.h - $(CC) seqboot.o seq.o phylip.o $(LIBS) -o seqboot - -#treedist.o: treedist.c cons.h phylip.h -# $(CC) -c treedist.c - -#treedist: treedist.o phylip.o cons.o -# $(CC) treedist.o cons.o phylip.o $(LIBS) -o treedist diff --git a/forester/archive/RIO/others/phylip_mod/src/cons.c b/forester/archive/RIO/others/phylip_mod/src/cons.c deleted file mode 100644 index 2fa7c4d..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/cons.c +++ /dev/null @@ -1,1457 +0,0 @@ -#include "phylip.h" -#include "cons.h" - -int tree_pairing; - -Char outfilename[FNMLNGTH], intreename[FNMLNGTH], intree2name[FNMLNGTH], outtreename[FNMLNGTH]; -node *root; - -long numopts, outgrno, col, setsz; -long maxgrp; /* max. no. of groups in all trees found */ - -boolean trout, firsttree, noroot, outgropt, didreroot, prntsets, - progress, treeprint, goteof, strict, mr=false, mre=false, - ml=false; /* initialized all false for Treedist */ -pointarray nodep; -pointarray treenode; -group_type **grouping, **grping2, **group2;/* to store groups found */ -double *lengths, *lengths2; -long **order, **order2, lasti; -group_type *fullset; -node *grbg; -long tipy; - -double **timesseen, **tmseen2, **times2 ; -double trweight, ntrees, mlfrac; - -/* prototypes */ -void censor(void); -boolean compatible(long, long); -void elimboth(long); -void enternohash(group_type*, long*); -void enterpartition (group_type*, long*); -void reorient(node* n); - -/* begin hash table code */ - -#define NUM_BUCKETS 100 - -typedef struct namenode { - struct namenode *next; - plotstring naym; - int hitCount; -} namenode; - -typedef namenode **hashtype; - -hashtype hashp; - -long namesGetBucket(plotstring); -void namesAdd(plotstring); -boolean namesSearch(plotstring); -void 
namesDelete(plotstring); -void namesClearTable(void); -void namesCheckTable(void); -void missingnameRecurs(node *p); - -/** - * namesGetBucket - return the bucket for a given name - */ -long namesGetBucket(plotstring searchname) { - long i; - long sum = 0; - - for (i = 0; (i < MAXNCH) && (searchname[i] != '\0'); i++) { - sum += searchname[i]; - } - return (sum % NUM_BUCKETS); -} - - -/** - * namesAdd - add a name to the hash table - * - * The argument is added at the head of the appropriate linked list. No - * checking is done for duplicates. The caller can call - * namesSearch to check for an existing name prior to calling - * namesAdd. - */ -void namesAdd(plotstring addname) { - long bucket = namesGetBucket(addname); - namenode *hp, *temp; - - temp = hashp[bucket]; - hashp[bucket] = (namenode *)Malloc(sizeof(namenode)); - hp = hashp[bucket]; - strcpy(hp->naym, addname); - hp->next = temp; - hp->hitCount = 0; -} - -/** - * namesSearch - search for a name in the hash table - * - * Return true if the name is found, else false. - */ -boolean namesSearch(plotstring searchname) { - long i = namesGetBucket(searchname); - namenode *p; - - p = hashp[i]; - if (p == NULL) { - return false; - } - do { - if (strcmp(searchname,p->naym) == 0) { - p->hitCount++; - return true; - } - p = p->next; - } while (p != NULL); - - return false; -} - -/** - * Go through hash table and check that the hit count on all entries is one. - * If it is zero, then a species was missed, if it is two, then there is a - * duplicate species. 
- */ - -void namesCheckTable(void) { - namenode *p; - long i; - - for (i=0; i< NUM_BUCKETS; i++) { - p = hashp[i]; - while (p != NULL){ - if(p->hitCount >1){ - printf("\n\nERROR in user tree: duplicate name found: "); - puts(p->naym); - printf("\n\n"); - exxit(-1); - } else if(p->hitCount == 0){ - printf("\n\nERROR in user tree: name %s not found\n\n\n", - p->naym); - exxit(-1); - } - p->hitCount = 0; - p = p->next; - } - } -} - -/** - * namesClearTable - empty names out of the table and - * return allocated memory - */ -void namesClearTable(void) { - long i; - namenode *p, *temp; - - for (i=0; i< NUM_BUCKETS; i++) { - p = hashp[i]; - if (p != NULL) { - do { - temp = p; - p = p->next; - free(temp); - } while (p != NULL); - hashp[i] = NULL; - } - } -} -/* end hash table code */ - -void initconsnode(node **p, node **grbg, node *q, long len, long nodei, - long *ntips, long *parens, initops whichinit, - pointarray treenode, pointarray nodep, Char *str, - Char *ch, FILE *intree) -{ - /* initializes a node */ - long i; - char c; - boolean minusread; - double valyew, divisor, fracchange; - - switch (whichinit) { - case bottom: - gnu(grbg, p); - (*p)->index = nodei; - (*p)->tip = false; - for (i=0; inayme[i] = '\0'; - nodep[(*p)->index - 1] = (*p); - (*p)->v = 0; - break; - case nonbottom: - gnu(grbg, p); - (*p)->index = nodei; - (*p)->v = 0; - break; - case tip: - (*ntips)++; - gnu(grbg, p); - nodep[(*ntips) - 1] = *p; - setupnode(*p, *ntips); - (*p)->tip = true; - strncpy ((*p)->nayme, str, MAXNCH); - if (firsttree && prntsets) { - fprintf(outfile, " %ld. 
", *ntips); - for (i = 0; i < len; i++) - putc(str[i], outfile); - putc('\n', outfile); - if ((*ntips > 0) && (((*ntips) % 10) == 0)) - putc('\n', outfile); - } - (*p)->v = 0; - break; - case length: - processlength(&valyew, &divisor, ch, &minusread, intree, parens); - fracchange = 1.0; - (*p)->v = valyew / divisor / fracchange; - break; - case treewt: - if (!eoln(intree)) { - fscanf(intree, "%lf", &trweight); - getch(ch, parens, intree); - if (*ch != ']') { - printf("\n\nERROR: Missing right square bracket\n\n"); - exxit(-1); - } else { - getch(ch, parens, intree); - if (*ch != ';') { - printf("\n\nERROR: Missing semicolon after square brackets\n\n"); - exxit(-1); - } - } - } - break; - case unittrwt: - /* This comes not only when setting trweight but also at the end of - * any tree. The following code saves the current position in a - * file and reads to a new line. If there is a new line then we're - * at the end of tree, otherwise warn the user. This function should - * really leave the file alone, so once we're done with 'intree' - * we seek the position back so that it doesn't look like we did - * anything */ - trweight = 1.0 ; - i = ftell (intree); - c = ' '; - while (c == ' ') { - if (eoff(intree)) { - fseek(intree,i,SEEK_SET); - return; - } - c = gettc(intree); - } - fseek(intree,i,SEEK_SET); - if ( c != '\n' && c!= '\r') - printf("WARNING: Tree weight set to 1.0\n"); - if ( c == '\r' ) - if ( (c == gettc(intree)) != '\n') - ungetc(c, intree); - break; - case hsnolength: - (*p)->v = -1; /* signal value that a length is missing */ - break; - default: /* cases hslength, iter, hsnolength */ - break; /* should there be an error message here?*/ - } -} /* initconsnode */ - - -void censor(void) -{ - /* delete groups that are too rare to be in the consensus tree */ - long i; - - i = 1; - do { - if (timesseen[i-1]) - if (!(mre || (mr && (2*(*timesseen[i-1]) > ntrees)) - || (ml && ((*timesseen[i-1]) > mlfrac*ntrees)) - || (strict && ((*timesseen[i-1]) == ntrees)))) 
{ - free(grouping[i - 1]); - free(timesseen[i - 1]); - grouping[i - 1] = NULL; - timesseen[i - 1] = NULL; - } - i++; - } while (i < maxgrp); -} /* censor */ - - -void compress(long *n) -{ - /* push all the nonempty subsets to the front end of their array */ - long i, j; - - i = 1; - j = 1; - do { - while (grouping[i - 1] != NULL) - i++; - if (j <= i) - j = i + 1; - while ((grouping[j - 1] == NULL) && (j < maxgrp)) - j++; - if (j < maxgrp) { - grouping[i - 1] = (group_type *)Malloc(setsz * sizeof(group_type)); - timesseen[i - 1] = (double *)Malloc(sizeof(double)); - memcpy(grouping[i - 1], grouping[j - 1], setsz * sizeof(group_type)); - *timesseen[i - 1] = *timesseen[j - 1]; - free(grouping[j - 1]); - free(timesseen[j - 1]); - grouping[j - 1] = NULL; - timesseen[j - 1] = NULL; - } - } while (j != maxgrp); - (*n) = i - 1; -} /* compress */ - - -void sort(long n) -{ - /* Shell sort keeping grouping, timesseen in same order */ - long gap, i, j; - group_type *stemp; - double rtemp; - - gap = n / 2; - stemp = (group_type *)Malloc(setsz * sizeof(group_type)); - while (gap > 0) { - for (i = gap + 1; i <= n; i++) { - j = i - gap; - while (j > 0) { - if (*timesseen[j - 1] < *timesseen[j + gap - 1]) { - memcpy(stemp, grouping[j - 1], setsz * sizeof(group_type)); - memcpy(grouping[j - 1], grouping[j + gap - 1], setsz * sizeof(group_type)); - memcpy(grouping[j + gap - 1], stemp, setsz * sizeof(group_type)); - rtemp = *timesseen[j - 1]; - *timesseen[j - 1] = *timesseen[j + gap - 1]; - *timesseen[j + gap - 1] = rtemp; - } - j -= gap; - } - } - gap /= 2; - } - free(stemp); -} /* sort */ - - -boolean compatible(long i, long j) -{ - /* are groups i and j compatible? 
*/ - boolean comp; - long k; - - comp = true; - for (k = 0; k < setsz; k++) - if ((grouping[i][k] & grouping[j][k]) != 0) - comp = false; - if (!comp) { - comp = true; - for (k = 0; k < setsz; k++) - if ((grouping[i][k] & ~grouping[j][k]) != 0) - comp = false; - if (!comp) { - comp = true; - for (k = 0; k < setsz; k++) - if ((grouping[j][k] & ~grouping[i][k]) != 0) - comp = false; - if (!comp) { - comp = noroot; - if (comp) { - for (k = 0; k < setsz; k++) - if ((fullset[k] & ~grouping[i][k] & ~grouping[j][k]) != 0) - comp = false; - } - } - } - } - return comp; -} /* compatible */ - - -void eliminate(long *n, long *n2) -{ - /* eliminate groups incompatible with preceding ones */ - long i, j, k; - boolean comp; - - for (i = 2; i <= (*n); i++) { - comp = true; - for (j = 0; comp && (j <= i - 2); j++) { - if ((timesseen[j] != NULL) && *timesseen[j] > 0) { - comp = compatible(i-1,j); - if (!comp) { - (*n2)++; - times2[(*n2) - 1] = (double *)Malloc(sizeof(double)); - group2[(*n2) - 1] = (group_type *)Malloc(setsz * sizeof(group_type)); - *times2[(*n2) - 1] = *timesseen[i - 1]; - memcpy(group2[(*n2) - 1], grouping[i - 1], setsz * sizeof(group_type)); - *timesseen[i - 1] = 0.0; - for (k = 0; k < setsz; k++) - grouping[i - 1][k] = 0; - } - } - } - if (*timesseen[i - 1] == 0.0) { - free(grouping[i - 1]); - free(timesseen[i - 1]); - timesseen[i - 1] = NULL; - grouping[i - 1] = NULL; - } - } -} /* eliminate */ - - -void printset(long n) -{ - /* print out the n sets of species */ - long i, j, k, size; - boolean noneprinted; - - fprintf(outfile, "\nSet (species in order) "); - for (i = 1; i <= spp - 25; i++) - putc(' ', outfile); - fprintf(outfile, " How many times out of %7.2f\n\n", ntrees); - noneprinted = true; - for (i = 0; i < n; i++) { - if ((timesseen[i] != NULL) && (*timesseen[i] > 0)) { - size = 0; - k = 0; - for (j = 1; j <= spp; j++) { - if (j == ((k+1)*SETBITS+1)) k++; - if (((1L << (j - 1 - k*SETBITS)) & grouping[i][k]) != 0) - size++; - } - if (size != 1 && 
!(noroot && size >= (spp-1))) { - noneprinted = false; - k = 0; - for (j = 1; j <= spp; j++) { - if (j == ((k+1)*SETBITS+1)) k++; - if (((1L << (j - 1 - k*SETBITS)) & grouping[i][k]) != 0) - putc('*', outfile); - else - putc('.', outfile); - if (j % 10 == 0) - putc(' ', outfile); - } - for (j = 1; j <= 23 - spp; j++) - putc(' ', outfile); - fprintf(outfile, " %5.2f\n", *timesseen[i]); - } - } - } - if (noneprinted) - fprintf(outfile, " NONE\n"); -} /* printset */ - - -void bigsubset(group_type *st, long n) -{ - /* Find a maximal subset of st among the n groupings, - to be the set at the base of the tree. */ - long i, j; - group_type *su; - boolean max, same; - - su = (group_type *)Malloc(setsz * sizeof(group_type)); - for (i = 0; i < setsz; i++) - su[i] = 0; - for (i = 0; i < n; i++) { - max = true; - for (j = 0; j < setsz; j++) - if ((grouping[i][j] & ~st[j]) != 0) - max = false; - if (max) { - same = true; - for (j = 0; j < setsz; j++) - if (grouping[i][j] != st[j]) - same = false; - max = !same; - } - if (max) { - for (j = 0; j < setsz; j ++) - if ((su[j] & ~grouping[i][j]) != 0) - max = false; - if (max) { - same = true; - for (j = 0; j < setsz; j ++) - if (su[j] != grouping[i][j]) - same = false; - max = !same; - } - if (max) - memcpy(su, grouping[i], setsz * sizeof(group_type)); - } - } - memcpy(st, su, setsz * sizeof(group_type)); - free(su); -} /* bigsubset */ - - -void recontraverse(node **p, group_type *st, long n, long *nextnode) -{ - /* traverse to add next node to consensus tree */ - long i, j = 0, k = 0, l = 0; - - boolean found, same = 0, zero, zero2; - group_type *tempset, *st2; - node *q, *r; - - for (i = 1; i <= spp; i++) { /* count species in set */ - if (i == ((l+1)*SETBITS+1)) l++; - if (((1L << (i - 1 - l*SETBITS)) & st[l]) != 0) { - k++; /* k is the number of species in the set */ - j = i; /* j is set to last species in the set */ - } - } - if (k == 1) { /* if only 1, set up that tip */ - *p = nodep[j - 1]; - (*p)->tip = true; - (*p)->index = 
j; - return; - } - gnu(&grbg, p); /* otherwise make interior node */ - (*p)->tip = false; - (*p)->index = *nextnode; - nodep[*nextnode - 1] = *p; - (*nextnode)++; - (*p)->deltav = 0.0; - for (i = 0; i < n; i++) { /* go through all sets */ - same = true; /* to find one which is this one */ - for (j = 0; j < setsz; j++) - if (grouping[i][j] != st[j]) - same = false; - if (same) - (*p)->deltav = *timesseen[i]; - } - tempset = (group_type *)Malloc(setsz * sizeof(group_type)); - memcpy(tempset, st, setsz * sizeof(group_type)); - q = *p; - st2 = (group_type *)Malloc(setsz * sizeof(group_type)); - memcpy(st2, st, setsz * sizeof(group_type)); - zero = true; /* having made two copies of the set ... */ - for (j = 0; j < setsz; j++) /* see if they are empty */ - if (tempset[j] != 0) - zero = false; - if (!zero) - bigsubset(tempset, n); /* find biggest set within it */ - zero = zero2 = false; /* ... tempset is that subset */ - while (!zero && !zero2) { - zero = zero2 = true; - for (j = 0; j < setsz; j++) { - if (st2[j] != 0) - zero = false; - if (tempset[j] != 0) - zero2 = false; - } - if (!zero && !zero2) { - gnu(&grbg, &q->next); - q->next->index = q->index; - q = q->next; - q->tip = false; - r = *p; - recontraverse(&q->back, tempset, n, nextnode); /* put it on tree */ - *p = r; - q->back->back = q; - for (j = 0; j < setsz; j++) - st2[j] &= ~tempset[j]; /* remove that subset from the set */ - memcpy(tempset, st2, setsz * sizeof(group_type)); /* that becomes set */ - found = false; - i = 1; - while (!found && i <= n) { - if (grouping[i - 1] != 0) { - same = true; - for (j = 0; j < setsz; j++) - if (grouping[i - 1][j] != tempset[j]) - same = false; - } - if ((grouping[i - 1] != 0) && same) - found = true; - else - i++; - } - zero = true; - for (j = 0; j < setsz; j++) - if (tempset[j] != 0) - zero = false; - if (!zero && !found) - bigsubset(tempset, n); - } - } - q->next = *p; - free(tempset); - free(st2); -} /* recontraverse */ - - -void reconstruct(long n) -{ - /* reconstruct 
tree from the subsets */ - long nextnode; - group_type *s; - - nextnode = spp + 1; - s = (group_type *)Malloc(setsz * sizeof(group_type)); - memcpy(s, fullset, setsz * sizeof(group_type)); - recontraverse(&root, s, n, &nextnode); - free(s); -} /* reconstruct */ - - -void coordinates(node *p, long *tipy) -{ - /* establishes coordinates of nodes */ - node *q, *first, *last; - long maxx; - - if (p->tip) { - p->xcoord = 0; - p->ycoord = *tipy; - p->ymin = *tipy; - p->ymax = *tipy; - (*tipy) += down; - return; - } - q = p->next; - maxx = 0; - while (q != p) { - coordinates(q->back, tipy); - if (!q->back->tip) { - if (q->back->xcoord > maxx) - maxx = q->back->xcoord; - } - q = q->next; - } - first = p->next->back; - q = p; - while (q->next != p) - q = q->next; - last = q->back; - p->xcoord = maxx + OVER; - p->ycoord = (long)((first->ycoord + last->ycoord) / 2); - p->ymin = first->ymin; - p->ymax = last->ymax; -} /* coordinates */ - - -void drawline(long i) -{ - /* draws one row of the tree diagram by moving up tree */ - node *p, *q; - long n, j; - boolean extra, done, trif; - node *r, *first = NULL, *last = NULL; - boolean found; - - p = root; - q = root; - fprintf(outfile, " "); - extra = false; - trif = false; - do { - if (!p->tip) { - found = false; - r = p->next; - while (r != p && !found) { - if (i >= r->back->ymin && i <= r->back->ymax) { - q = r->back; - found = true; - } else - r = r->next; - } - first = p->next->back; - r = p; - while (r->next != p) - r = r->next; - last = r->back; - } - done = (p->tip || p == q); - n = p->xcoord - q->xcoord; - if (extra) { - n--; - extra = false; - } - if (q->ycoord == i && !done) { - if (trif) - putc('-', outfile); - else - putc('+', outfile); - trif = false; - if (!q->tip) { - for (j = 1; j <= n - 7; j++) - putc('-', outfile); - if (noroot && (root->next->next->next == root) && - (((root->next->back == q) && root->next->next->back->tip) - || ((root->next->next->back == q) && root->next->back->tip))) - fprintf(outfile, 
"------|"); - else { - if (!strict) { /* write number of times seen */ - if (q->deltav >= 100) - fprintf(outfile, "%5.1f-|", (double)q->deltav); - else if (q->deltav >= 10) - fprintf(outfile, "-%4.1f-|", (double)q->deltav); - else - fprintf(outfile, "--%3.1f-|", (double)q->deltav); - } else - fprintf(outfile, "------|"); - } - extra = true; - trif = true; - } else { - for (j = 1; j < n; j++) - putc('-', outfile); - } - } else if (!p->tip && last->ycoord > i && first->ycoord < i && - (i != p->ycoord || p == root)) { - putc('|', outfile); - for (j = 1; j < n; j++) - putc(' ', outfile); - } else { - for (j = 1; j <= n; j++) - putc(' ', outfile); - if (trif) - trif = false; - } - if (q != p) - p = q; - } while (!done); - if (p->ycoord == i && p->tip) { - for (j = 0; (j < MAXNCH) && (p->nayme[j] != '\0'); j++) - putc(p->nayme[j], outfile); - } - putc('\n', outfile); -} /* drawline */ - - -void printree() -{ - /* prints out diagram of the tree */ - long i; - long tipy; - - if (treeprint) { - fprintf(outfile, "\nCONSENSUS TREE:\n"); - if (mr || mre || ml) { - if (noroot) { - fprintf(outfile, "the numbers on the branches indicate the number\n"); - fprintf(outfile, "of times the partition of the species into the two sets\n"); - fprintf(outfile, "which are separated by that branch occurred\n"); - } else { - fprintf(outfile, "the numbers forks indicate the number\n"); - fprintf(outfile, "of times the group consisting of the species\n"); - fprintf(outfile, "which are to the right of that fork occurred\n"); - } - fprintf(outfile, "among the trees, out of %6.2f trees\n", ntrees); - if (ntrees <= 1.001) - fprintf(outfile, "(trees had fractional weights)\n"); - } - tipy = 1; - coordinates(root, &tipy); - putc('\n', outfile); - for (i = 1; i <= tipy - down; i++) - drawline(i); - putc('\n', outfile); - } - if (noroot) { - fprintf(outfile, "\n remember:"); - if (didreroot) - fprintf(outfile, " (though rerooted by outgroup)"); - fprintf(outfile, " this is an unrooted tree!\n"); - } - 
putc('\n', outfile); -} /* printree */ - - -void enternohash(group_type *s, long *n) -{ - /* if set s is already there, enter it into groupings in the next slot - (without hash-coding). n is number of sets stored there and is updated */ - long i, j; - boolean found; - - found = false; - for (i = 0; i < (*n); i++) { /* go through looking whether it is there */ - found = true; - for (j = 0; j < setsz; j++) { /* check both parts of partition */ - found = found && (grouping[i][j] == s[j]); - found = found && (group2[i][j] == (fullset[j] & (~s[j]))); - } - if (found) - break; - } - if (!found) { /* if not, add it to the slot after the end, - which must be empty */ - grouping[i] = (group_type *)Malloc(setsz * sizeof(group_type)); - timesseen[i] = (double *)Malloc(sizeof(double)); - group2[i] = (group_type *)Malloc(setsz * sizeof(group_type)); - for (j = 0; j < setsz; j++) - grouping[i][j] = s[j]; - *timesseen[i] = 1; - (*n)++; - } -} /* enternohash */ - - -void enterpartition (group_type *s1, long *n) -{ - /* try to put this partition in list of partitions. If implied by others, - don't bother. If others implied by it, replace them. 
If this one - vacuous because only one element in s1, forget it */ - long i, j; - boolean found; - -/* this stuff all to be rewritten but left here so pieces can be used */ - found = false; - for (i = 0; i < (*n); i++) { /* go through looking whether it is there */ - found = true; - for (j = 0; j < setsz; j++) { /* check both parts of partition */ - found = found && (grouping[i][j] == s1[j]); - found = found && (group2[i][j] == (fullset[j] & (~s1[j]))); - } - if (found) - break; - } - if (!found) { /* if not, add it to the slot after the end, - which must be empty */ - grouping[i] = (group_type *)Malloc(setsz * sizeof(group_type)); - timesseen[i] = (double *)Malloc(sizeof(double)); - group2[i] = (group_type *)Malloc(setsz * sizeof(group_type)); - for (j = 0; j < setsz; j++) - grouping[i][j] = s1[j]; - *timesseen[i] = 1; - (*n)++; - } -} /* enterpartition */ - - -void elimboth(long n) -{ - /* for Adams case: eliminate pairs of groups incompatible with each other */ - long i, j; - boolean comp; - - for (i = 0; i < n-1; i++) { - for (j = i+1; j < n; j++) { - comp = compatible(i,j); - if (!comp) { - *timesseen[i] = 0.0; - *timesseen[j] = 0.0; - } - } - if (*timesseen[i] == 0.0) { - free(grouping[i]); - free(timesseen[i]); - timesseen[i] = NULL; - grouping[i] = NULL; - } - } - if (*timesseen[n-1] == 0.0) { - free(grouping[n-1]); - free(timesseen[n-1]); - timesseen[n-1] = NULL; - grouping[n-1] = NULL; - } -} /* elimboth */ - - -void consensus(pattern_elm ***pattern_array, long trees_in) -{ - long i, n, n2, tipy; - - group2 = (group_type **) Malloc(maxgrp*sizeof(group_type *)); - for (i = 0; i < maxgrp; i++) - group2[i] = NULL; - times2 = (double **)Malloc(maxgrp*sizeof(double *)); - for (i = 0; i < maxgrp; i++) - times2[i] = NULL; - n2 = 0; - censor(); /* drop groups that are too rare */ - compress(&n); /* push everybody to front of array */ - if (!strict) { /* drop those incompatible, if any */ - sort(n); - eliminate(&n, &n2); - compress(&n); - } - reconstruct(n); - 
tipy = 1; - coordinates(root, &tipy); - if (prntsets) { - fprintf(outfile, "\nSets included in the consensus tree\n"); - printset(n); - for (i = 0; i < n2; i++) { - if (!grouping[i]) { - grouping[i] = (group_type *)Malloc(setsz * sizeof(group_type)); - timesseen[i] = (double *)Malloc(sizeof(double)); - } - memcpy(grouping[i], group2[i], setsz * sizeof(group_type)); - *timesseen[i] = *times2[i]; - } - n = n2; - fprintf(outfile, "\n\nSets NOT included in consensus tree:"); - if (n2 == 0) - fprintf(outfile, " NONE\n"); - else { - putc('\n', outfile); - printset(n); - } - } - putc('\n', outfile); - if (strict) - fprintf(outfile, "\nStrict consensus tree\n"); - if (mre) - fprintf(outfile, "\nExtended majority rule consensus tree\n"); - if (ml) { - fprintf(outfile, "\nM consensus tree (l = %4.2f)\n", mlfrac); - fprintf(outfile, " l\n"); - } - if (mr) - fprintf(outfile, "\nMajority rule consensus tree\n"); - printree(); - free(nayme); - for (i = 0; i < maxgrp; i++) - free(grouping[i]); - free(grouping); - for (i = 0; i < maxgrp; i++) - free(order[i]); - free(order); - for (i = 0; i < maxgrp; i++) - if (timesseen[i] != NULL) - free(timesseen[i]); - free(timesseen); -} /* consensus */ - - -void rehash() -{ - group_type *s; - long i, j, k; - double temp, ss, smult; - boolean done; - - smult = (sqrt(5.0) - 1) / 2; - s = (group_type *)Malloc(setsz * sizeof(group_type)); - for (i = 0; i < maxgrp/2; i++) { - k = *order[i]; - memcpy(s, grouping[k], setsz * sizeof(group_type)); - ss = 0.0; - for (j = 0; j < setsz; j++) - ss += s[j] /* pow(2, SETBITS*j)*/; - temp = ss * smult; - j = (long)(maxgrp * (temp - floor(temp))); - done = false; - while (!done) { - if (!grping2[j]) { - grping2[j] = (group_type *)Malloc(setsz * sizeof(group_type)); - order2[i] = (long *)Malloc(sizeof(long)); - tmseen2[j] = (double *)Malloc(sizeof(double)); - memcpy(grping2[j], grouping[k], setsz * sizeof(group_type)); - *tmseen2[j] = *timesseen[k]; - *order2[i] = j; - grouping[k] = NULL; - timesseen[k] = 
NULL; - order[i] = NULL; - done = true; - } else { - j++; - if (j >= maxgrp) j -= maxgrp; - } - } - } - free(s); -} /* rehash */ - - -void enternodeset(node* r) -{ /* enter a set of species into the hash table */ - long i, j, start; - double ss, n; - boolean done, same; - double times ; - group_type *s; - - s = r->nodeset; - same = true; - for (i = 0; i < setsz; i++) - if (s[i] != fullset[i]) - same = false; - if (same) - return; - times = trweight; - ss = 0.0; /* compute the hashcode for the set */ - n = ((sqrt(5.0) - 1.0) / 2.0); /* use an irrational multiplier */ - for (i = 0; i < setsz; i++) - ss += s[i] * n; - i = (long)(maxgrp * (ss - floor(ss))) + 1; /* use fractional part of code */ - start = i; - done = false; /* go through seeing if it is there */ - while (!done) { - if (grouping[i - 1]) { /* ... i.e. if group is absent, or */ - same = false; /* (will be false if timesseen = 0) */ - if (!(timesseen[i-1] == 0)) { /* ... if timesseen = 0 */ - same = true; - for (j = 0; j < setsz; j++) { - if (s[j] != grouping[i - 1][j]) - same = false; - } - } - } - if (grouping[i - 1] && same) { /* if it is there, increment timesseen */ - *timesseen[i - 1] += times; - lengths[i - 1] = nodep[r->index - 1]->v; - done = true; - } else if (!grouping[i - 1]) { /* if not there and slot empty ... */ - grouping[i - 1] = (group_type *)Malloc(setsz * sizeof(group_type)); - lasti++; - order[lasti] = (long *)Malloc(sizeof(long)); - timesseen[i - 1] = (double *)Malloc(sizeof(double)); - memcpy(grouping[i - 1], s, setsz * sizeof(group_type)); - *timesseen[i - 1] = times; - *order[lasti] = i - 1; - done = true; - lengths[i - 1] = nodep[r->index -1]->v; - } else { /* otherwise look to put it in next slot ... 
*/ - i++; - if (i > maxgrp) i -= maxgrp; - } - if (!done && i == start) { /* if no place to put it, expand hash table */ - maxgrp = maxgrp*2; - tmseen2 = (double **)Malloc(maxgrp*sizeof(double *)); - for (j = 0; j < maxgrp; j++) - tmseen2[j] = NULL; - grping2 = (group_type **)Malloc(maxgrp*sizeof(group_type *)); - for (j = 0; j < maxgrp; j++) - grping2[j] = NULL; - order2 = (long **)Malloc(maxgrp*sizeof(long *)); - for (j = 0; j < maxgrp; j++) - order2[j] = NULL; - lengths2 = (double *)Malloc(maxgrp*sizeof(double)); - for (j = 0; j < maxgrp; j++) - lengths2[j] = 0.0; - memcpy(lengths2,lengths,maxgrp*sizeof(double) / 2); - rehash(); - free(lengths); - free(timesseen); - free(grouping); - free(order); - timesseen = tmseen2; - grouping = grping2; - lengths = lengths2; - order = order2; - done = true; - lasti = maxgrp/2 - 1; - enternodeset(r); - } - } -} /* enternodeset */ - - -void accumulate(node *r) -{ - node *q; - long i; - - if (r->tip) { - if (!r->nodeset) - r->nodeset = (group_type *)Malloc(setsz * sizeof(group_type)); - for (i = 0; i < setsz; i++) - r->nodeset[i] = 0L; - i = (r->index-1) / (long)SETBITS; - r->nodeset[i] = 1L << (r->index - 1 - i*SETBITS); - } - else { - q = r->next; - while (q != r) { - accumulate(q->back); - q = q->next; - } - q = r->next; - if (!r->nodeset) - r->nodeset = (group_type *)Malloc(setsz * sizeof(group_type)); - for (i = 0; i < setsz; i++) - r->nodeset[i] = 0; - while (q != r) { - for (i = 0; i < setsz; i++) - r->nodeset[i] |= q->back->nodeset[i]; - q = q->next; - } - } - if ((!r->tip && (r->next->next != r)) || r->tip) - enternodeset(r); -} /* accumulate */ - - -void dupname2(Char *name, node *p, node *this) -{ - /* search for a duplicate name recursively */ - node *q; - - if (p->tip) { - if (p != this) { - if (namesSearch(p->nayme)) { - printf("\n\nERROR in user tree: duplicate name found: "); - puts(p->nayme); - printf("\n\n"); - exxit(-1); - } else { - namesAdd(p->nayme); - } - } - } else { - q = p; - while (p->next != q) { - 
dupname2(name, p->next->back, this); - p = p->next; - } - } -} /* dupname2 */ - - -void dupname(node *p) -{ - /* search for a duplicate name in tree */ - node *q; - - if (p->tip) { - if (namesSearch(p->nayme)) { - printf("\n\nERROR in user tree: duplicate name found: "); - puts(p->nayme); - printf("\n\n"); - exxit(-1); - } else { - namesAdd(p->nayme); - } - } else { - q = p; - while (p->next != q) { - dupname(p->next->back); - p = p->next; - } - } -} /* dupname */ - - -void missingnameRecurs(node *p) -{ - /* search for missing names in first tree */ - node *q; - - if (p->tip) { - if (!namesSearch(p->nayme)) { - printf("\n\nERROR in user tree: name %s not found in first tree\n\n\n", - p->nayme); - exxit(-1); - } - } else { - q = p; - while (p->next != q) { - missingnameRecurs(p->next->back); - p = p->next; - } - } -} /* missingnameRecurs */ - -/** - * wrapper for recursive missingname function - */ -void missingname(node *p){ - missingnameRecurs(p); - namesCheckTable(); -} /* missingname */ - -void gdispose(node *p) -{ - /* go through tree throwing away nodes */ - node *q, *r; - - if (p->tip) { - chuck(&grbg, p); - return; - } - q = p->next; - while (q != p) { - gdispose(q->back); - r = q; - q = q->next; - chuck(&grbg, r); - } - chuck(&grbg, q); -} /* gdispose */ - - -void initreenode(node *p) -{ - /* traverse tree and assign species names to tip nodes */ - node *q; - - if (p->tip) { - memcpy(nayme[p->index - 1], p->nayme, MAXNCH); - } else { - q = p->next; - while (q && q != p) { - initreenode(q->back); - q = q->next; - } - } -} /* initreenode */ - - -void reroot(node *outgroup, long *nextnode) -{ - /* reorients tree, putting outgroup in desired position. 
*/ - long i; - boolean nroot; - node *p, *q; - double newv; - - nroot = false; - p = root->next; - while (p != root) { - if ((outgroup->back == p) && (root->next->next->next == root)) { - nroot = true; - p = root; - } else - p = p->next; - } - if (nroot && root->next->next->next == root) { - root->next->next->back->v += root->next->back->v; - root->next->back->v = 0; - } - if (nroot) return; - - p = root; - i = 0; - while (p->next != root) { - p = p->next; - i++; - } - if (i == 2) { - newv = root->next->back->v + root->next->next->back->v; - root->next->back->back = p->back; - p->back->back = root->next->back; - q = root->next; - p->back->v = newv; - q->back->v = newv; - } else { - p->next = root->next; - nodep[root->index-1] = root->next; - gnu(&grbg, &root->next); - q = root->next; - gnu(&grbg, &q->next); - p = q->next; - p->next = root; - q->tip = false; - p->tip = false; - nodep[*nextnode] = root; - (*nextnode)++; - root->index = *nextnode; - root->next->index = root->index; - root->next->next->index = root->index; - } - newv = outgroup->v; - q->back = outgroup; - p->back = outgroup->back; - outgroup->back->back = p; - outgroup->back = q; - outgroup->v = 0; - outgroup->back->v = 0; - root->v = 0; - p->v = newv; - p->back->v = newv; - reorient(root); -} /* reroot */ - - -void reorient(node* n) { - node* p; - - if ( n->tip ) return; - if ( nodep[n->index - 1] != n ) { - nodep[n->index - 1] = n; - if ( n->back ) - n->v = n->back->v; - } - - for ( p = n->next ; p != n ; p = p->next) - reorient(p->back); -} - - -void store_pattern (pattern_elm ***pattern_array, - double *timesseen_changes, int trees_in_file) -{ - /* put a tree's groups into a pattern array. - Don't forget that when not Adams, grouping[] is not compressed. . . */ - long i, total_groups=0, j=0, k; - - /* First, find out how many groups exist in the given tree. 
*/ - for (i = 0 ; i < maxgrp ; i++) - if ((grouping[i] != NULL) && - (*timesseen[i] > timesseen_changes[i])) - /* If this is group exists and is present in the current tree, */ - total_groups++ ; - - /* Then allocate a space to store the bit patterns. . . */ - for (i = 0 ; i < setsz ; i++) { - pattern_array[i][trees_in_file] - = (pattern_elm *) Malloc(sizeof(pattern_elm)) ; - pattern_array[i][trees_in_file]->apattern = - (group_type *) Malloc (total_groups * sizeof (group_type)) ; - pattern_array[i][trees_in_file]->length = - (double *) Malloc (maxgrp * sizeof (double)) ; - for ( j = 0 ; j < maxgrp ; j++ ) { - pattern_array[i][trees_in_file]->length[j] = -1; - } - pattern_array[i][trees_in_file]->patternsize = (long *)Malloc(sizeof(long)); - } - j = 0; - /* Then go through groupings again, and copy in each element - appropriately. */ - for (i = 0 ; i < maxgrp ; i++) - if (grouping[i] != NULL) { - if (*timesseen[i] > timesseen_changes[i]) { - for (k = 0 ; k < setsz ; k++) - pattern_array[k][trees_in_file]->apattern[j] = grouping[i][k] ; - pattern_array[0][trees_in_file]->length[j] = lengths[i]; - j++ ; - timesseen_changes[i] = *timesseen[i] ; - } - } - *pattern_array[0][trees_in_file]->patternsize = total_groups; -} /* store_pattern */ - - -boolean samename(naym name1, plotstring name2) -{ - return !(strncmp(name1, name2, MAXNCH)); -} /* samename */ - - -void reordertips() -{ - /* matchs tip nodes to species names first read in */ - long i, j; - boolean done; - node *p, *q, *r; - for (i = 0; i < spp; i++) { - j = 0; - done = false; - do { - if (samename(nayme[i], nodep[j]->nayme)) { - done = true; - if (i != j) { - p = nodep[i]; - q = nodep[j]; - r = p->back; - p->back->back = q; - q->back->back = p; - p->back = q->back; - q->back = r; - memcpy(q->nayme, p->nayme, MAXNCH); - memcpy(p->nayme, nayme[i], MAXNCH); - } - } - j++; - } while (j < spp && !done); - } -} /* reordertips */ - - -void read_groups (pattern_elm ****pattern_array,double *timesseen_changes, - long 
trees_in_1, long total_trees, FILE *intree) -{ - /* read the trees. Accumulate sets. */ - int i, j, k; - boolean haslengths, initial; - long nextnode, trees_read = 0; - - /* set up the groupings array and the timesseen array */ - grouping = (group_type **) Malloc(maxgrp*sizeof(group_type *)); - lengths = (double *) Malloc(maxgrp*sizeof(double)); - for (i = 0; i < maxgrp; i++) - grouping[i] = NULL; - order = (long **) Malloc(maxgrp*sizeof(long *)); - for (i = 0; i < maxgrp; i++) - order[i] = NULL; - timesseen = (double **)Malloc(maxgrp*sizeof(double *)); - for (i = 0; i < maxgrp; i++) - timesseen[i] = NULL; - - firsttree = true; - grbg = NULL; - initial = true; - while (!eoff(intree)) { /* go till end of input tree file */ - for (i = 0; i < maxgrp; i++) { - lengths[i] = -1; - } - goteof = false; - nextnode = 0; - haslengths = true; - allocate_nodep(&nodep, &intree, &spp); - if (firsttree) - nayme = (naym *)Malloc(spp*sizeof(naym)); - treeread(intree, &root, treenode, &goteof, &firsttree, nodep, - &nextnode, &haslengths, &grbg, initconsnode,true,-1); - if (!initial) { - reordertips(); - missingname(root); - } else { - initial = false; - hashp = (hashtype)Malloc(sizeof(namenode) * NUM_BUCKETS); - for (i=0;i= 1); - if (!done1) { - printf("ERROR: Bad outgroup number: %ld\n", outgrno); - printf(" Must be greater than zero\n"); - } - countup(&loopcount2, 10); - } while (done1 != true); - } - break; - - case 'R': - noroot = !noroot; - break; - - case 'T': - initterminal(&ibmpc, &ansi); - break; - - case '1': - prntsets = !prntsets; - break; - - case '2': - progress = !progress; - break; - - case '3': - treeprint = !treeprint; - break; - - case '4': - trout = !trout; - break; - - } - } else - printf("Not a possible option!\n"); - } - countup(&loopcount, 100); - } while (!done); - if (ml) { - do { - printf("\nFraction (l) of times a branch must appear\n"); - scanf("%lf%*[^\n]", &mlfrac); - getchar(); - } while ((mlfrac < 0.5) || (mlfrac > 1.0)); - } -} /* getoptions */ - - 
-void count_siblings(node **p) -{ - node *tmp_node; - int i; - - if (!(*p)) { - /* This is a leaf, */ - return; - } else { - tmp_node = (*p)->next; - } - - for (i = 0 ; i < 1000; i++) { - if (tmp_node == (*p)) { - /* When we've gone through all the siblings, */ - break; - } else if (tmp_node) { - tmp_node = tmp_node->next; - } else { - /* Should this be executed? */ - return ; - } - } -} /* count_siblings */ - - -void treeout(node *p) -{ - /* write out file with representation of final tree */ - long i, n = 0; - Char c; - node *q; - double x; - - count_siblings (&p); - - if (p->tip) { - /* If we're at a node which is a leaf, figure out how long the - name is and print it out. */ - for (i = 1; i <= MAXNCH; i++) { - if (p->nayme[i - 1] != '\0') - n = i; - } - for (i = 0; i < n; i++) { - c = p->nayme[i]; - if (c == ' ') - c = '_'; - putc(c, outtree); - } - col += n; - } else { - /* If we're at a furcation, print out the proper formatting, loop - through all the children, calling the procedure recursively. */ - putc('(', outtree); - col++; - q = p->next; - while (q != p) { - /* This should terminate when we've gone through all the - siblings, */ - treeout(q->back); - q = q->next; - if (q == p) - break; - putc(',', outtree); - col++; - if (col > 60) { - putc('\n', outtree); - col = 0; - } - } - putc(')', outtree); - col++; - } - - if (p->tip) - x = ntrees; - else - x = (double)p->deltav; - - if (p == root) { - /* When we're all done with this tree, */ - fprintf(outtree, ";\n"); - return; - } - - /* Figure out how many characters the branch length requires: */ - else { - if (!strict) { - if (x >= 100.0) { - fprintf(outtree, ":%5.1f", x); - col += 4; - } else if (x >= 10.0) { - fprintf(outtree, ":%4.1f", x); - col += 3; - } else if (x >= 0.99) { - fprintf(outtree, ":%3.1f", x); - col += 2; - } else { - fprintf(outtree, ":%4.2f", x); - col += 3; - } - } - } -} /* treeout */ - - -int main(int argc, Char *argv[]) -{ - /* Local variables added by Dan F. 
*/ - pattern_elm ***pattern_array; - double *timesseen_changes = NULL; - long trees_in = 0; - long i, j; - node *p, *q; - -#ifdef MAC - argc = 1; /* macsetup("Consense", ""); */ - argv[0] = "Consense"; -#endif - init(argc, argv); - openfile(&intree, INTREE, "input tree file", "r", argv[0], intreename); - openfile(&outfile, OUTFILE, "output file", "w", argv[0], outfilename); - - /* Initialize option-based variables, then ask for changes regarding - their values. */ - getoptions(); - - ntrees = 0.0; - maxgrp = 32767; /* initial size of set hash table */ - lasti = -1; - - if (trout) - openfile(&outtree, OUTTREE, "output tree file", "w", argv[0], outtreename); - if (prntsets) - fprintf(outfile, "Species in order: \n\n"); - - trees_in = countsemic(&intree); - - /* Read the tree file and put together grouping, order, and timesseen */ - read_groups (&pattern_array, timesseen_changes, trees_in, trees_in, intree); - /* Compute the consensus tree. */ - putc('\n', outfile); - nodep = (pointarray)Malloc(2*(1+spp)*sizeof(node *)); - for (i = 0; i < spp; i++) { - nodep[i] = (node *)Malloc(sizeof(node)); - for (j = 0; j < MAXNCH; j++) - nodep[i]->nayme[j] = '\0'; - strncpy(nodep[i]->nayme, nayme[i], MAXNCH); - } - for (i = spp; i < 2*(1+spp); i++) - nodep[i] = NULL; - consensus(pattern_array, trees_in); - printf("\n"); - if (trout) { - treeout(root); - if (progress) - printf("Consensus tree written to file \"%s\"\n\n", outtreename); - } - if (progress) - printf("Output written to file \"%s\"\n\n", outfilename); - for (i = 0; i < spp; i++) - free(nodep[i]); - for (i = spp; i < 2*(1 + spp); i++) { - if (nodep[i] != NULL) { - p = nodep[i]->next; - do { - q = p->next; - free(p); - p = q; - } while (p != nodep[i]); - free(p); - } - } - free(nodep); - FClose(outtree); - FClose(intree); - FClose(outfile); - -#ifdef MAC - fixmacfile(outfilename); - fixmacfile(outtreename); -#endif -printf("Done.\n\n"); - -#ifdef WIN32 - phyRestoreConsoleAttributes(); -#endif - -return 0; -} /* main */ - 
diff --git a/forester/archive/RIO/others/phylip_mod/src/dist.c b/forester/archive/RIO/others/phylip_mod/src/dist.c deleted file mode 100644 index 3500c12..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/dist.c +++ /dev/null @@ -1,526 +0,0 @@ -#include "phylip.h" -#include "dist.h" - -/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. - Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. - Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. */ - -void alloctree(pointptr *treenode, long nonodes) -{ - /* allocate treenode dynamically */ - /* used in fitch, kitsch & neighbor */ - long i, j; - node *p, *q; - - *treenode = (pointptr)Malloc(nonodes*sizeof(node *)); - for (i = 0; i < spp; i++) - (*treenode)[i] = (node *)Malloc(sizeof(node)); - for (i = spp; i < nonodes; i++) { - q = NULL; - for (j = 1; j <= 3; j++) { - p = (node *)Malloc(sizeof(node)); - p->next = q; - q = p; - } - p->next->next->next = p; - (*treenode)[i] = p; - } -} /* alloctree */ - - -void freetree(pointptr *treenode, long nonodes) -{ - long i, j; - node *p, *q; - - for (i = 0; i < spp; i++) - free((*treenode)[i]); - for (i = spp; i < nonodes; i++) { - p = (*treenode)[i]; - for (j = 1; j <= 3; j++) { - q = p; - p = p->next; - free(q); - } - } - free(*treenode); -} /* freetree */ - - -void allocd(long nonodes, pointptr treenode) -{ - /* used in fitch & kitsch */ - long i, j; - node *p; - - for (i = 0; i < (spp); i++) { - treenode[i]->d = (vector)Malloc(nonodes*sizeof(double)); - } - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - for (j = 1; j <= 3; j++) { - p->d = (vector)Malloc(nonodes*sizeof(double)); - p = p->next; - } - } -} - - -void freed(long nonodes, pointptr treenode) -{ - /* used in fitch */ - long i, j; - node *p; - - for (i = 0; i < (spp); i++) { - free(treenode[i]->d); - } - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - for (j = 
1; j <= 3; j++) { - free(p->d); - p = p->next; - } - } -} - - -void allocw(long nonodes, pointptr treenode) -{ - /* used in fitch & kitsch */ - long i, j; - node *p; - - for (i = 0; i < (spp); i++) { - treenode[i]->w = (vector)Malloc(nonodes*sizeof(double)); - } - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - for (j = 1; j <= 3; j++) { - p->w = (vector)Malloc(nonodes*sizeof(double)); - p = p->next; - } - } -} - - -void freew(long nonodes, pointptr treenode) -{ - /* used in fitch */ - long i, j; - node *p; - - for (i = 0; i < (spp); i++) { - free(treenode[i]->w); - } - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - for (j = 1; j <= 3; j++) { - free(p->w); - p = p->next; - } - } -} - - -void setuptree(tree *a, long nonodes) -{ - /* initialize a tree */ - /* used in fitch, kitsch, & neighbor */ - long i=0; - node *p; - - for (i = 1; i <= nonodes; i++) { - a->nodep[i - 1]->back = NULL; - a->nodep[i - 1]->tip = (i <= spp); - a->nodep[i - 1]->iter = true; - a->nodep[i - 1]->index = i; - a->nodep[i - 1]->t = 0.0; - a->nodep[i - 1]->sametime = false; - a->nodep[i - 1]->v = 0.0; - if (i > spp) { - p = a->nodep[i-1]->next; - while (p != a->nodep[i-1]) { - p->back = NULL; - p->tip = false; - p->iter = true; - p->index = i; - p->t = 0.0; - p->sametime = false; - p = p->next; - } - } - } - a->likelihood = -1.0; - a->start = a->nodep[0]; - a->root = NULL; -} /* setuptree */ - - -void inputdata(boolean replicates, boolean printdata, boolean lower, - boolean upper, vector *x, intvector *reps) -{ - /* read in distance matrix */ - /* used in fitch & neighbor */ - long i=0, j=0, k=0, columns=0; - boolean skipit=false, skipother=false; - - if (replicates) - columns = 4; - else - columns = 6; - if (printdata) { - fprintf(outfile, "\nName Distances"); - if (replicates) - fprintf(outfile, " (replicates)"); - fprintf(outfile, "\n---- ---------"); - if (replicates) - fprintf(outfile, "-------------"); - fprintf(outfile, "\n\n"); - } - for (i = 0; i < spp; i++) { - x[i][i] 
= 0.0; - scan_eoln(infile); - initname(i); - for (j = 0; j < spp; j++) { - skipit = ((lower && j + 1 >= i + 1) || (upper && j + 1 <= i + 1)); - skipother = ((lower && i + 1 >= j + 1) || (upper && i + 1 <= j + 1)); - if (!skipit) { - if (eoln(infile)) - scan_eoln(infile); - if (fscanf(infile, "%lf", &x[i][j]) != 1) { - printf("The infile is of the wrong type\n"); - exxit(-1); - } - if (replicates) { - if (eoln(infile)) - scan_eoln(infile); - fscanf(infile, "%ld", &reps[i][j]); - } else - reps[i][j] = 1; - } - if (!skipit && skipother) { - x[j][i] = x[i][j]; - reps[j][i] = reps[i][j]; - } - if ((i == j) && (fabs(x[i][j]) > 0.000000001)) { - printf("\nERROR: diagonal element of row %ld of distance matrix ", i+1); - printf("is not zero.\n"); - printf(" Is it a distance matrix?\n\n"); - exxit(-1); - } - if ((j < i) && (fabs(x[i][j]-x[j][i]) > 0.000000001)) { - printf("ERROR: distance matrix is not symmetric:\n"); - printf(" (%ld,%ld) element and (%ld,%ld) element are unequal.\n", - i+1, j+1, j+1, i+1); - printf(" They are %10.6f and %10.6f, respectively.\n", - x[i][j], x[j][i]); - printf(" Is it a distance matrix?\n\n"); - exxit(-1); - } - } - } - scan_eoln(infile); - if (!printdata) - return; - for (i = 0; i < spp; i++) { - for (j = 0; j < nmlngth; j++) - putc(nayme[i][j], outfile); - putc(' ', outfile); - for (j = 1; j <= spp; j++) { - fprintf(outfile, "%10.5f", x[i][j - 1]); - if (replicates) - fprintf(outfile, " (%3ld)", reps[i][j - 1]); - if (j % columns == 0 && j < spp) { - putc('\n', outfile); - for (k = 1; k <= nmlngth + 1; k++) - putc(' ', outfile); - } - } - putc('\n', outfile); - } - putc('\n', outfile); -} /* inputdata */ - - -void coordinates(node *p, double lengthsum, long *tipy, double *tipmax, - node *start, boolean njoin) -{ - /* establishes coordinates of nodes */ - node *q, *first, *last; - - if (p->tip) { - p->xcoord = (long)(over * lengthsum + 0.5); - p->ycoord = *tipy; - p->ymin = *tipy; - p->ymax = *tipy; - (*tipy) += down; - if (lengthsum > 
*tipmax) - *tipmax = lengthsum; - return; - } - q = p->next; - do { - if (q->back) - coordinates(q->back, lengthsum + q->v, tipy,tipmax, start, njoin); - q = q->next; - } while ((p == start || p != q) && (p != start || p->next != q)); - first = p->next->back; - q = p; - while (q->next != p && q->next->back) /* is this right ? */ - q = q->next; - last = q->back; - p->xcoord = (long)(over * lengthsum + 0.5); - if (p == start && p->back) - p->ycoord = p->next->next->back->ycoord; - else - p->ycoord = (first->ycoord + last->ycoord) / 2; - p->ymin = first->ymin; - p->ymax = last->ymax; -} /* coordinates */ - - -void drawline(long i, double scale, node *start, boolean rooted) -{ - /* draws one row of the tree diagram by moving up tree */ - node *p, *q; - long n=0, j=0; - boolean extra=false, trif=false; - node *r, *first =NULL, *last =NULL; - boolean done=false; - - p = start; - q = start; - extra = false; - trif = false; - if (i == (long)p->ycoord && p == start) { /* display the root */ - if (rooted) { - if (p->index - spp >= 10) - fprintf(outfile, "-"); - else - fprintf(outfile, "--"); - } - else { - if (p->index - spp >= 10) - fprintf(outfile, " "); - else - fprintf(outfile, " "); - } - if (p->index - spp >= 10) - fprintf(outfile, "%2ld", p->index - spp); - else - fprintf(outfile, "%ld", p->index - spp); - extra = true; - trif = true; - } else - fprintf(outfile, " "); - do { - if (!p->tip) { /* internal nodes */ - r = p->next; - /* r->back here is going to the same node. */ - do { - if (!r->back) { - r = r->next; - continue; - } - if (i >= r->back->ymin && i <= r->back->ymax) { - q = r->back; - break; - } - r = r->next; - } while (!((p != start && r == p) || (p == start && r == p->next))); - first = p->next->back; - r = p; - while (r->next != p) - r = r->next; - last = r->back; - if (!rooted && (p == start)) - last = p->back; - } /* end internal node case... 
*/ - /* draw the line: */ - done = (p->tip || p == q); - n = (long)(scale * (q->xcoord - p->xcoord) + 0.5); - if (!q->tip) { - if ((n < 3) && (q->index - spp >= 10)) - n = 3; - if ((n < 2) && (q->index - spp < 10)) - n = 2; - } - if (extra) { - n--; - extra = false; - } - if ((long)q->ycoord == i && !done) { - if (p->ycoord != q->ycoord) - putc('+', outfile); - if (trif) { - n++; - trif = false; - } - if (!q->tip) { - for (j = 1; j <= n - 2; j++) - putc('-', outfile); - if (q->index - spp >= 10) - fprintf(outfile, "%2ld", q->index - spp); - else - fprintf(outfile, "-%ld", q->index - spp); - extra = true; - } else { - for (j = 1; j < n; j++) - putc('-', outfile); - } - } else if (!p->tip) { - if ((long)last->ycoord > i && (long)first->ycoord < i - && i != (long)p->ycoord) { - putc('!', outfile); - for (j = 1; j < n; j++) - putc(' ', outfile); - } else { - for (j = 1; j <= n; j++) - putc(' ', outfile); - trif = false; - } - } - if (q != p) - p = q; - } while (!done); - if ((long)p->ycoord == i && p->tip) { - for (j = 0; j < nmlngth; j++) - putc(nayme[p->index - 1][j], outfile); - } - putc('\n', outfile); -} /* drawline */ - - -void printree(node *start, boolean treeprint, - boolean njoin, boolean rooted) -{ - /* prints out diagram of the tree */ - /* used in fitch & neighbor */ - long i; - long tipy; - double scale,tipmax; - - if (!treeprint) - return; - putc('\n', outfile); - tipy = 1; - tipmax = 0.0; - coordinates(start, 0.0, &tipy, &tipmax, start, njoin); - scale = 1.0 / (long)(tipmax + 1.000); - for (i = 1; i <= (tipy - down); i++) - drawline(i, scale, start, rooted); - putc('\n', outfile); -} /* printree */ - - -void treeoutr(node *p, long *col, tree *curtree) -{ - /* write out file with representation of final tree. - * Rooted case. Used in kitsch and neighbor. 
*/ - long i, n, w; - Char c; - double x; - - if (p->tip) { - n = 0; - for (i = 1; i <= nmlngth; i++) { - if (nayme[p->index - 1][i - 1] != ' ') - n = i; - } - for (i = 0; i < n; i++) { - c = nayme[p->index - 1][i]; - if (c == ' ') - c = '_'; - putc(c, outtree); - } - (*col) += n; - } else { - putc('(', outtree); - (*col)++; - treeoutr(p->next->back,col,curtree); - putc(',', outtree); - (*col)++; - if ((*col) > 55) { - putc('\n', outtree); - (*col) = 0; - } - treeoutr(p->next->next->back,col,curtree); - putc(')', outtree); - (*col)++; - } - x = p->v; - if (x > 0.0) - w = (long)(0.43429448222 * log(x)); - else if (x == 0.0) - w = 0; - else - w = (long)(0.43429448222 * log(-x)) + 1; - if (w < 0) - w = 0; - if (p == curtree->root) - fprintf(outtree, ";\n"); - else { - fprintf(outtree, ":%*.5f", (int)(w + 7), x); - (*col) += w + 8; - } -} /* treeoutr */ - - -void treeout(node *p, long *col, double m, boolean njoin, node *start) -{ - /* write out file with representation of final tree */ - /* used in fitch & neighbor */ - long i=0, n=0, w=0; - Char c; - double x=0.0; - - if (p->tip) { - n = 0; - for (i = 1; i <= nmlngth; i++) { - if (nayme[p->index - 1][i - 1] != ' ') - n = i; - } - for (i = 0; i < n; i++) { - c = nayme[p->index - 1][i]; - if (c == ' ') - c = '_'; - putc(c, outtree); - } - *col += n; - } else { - putc('(', outtree); - (*col)++; - treeout(p->next->back, col, m, njoin, start); - putc(',', outtree); - (*col)++; - if (*col > 55) { - putc('\n', outtree); - *col = 0; - } - treeout(p->next->next->back, col, m, njoin, start); - if (p == start && njoin) { - putc(',', outtree); - treeout(p->back, col, m, njoin, start); - } - putc(')', outtree); - (*col)++; - } - x = p->v; - if (x > 0.0) - w = (long)(m * log(x)); - else if (x == 0.0) - w = 0; - else - w = (long)(m * log(-x)) + 1; - if (w < 0) - w = 0; - if (p == start) - fprintf(outtree, ";\n"); - else { - fprintf(outtree, ":%*.5f", (int) w + 7, x); - *col += w + 8; - } -} /* treeout */ - diff --git 
a/forester/archive/RIO/others/phylip_mod/src/dist.h b/forester/archive/RIO/others/phylip_mod/src/dist.h deleted file mode 100644 index 574fd1f..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/dist.h +++ /dev/null @@ -1,35 +0,0 @@ - -/* version 3.6. (c) Copyright 1993-2000 by the University of Washington. - Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. - Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. */ - -/* - dist.h: included in fitch, kitsch, & neighbor -*/ - -#define over 60 - - -typedef long *intvector; - -typedef node **pointptr; - -#ifndef OLDC -/*function prototypes*/ -void alloctree(pointptr *, long); -void freetree(pointptr *, long); -void allocd(long, pointptr); -void freed(long, pointptr); -void allocw(long, pointptr); -void freew(long, pointptr); -void setuptree(tree *, long); -void inputdata(boolean, boolean, boolean, boolean, vector *, intvector *); -void coordinates(node *, double, long *, double *, node *, boolean); -void drawline(long, double, node *, boolean); -void printree(node *, boolean, boolean, boolean); -void treeoutr(node *, long *, tree *); -void treeout(node *, long *, double, boolean, node *); -/*function prototypes*/ -#endif - diff --git a/forester/archive/RIO/others/phylip_mod/src/fitch.c b/forester/archive/RIO/others/phylip_mod/src/fitch.c deleted file mode 100644 index c55d050..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/fitch.c +++ /dev/null @@ -1,1176 +0,0 @@ - -#include "phylip.h" -#include "dist.h" - -/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. - Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. - Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. 
*/ - -#define zsmoothings 10 /* number of zero-branch correction iterations */ -#define epsilonf 0.000001 /* a very small but not too small number */ -#define delta 0.0001 /* a not quite so small number */ -#define MAXNUMTREES 100000000 /* a number bigger than conceivable numtrees */ - - -#ifndef OLDC -/* function prototypes */ -void getoptions(void); -void allocrest(void); -void doinit(void); -void inputoptions(void); -void fitch_getinput(void); -void secondtraverse(node *, double , long *, double *); -void firsttraverse(node *, long *, double *); -double evaluate(tree *); -void nudists(node *, node *); -void makedists(node *); - -void makebigv(node *); -void correctv(node *); -void alter(node *, node *); -void nuview(node *); -void update(node *); -void smooth(node *); -void filltraverse(node *, node *, boolean); -void fillin(node *, node *, boolean); -void insert_(node *, node *, boolean); -void copynode(node *, node *); - -void copy_(tree *, tree *); -void setuptipf(long, tree *); -void buildnewtip(long , tree *, long); -void buildsimpletree(tree *, long); -void addtraverse(node *, node *, boolean, long *, boolean *); -void re_move(node **, node **); -void rearrange(node *, long *, long *, boolean *); -void describe(node *); -void summarize(long); -void nodeinit(node *); -void initrav(node *); -void treevaluate(void); -void maketree(void); -void globrearrange(long* numtrees,boolean* succeeded); -/* function prototypes */ -#endif - - - -Char infilename[FNMLNGTH], outfilename[FNMLNGTH], intreename[FNMLNGTH], outtreename[FNMLNGTH]; -long nonodes2, outgrno, nums, col, datasets, ith, njumble, jumb=0; -long inseed; -vector *x; -intvector *reps; -boolean minev, global, jumble, lengths, usertree, lower, upper, negallowed, - outgropt, replicates, trout, printdata, progress, treeprint, - mulsets, firstset; -double power; -double trweight; /* to make treeread happy */ -boolean goteof, haslengths; /* ditto ... */ -boolean first; /* ditto ... 
*/ -node *addwhere; - -longer seed; -long *enterorder; -tree curtree, priortree, bestree, bestree2; -Char ch; -char *progname; - - - -void getoptions() -{ - /* interactively set options */ - long inseed0=0, loopcount; - Char ch; - boolean done=false; - - putchar('\n'); - minev = false; - global = false; - jumble = false; - njumble = 1; - lengths = false; - lower = false; - negallowed = false; - outgrno = 1; - outgropt = false; - power = 2.0; - replicates = false; - trout = true; - upper = false; - usertree = false; - printdata = false; - progress = true; - treeprint = true; - loopcount = 0; - do { - cleerhome(); - printf("\nFitch-Margoliash method version %s\n\n",VERSION); - printf("Settings for this run:\n"); - printf(" D Method (F-M, Minimum Evolution)? %s\n", - (minev ? "Minimum Evolution" : "Fitch-Margoliash")); - printf(" U Search for best tree? %s\n", - (usertree ? "No, use user trees in input file" : "Yes")); - if (usertree) { - printf(" N Use lengths from user trees? %s\n", - (lengths ? "Yes" : "No")); - } - printf(" P Power?%9.5f\n",power); - printf(" - Negative branch lengths allowed? %s\n", - negallowed ? "Yes" : "No"); - printf(" O Outgroup root?"); - if (outgropt) - printf(" Yes, at species number%3ld\n", outgrno); - else - printf(" No, use as outgroup species%3ld\n", outgrno); - printf(" L Lower-triangular data matrix?"); - if (lower) - printf(" Yes\n"); - else - printf(" No\n"); - printf(" R Upper-triangular data matrix?"); - if (upper) - printf(" Yes\n"); - else - printf(" No\n"); - printf(" S Subreplicates?"); - if (replicates) - printf(" Yes\n"); - else - printf(" No\n"); - if (!usertree) { - printf(" G Global rearrangements?"); - if (global) - printf(" Yes\n"); - else - printf(" No\n"); - printf(" J Randomize input order of species?"); - if (jumble) - printf(" Yes (seed =%8ld,%3ld times)\n", inseed0, njumble); - else - printf(" No. 
Use input order\n"); - } - printf(" M Analyze multiple data sets?"); - if (mulsets) - printf(" Yes, %2ld sets\n", datasets); - else - printf(" No\n"); - printf(" 0 Terminal type (IBM PC, ANSI, none)?"); - if (ibmpc) - printf(" IBM PC\n"); - if (ansi) - printf(" ANSI\n"); - if (!(ibmpc || ansi)) - printf(" (none)\n"); - printf(" 1 Print out the data at start of run"); - if (printdata) - printf(" Yes\n"); - else - printf(" No\n"); - printf(" 2 Print indications of progress of run"); - if (progress) - printf(" Yes\n"); - else - printf(" No\n"); - printf(" 3 Print out tree"); - if (treeprint) - printf(" Yes\n"); - else - printf(" No\n"); - printf(" 4 Write out trees onto tree file?"); - if (trout) - printf(" Yes\n"); - else - printf(" No\n"); - printf( - "\n Y to accept these or type the letter for one to change\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%c%*[^\n]", &ch); - getchar(); - uppercase(&ch); - done = (ch == 'Y'); - if (!done) { - if (strchr("DJOUNPG-LRSM01234",ch) != NULL) { - switch (ch) { - - case 'D': - minev = !minev; - if (minev && (!negallowed)) - negallowed = true; - break; - - case '-': - negallowed = !negallowed; - break; - - case 'G': - global = !global; - break; - - case 'J': - jumble = !jumble; - if (jumble) - initjumble(&inseed, &inseed0, seed, &njumble); - else njumble = 1; - break; - - case 'L': - lower = !lower; - break; - - case 'N': - lengths = !lengths; - break; - - case 'O': - outgropt = !outgropt; - if (outgropt) - initoutgroup(&outgrno, spp); - break; - - case 'P': - initpower(&power); - break; - - case 'R': - upper = !upper; - break; - - case 'S': - replicates = !replicates; - break; - - case 'U': - usertree = !usertree; - break; - - case 'M': - mulsets = !mulsets; - if (mulsets) - initdatasets(&datasets); - jumble = true; - if (jumble) - initseed(&inseed, &inseed0, seed); - break; - - case '0': - initterminal(&ibmpc, &ansi); - break; - - case '1': - printdata = !printdata; - break; - - case '2': - progress = 
!progress; - break; - - case '3': - treeprint = !treeprint; - break; - - case '4': - trout = !trout; - break; - } - } else - printf("Not a possible option!\n"); - } - countup(&loopcount, 100); - } while (!done); - if (lower && upper) { - printf("ERROR: Data matrix cannot be both uppeR and Lower triangular\n"); - exxit(-1); - } -} /* getoptions */ - - -void allocrest() -{ - long i; - - x = (vector *)Malloc(spp*sizeof(vector)); - reps = (intvector *)Malloc(spp*sizeof(intvector)); - for (i=0;i 1) { - alloctree(&bestree2.nodep, nonodes2); - allocd(nonodes2, bestree2.nodep); - allocw(nonodes2, bestree2.nodep); - } - } - allocrest(); -} /* doinit */ - - -void inputoptions() -{ - /* print options information */ - if (!firstset) - samenumsp2(ith); - fprintf(outfile, "\nFitch-Margoliash method version %s\n\n",VERSION); - if (minev) - fprintf(outfile, "Minimum evolution method option\n\n"); - fprintf(outfile, " __ __ 2\n"); - fprintf(outfile, " \\ \\ (Obs - Exp)\n"); - fprintf(outfile, "Sum of squares = /_ /_ ------------\n"); - fprintf(outfile, " "); - if (power == (long)power) - fprintf(outfile, "%2ld\n", (long)power); - else - fprintf(outfile, "%4.1f\n", power); - fprintf(outfile, " i j Obs\n\n"); - fprintf(outfile, "Negative branch lengths "); - if (!negallowed) - fprintf(outfile, "not "); - fprintf(outfile, "allowed\n\n"); - if (global) - fprintf(outfile, "global optimization\n\n"); -} /* inputoptions */ - - -void fitch_getinput() -{ - /* reads the input data */ - inputoptions(); -} /* fitch_getinput */ - - -void secondtraverse(node *q, double y, long *nx, double *sum) -{ - /* from each of those places go back to all others */ - /* nx comes from firsttraverse */ - /* sum comes from evaluate via firsttraverse */ - double z=0.0, TEMP=0.0; - - z = y + q->v; - if (q->tip) { - TEMP = q->d[(*nx) - 1] - z; - *sum += q->w[(*nx) - 1] * (TEMP * TEMP); - } else { - secondtraverse(q->next->back, z, nx, sum); - secondtraverse(q->next->next->back, z, nx,sum); - } -} /* secondtraverse 
*/ - - -void firsttraverse(node *p, long *nx, double *sum) -{ - /* go through tree calculating branch lengths */ - if (minev && (p != curtree.start)) - *sum += p->v; - if (p->tip) { - if (!minev) { - *nx = p->index; - secondtraverse(p->back, 0.0, nx, sum); - } - } else { - firsttraverse(p->next->back, nx,sum); - firsttraverse(p->next->next->back, nx,sum); - } -} /* firsttraverse */ - - -double evaluate(tree *t) -{ - double sum=0.0; - long nx=0; - /* evaluate likelihood of a tree */ - firsttraverse(t->start->back ,&nx, &sum); - firsttraverse(t->start, &nx, &sum); - if ((!minev) && replicates && (lower || upper)) - sum /= 2; - t->likelihood = -sum; - return (-sum); -} /* evaluate */ - - -void nudists(node *x, node *y) -{ - /* compute distance between an interior node and tips */ - long nq=0, nr=0, nx=0, ny=0; - double dil=0, djl=0, wil=0, wjl=0, vi=0, vj=0; - node *qprime, *rprime; - - qprime = x->next; - rprime = qprime->next->back; - qprime = qprime->back; - ny = y->index; - dil = qprime->d[ny - 1]; - djl = rprime->d[ny - 1]; - wil = qprime->w[ny - 1]; - wjl = rprime->w[ny - 1]; - vi = qprime->v; - vj = rprime->v; - x->w[ny - 1] = wil + wjl; - if (wil + wjl <= 0.0) - x->d[ny - 1] = 0.0; - else - x->d[ny - 1] = ((dil - vi) * wil + (djl - vj) * wjl) / (wil + wjl); - nx = x->index; - nq = qprime->index; - nr = rprime->index; - dil = y->d[nq - 1]; - djl = y->d[nr - 1]; - wil = y->w[nq - 1]; - wjl = y->w[nr - 1]; - y->w[nx - 1] = wil + wjl; - if (wil + wjl <= 0.0) - y->d[nx - 1] = 0.0; - else - y->d[nx - 1] = ((dil - vi) * wil + (djl - vj) * wjl) / (wil + wjl); -} /* nudists */ - - -void makedists(node *p) -{ - /* compute distances among three neighbors of a node */ - long i=0, nr=0, ns=0; - node *q, *r, *s; - - r = p->back; - nr = r->index; - for (i = 1; i <= 3; i++) { - q = p->next; - s = q->back; - ns = s->index; - if (s->w[nr - 1] + r->w[ns - 1] <= 0.0) - p->dist = 0.0; - else - p->dist = (s->w[nr - 1] * s->d[nr - 1] + r->w[ns - 1] * r->d[ns - 1]) / - (s->w[nr - 1] 
+ r->w[ns - 1]); - p = q; - r = s; - nr = ns; - } -} /* makedists */ - - -void makebigv(node *p) -{ - /* make new branch length */ - long i=0; - node *temp, *q, *r; - - q = p->next; - r = q->next; - for (i = 1; i <= 3; i++) { - if (p->iter) { - p->v = (p->dist + r->dist - q->dist) / 2.0; - p->back->v = p->v; - } - temp = p; - p = q; - q = r; - r = temp; - } -} /* makebigv */ - - -void correctv(node *p) -{ - /* iterate branch lengths if some are to be zero */ - node *q, *r, *temp; - long i=0, j=0, n=0, nq=0, nr=0, ntemp=0; - double wq=0.0, wr=0.0; - - q = p->next; - r = q->next; - n = p->back->index; - nq = q->back->index; - nr = r->back->index; - for (i = 1; i <= zsmoothings; i++) { - for (j = 1; j <= 3; j++) { - if (p->iter) { - wr = r->back->w[n - 1] + p->back->w[nr - 1]; - wq = q->back->w[n - 1] + p->back->w[nq - 1]; - if (wr + wq <= 0.0 && !negallowed) - p->v = 0.0; - else - p->v = ((p->dist - q->v) * wq + (r->dist - r->v) * wr) / (wr + wq); - if (p->v < 0 && !negallowed) - p->v = 0.0; - p->back->v = p->v; - } - temp = p; - p = q; - q = r; - r = temp; - ntemp = n; - n = nq; - nq = nr; - nr = ntemp; - } - } -} /* correctv */ - - -void alter(node *x, node *y) -{ - /* traverse updating these views */ - nudists(x, y); - if (!y->tip) { - alter(x, y->next->back); - alter(x, y->next->next->back); - } -} /* alter */ - - -void nuview(node *p) -{ - /* renew information about subtrees */ - long i=0; - node *q, *r, *pprime, *temp; - - q = p->next; - r = q->next; - for (i = 1; i <= 3; i++) { - temp = p; - pprime = p->back; - alter(p, pprime); - p = q; - q = r; - r = temp; - } -} /* nuview */ - - -void update(node *p) -{ - /* update branch lengths around a node */ - - if (p->tip) - return; - makedists(p); - if (p->iter || p->next->iter || p->next->next->iter) { - makebigv(p); - correctv(p); - } - nuview(p); -} /* update */ - - -void smooth(node *p) -{ - /* go through tree getting new branch lengths and views */ - if (p->tip) - return; - update(p); - smooth(p->next->back); - 
smooth(p->next->next->back); -} /* smooth */ - - -void filltraverse(node *pb, node *qb, boolean contin) -{ - if (qb->tip) - return; - if (contin) { - filltraverse(pb, qb->next->back,contin); - filltraverse(pb, qb->next->next->back,contin); - nudists(qb, pb); - return; - } - if (!qb->next->back->tip) - nudists(qb->next->back, pb); - if (!qb->next->next->back->tip) - nudists(qb->next->next->back, pb); -} /* filltraverse */ - - -void fillin(node *pa, node *qa, boolean contin) -{ - if (!pa->tip) { - fillin(pa->next->back, qa, contin); - fillin(pa->next->next->back, qa, contin); - } - filltraverse(pa, qa, contin); -} /* fillin */ - - -void insert_(node *p, node *q, boolean contin_) -{ - /* put p and q together and iterate info. on resulting tree */ - double x=0.0, oldlike; - hookup(p->next->next, q->back); - hookup(p->next, q); - x = q->v / 2.0; - p->v = 0.0; - p->back->v = 0.0; - p->next->v = x; - p->next->back->v = x; - p->next->next->back->v = x; - p->next->next->v = x; - fillin(p->back, p, contin_); - evaluate(&curtree); - do { - oldlike = curtree.likelihood; - smooth(p); - smooth(p->back); - evaluate(&curtree); - } while (fabs(curtree.likelihood - oldlike) > delta); -} /* insert_ */ - - -void copynode(node *c, node *d) -{ - /* make a copy of a node */ - - memcpy(d->d, c->d, nonodes2*sizeof(double)); - memcpy(d->w, c->w, nonodes2*sizeof(double)); - d->v = c->v; - d->iter = c->iter; - d->dist = c->dist; - d->xcoord = c->xcoord; - d->ycoord = c->ycoord; - d->ymin = c->ymin; - d->ymax = c->ymax; -} /* copynode */ - - -void copy_(tree *a, tree *b) -{ - /* make copy of a tree a to tree b */ - long i, j=0; - node *p, *q; - - for (i = 0; i < spp; i++) { - copynode(a->nodep[i], b->nodep[i]); - if (a->nodep[i]->back) { - if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) - b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; - else if (a->nodep[i]->back - == a->nodep[a->nodep[i]->back->index - 1]->next) - b->nodep[i]->back = 
b->nodep[a->nodep[i]->back->index - 1]->next; - else - b->nodep[i]->back - = b->nodep[a->nodep[i]->back->index - 1]->next->next; - } - else b->nodep[i]->back = NULL; - } - for (i = spp; i < nonodes2; i++) { - p = a->nodep[i]; - q = b->nodep[i]; - for (j = 1; j <= 3; j++) { - copynode(p, q); - if (p->back) { - if (p->back == a->nodep[p->back->index - 1]) - q->back = b->nodep[p->back->index - 1]; - else if (p->back == a->nodep[p->back->index - 1]->next) - q->back = b->nodep[p->back->index - 1]->next; - else - q->back = b->nodep[p->back->index - 1]->next->next; - } - else - q->back = NULL; - p = p->next; - q = q->next; - } - } - b->likelihood = a->likelihood; - b->start = a->start; -} /* copy_ */ - - -void setuptipf(long m, tree *t) -{ - /* initialize branch lengths and views in a tip */ - long i=0; - intvector n=(long *)Malloc(spp * sizeof(long)); - node *WITH; - - WITH = t->nodep[m - 1]; - memcpy(WITH->d, x[m - 1], (nonodes2 * sizeof(double))); - memcpy(n, reps[m - 1], (spp * sizeof(long))); - for (i = 0; i < spp; i++) { - if (i + 1 != m && n[i] > 0) { - if (WITH->d[i] < epsilonf) - WITH->d[i] = epsilonf; - WITH->w[i] = n[i] / exp(power * log(WITH->d[i])); - } else { - WITH->w[i] = 0.0; - WITH->d[i] = 0.0; - } - } - for (i = spp; i < nonodes2; i++) { - WITH->w[i] = 1.0; - WITH->d[i] = 0.0; - } - WITH->index = m; - if (WITH->iter) WITH->v = 0.0; - free(n); -} /* setuptipf */ - - -void buildnewtip(long m, tree *t, long nextsp) -{ - /* initialize and hook up a new tip */ - node *p; - setuptipf(m, t); - p = t->nodep[nextsp + spp - 3]; - hookup(t->nodep[m - 1], p); -} /* buildnewtip */ - - -void buildsimpletree(tree *t, long nextsp) -{ - /* make and initialize a three-species tree */ - curtree.start=curtree.nodep[enterorder[0] - 1]; - setuptipf(enterorder[0], t); - setuptipf(enterorder[1], t); - hookup(t->nodep[enterorder[0] - 1], t->nodep[enterorder[1] - 1]); - buildnewtip(enterorder[2], t, nextsp); - insert_(t->nodep[enterorder[2] - 1]->back, t->nodep[enterorder[0] - 
1], - false); -} /* buildsimpletree */ - - -void addtraverse(node *p, node *q, boolean contin, long *numtrees, - boolean *succeeded) -{ - /* traverse through a tree, finding best place to add p */ - insert_(p, q, true); - (*numtrees)++; - if (evaluate(&curtree) > (bestree.likelihood + - epsilonf * fabs(bestree.likelihood))){ - copy_(&curtree, &bestree); - addwhere = q; - (*succeeded)=true; - } - copy_(&priortree, &curtree); - if (!q->tip && contin) { - addtraverse(p, q->next->back, contin,numtrees,succeeded); - addtraverse(p, q->next->next->back, contin,numtrees,succeeded); - } -} /* addtraverse */ - - -void re_move(node **p, node **q) -{ - /* re_move p and record in q where it was */ - *q = (*p)->next->back; - hookup(*q, (*p)->next->next->back); - (*p)->next->back = NULL; - (*p)->next->next->back = NULL; - update(*q); - update((*q)->back); -} /* re_move */ - - -void globrearrange(long* numtrees,boolean* succeeded) -{ - /* does global rearrangements */ - tree globtree; - tree oldtree; - int i,j,k,num_sibs,num_sibs2; - node *where,*sib_ptr,*sib_ptr2; - double oldbestyet = curtree.likelihood; - int success = false; - - alloctree(&globtree.nodep,nonodes2); - alloctree(&oldtree.nodep,nonodes2); - setuptree(&globtree,nonodes2); - setuptree(&oldtree,nonodes2); - allocd(nonodes2, globtree.nodep); - allocd(nonodes2, oldtree.nodep); - allocw(nonodes2, globtree.nodep); - allocw(nonodes2, oldtree.nodep); - copy_(&curtree,&globtree); - copy_(&curtree,&oldtree); - for ( i = spp ; i < nonodes2 ; i++ ) { - num_sibs = count_sibs(curtree.nodep[i]); - sib_ptr = curtree.nodep[i]; - if ( (i - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) - putchar('.'); - fflush(stdout); - for ( j = 0 ; j <= num_sibs ; j++ ) { - re_move(&sib_ptr,&where); - copy_(&curtree,&priortree); - - if (where->tip) { - copy_(&oldtree,&curtree); - copy_(&oldtree,&bestree); - sib_ptr=sib_ptr->next; - continue; - } - else num_sibs2 = count_sibs(where); - sib_ptr2 = where; - for ( k = 0 ; k < num_sibs2 ; k++ ) { - addwhere 
= NULL; - addtraverse(sib_ptr,sib_ptr2->back,true,numtrees,succeeded); - if ( addwhere && where != addwhere && where->back != addwhere - && bestree.likelihood > globtree.likelihood) { - copy_(&bestree,&globtree); - success = true; - } - sib_ptr2 = sib_ptr2->next; - } - copy_(&oldtree,&curtree); - copy_(&oldtree,&bestree); - sib_ptr = sib_ptr->next; - } - } - copy_(&globtree,&curtree); - copy_(&globtree,&bestree); - if (success && globtree.likelihood > oldbestyet) { - *succeeded = true; - } - else { - *succeeded = false; - } - freed(nonodes2, globtree.nodep); - freed(nonodes2, oldtree.nodep); - freew(nonodes2, globtree.nodep); - freew(nonodes2, oldtree.nodep); - freetree(&globtree.nodep,nonodes2); - freetree(&oldtree.nodep,nonodes2); -} - - -void rearrange(node *p, long *numtrees, long *nextsp, boolean *succeeded) -{ - node *q, *r; - if (!p->tip && !p->back->tip) { - r = p->next->next; - re_move(&r, &q); - copy_(&curtree, &priortree); - addtraverse(r, q->next->back, false, numtrees,succeeded); - addtraverse(r, q->next->next->back, false, numtrees,succeeded); - copy_(&bestree, &curtree); - if (global && ((*nextsp) == spp)) { - putchar('.'); - fflush(stdout); - } - } - if (!p->tip) { - rearrange(p->next->back, numtrees,nextsp,succeeded); - rearrange(p->next->next->back, numtrees,nextsp,succeeded); - } -} /* rearrange */ - - -void describe(node *p) -{ - /* print out information for one branch */ - long i=0; - node *q; - - q = p->back; - fprintf(outfile, "%4ld ", q->index - spp); - if (p->tip) { - for (i = 0; i < nmlngth; i++) - putc(nayme[p->index - 1][i], outfile); - } else - fprintf(outfile, "%4ld ", p->index - spp); - fprintf(outfile, "%15.5f\n", q->v); - if (!p->tip) { - describe(p->next->back); - describe(p->next->next->back); - } -} /* describe */ - - -void summarize(long numtrees) -{ - /* print out branch lengths etc. 
*/ - long i, j, totalnum; - - fprintf(outfile, "\nremember:"); - if (outgropt) - fprintf(outfile, " (although rooted by outgroup)"); - fprintf(outfile, " this is an unrooted tree!\n\n"); - if (!minev) - fprintf(outfile, "Sum of squares = %11.5f\n\n", -curtree.likelihood); - else - fprintf(outfile, "Sum of branch lengths = %11.5f\n\n", -curtree.likelihood); - if ((power == 2.0) && !minev) { - totalnum = 0; - for (i = 1; i <= nums; i++) { - for (j = 1; j <= nums; j++) { - if (i != j) - totalnum += reps[i - 1][j - 1]; - } - } - fprintf(outfile, "Average percent standard deviation = "); - fprintf(outfile, "%11.5f\n\n", - 100 * sqrt(-curtree.likelihood / (totalnum - 2))); - } - fprintf(outfile, "Between And Length\n"); - fprintf(outfile, "------- --- ------\n"); - describe(curtree.start->next->back); - describe(curtree.start->next->next->back); - describe(curtree.start->back); - fprintf(outfile, "\n\n"); - if (trout) { - col = 0; - treeout(curtree.start, &col, 0.43429445222, true, - curtree.start); - } -} /* summarize */ - - -void nodeinit(node *p) -{ - /* initialize a node */ - long i, j; - - for (i = 1; i <= 3; i++) { - for (j = 0; j < nonodes2; j++) { - p->w[j] = 1.0; - p->d[j] = 0.0; - } - p = p->next; - } - if ((!lengths) || p->iter) - p->v = 1.0; - if ((!lengths) || p->back->iter) - p->back->v = 1.0; -} /* nodeinit */ - - -void initrav(node *p) -{ - /* traverse to initialize */ - if (p->tip) - return; - nodeinit(p); - initrav(p->next->back); - initrav(p->next->next->back); -} /* initrav */ - -void treevaluate() -{ - /* evaluate user-defined tree, iterating branch lengths */ - long i; - double oldlike; - - for (i = 1; i <= spp; i++) - setuptipf(i, &curtree); - unroot(&curtree,nonodes2); - - initrav(curtree.start); - if (curtree.start->back != NULL) { - initrav(curtree.start->back); - evaluate(&curtree); - do { - oldlike = curtree.likelihood; - smooth(curtree.start); - evaluate(&curtree); - } while (fabs(curtree.likelihood - oldlike) > delta); - } - 
evaluate(&curtree); -} /* treevaluate */ - - -void maketree() -{ - /* contruct the tree */ - long nextsp,numtrees; - boolean succeeded=false; - long i, j, which; - - if (usertree) { - inputdata(replicates, printdata, lower, upper, x, reps); - setuptree(&curtree, nonodes2); - for (which = 1; which <= spp; which++) - setuptipf(which, &curtree); - if (eoln(infile)) - scan_eoln(infile); - openfile(&intree,INTREE,"input tree file","r",progname,intreename); - numtrees = countsemic(&intree); - if (numtrees > MAXNUMTREES) { - printf("\nERROR: number of input trees is read incorrectly from %s\n", - intreename); - exxit(-1); - } - if (treeprint) { - fprintf(outfile, "User-defined tree"); - if (numtrees > 1) - putc('s', outfile); - fprintf(outfile, ":\n\n"); - } - first = true; - which = 1; - while (which <= numtrees) { - treeread2 (intree, &curtree.start, curtree.nodep, - lengths, &trweight, &goteof, &haslengths, &spp,false,nonodes2); - nums = spp; - curtree.start = curtree.nodep[outgrno - 1]->back; - treevaluate(); - printree(curtree.start, treeprint, false, false); - summarize(numtrees); - clear_connections(&curtree,nonodes2); - which++; - } - FClose(intree); - } else { - if (jumb == 1) { - inputdata(replicates, printdata, lower, upper, x, reps); - setuptree(&curtree, nonodes2); - setuptree(&priortree, nonodes2); - setuptree(&bestree, nonodes2); - if (njumble > 1) setuptree(&bestree2, nonodes2); - } - for (i = 1; i <= spp; i++) - enterorder[i - 1] = i; - if (jumble) - randumize(seed, enterorder); - nextsp = 3; - buildsimpletree(&curtree, nextsp); - curtree.start = curtree.nodep[enterorder[0] - 1]->back; - if (jumb == 1) numtrees = 1; - nextsp = 4; - if (progress) { - printf("Adding species:\n"); - writename(0, 3, enterorder); -#ifdef WIN32 - phyFillScreenColor(); -#endif - } - while (nextsp <= spp) { - nums = nextsp; - buildnewtip(enterorder[nextsp - 1], &curtree, nextsp); - copy_(&curtree, &priortree); - bestree.likelihood = -99999.0; - curtree.start = 
curtree.nodep[enterorder[0] - 1]->back; - addtraverse(curtree.nodep[enterorder[nextsp - 1] - 1]->back, - curtree.start, true, &numtrees,&succeeded); - copy_(&bestree, &curtree); - if (progress) { - writename(nextsp - 1, 1, enterorder); -#ifdef WIN32 - phyFillScreenColor(); -#endif - } - if (global && nextsp == spp) { - if (progress) { - printf("Doing global rearrangements\n"); - printf(" !"); - for (j = spp; j < nonodes2; j++) - if ( (j - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) - putchar('-'); - printf("!\n"); - printf(" "); - } - } - succeeded = true; - while (succeeded) { - succeeded = false; - curtree.start = curtree.nodep[enterorder[0] - 1]->back; - if (nextsp == spp && global) - globrearrange (&numtrees,&succeeded); - else{ - rearrange(curtree.start,&numtrees,&nextsp,&succeeded); - } - if (global && ((nextsp) == spp) && progress) - printf("\n "); - } - if (global && nextsp == spp) { - putc('\n', outfile); - if (progress) - putchar('\n'); - } - if (njumble > 1) { - if (jumb == 1 && nextsp == spp) - copy_(&bestree, &bestree2); - else if (nextsp == spp) { - if (bestree2.likelihood < bestree.likelihood) - copy_(&bestree, &bestree2); - } - } - if (nextsp == spp && jumb == njumble) { - if (njumble > 1) copy_(&bestree2, &curtree); - curtree.start = curtree.nodep[outgrno - 1]->back; - printree(curtree.start, treeprint, true, false); - summarize(numtrees); - } - nextsp++; - } - } - if (jumb == njumble && progress) { - printf("\nOutput written to file \"%s\"\n\n", outfilename); - if (trout) { - printf("Tree also written onto file \"%s\"\n", outtreename); - putchar('\n'); - } - } -} /* maketree */ - - -int main(int argc, Char *argv[]) -{ - int i; -#ifdef MAC - argc = 1; /* macsetup("Fitch",""); */ - argv[0]="Fitch"; -#endif - init(argc,argv); - progname = argv[0]; - openfile(&infile,INFILE,"input file","r",argv[0],infilename); - openfile(&outfile,OUTFILE,"output file","w",argv[0],outfilename); - - ibmpc = IBMCRT; - ansi = ANSICRT; - mulsets = false; - datasets = 1; - 
firstset = true; - doinit(); - if (trout) - openfile(&outtree,OUTTREE,"output tree file","w",argv[0],outtreename); - for (i=0;i 1) { - fprintf(outfile, "Data set # %ld:\n\n",ith); - if (progress) - printf("\nData set # %ld:\n\n",ith); - } - fitch_getinput(); - for (jumb = 1; jumb <= njumble; jumb++) - maketree(); - firstset = false; - if (eoln(infile) && (ith < datasets)) - scan_eoln(infile); - } - if (trout) - FClose(outtree); - FClose(outfile); - FClose(infile); -#ifdef MAC - fixmacfile(outfilename); - fixmacfile(outtreename); -#endif - printf("Done.\n\n"); -#ifdef WIN32 - phyRestoreConsoleAttributes(); -#endif - return 0; -} diff --git a/forester/archive/RIO/others/phylip_mod/src/neighbor.c b/forester/archive/RIO/others/phylip_mod/src/neighbor.c deleted file mode 100644 index 62150ce..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/neighbor.c +++ /dev/null @@ -1,602 +0,0 @@ - -#include "phylip.h" -#include "dist.h" - -/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. - Written by Mary Kuhner, Jon Yamato, Joseph Felsenstein, Akiko Fuseki, - Sean Lamont, and Andrew Keeffe. - Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. 
*/ - - -#ifndef OLDC -/* function prototypes */ -void getoptions(void); -void allocrest(void); -void doinit(void); -void inputoptions(void); -void getinput(void); -void describe(node *, double); -void summarize(void); -void nodelabel(boolean); -void jointree(void); -void maketree(void); -void freerest(void); -/* function prototypes */ -#endif - - -Char infilename[FNMLNGTH], outfilename[FNMLNGTH], outtreename[FNMLNGTH]; -long nonodes2, outgrno, col, datasets, ith; -long inseed; -vector *x; -intvector *reps; -boolean jumble, lower, upper, outgropt, replicates, trout, - printdata, progress, treeprint, mulsets, njoin; -tree curtree; -longer seed; -long *enterorder; -Char progname[20]; - -/* variables for maketree, propagated globally for C version: */ -node **cluster; - - -void getoptions() -{ - /* interactively set options */ - long inseed0 = 0, loopcount; - Char ch; - - fprintf(outfile, "\nNeighbor-Joining/UPGMA method version %s\n\n",VERSION); - putchar('\n'); - jumble = false; - lower = false; - outgrno = 1; - outgropt = false; - replicates = false; - trout = true; - upper = false; - printdata = false; - progress = true; - treeprint = true; - njoin = true; - loopcount = 0; - for(;;) { - cleerhome(); - printf("\nNeighbor-Joining/UPGMA method version %s\n\n",VERSION); - printf("Settings for this run:\n"); - printf(" N Neighbor-joining or UPGMA tree? %s\n", - (njoin ? "Neighbor-joining" : "UPGMA")); - if (njoin) { - printf(" O Outgroup root?"); - if (outgropt) - printf(" Yes, at species number%3ld\n", outgrno); - else - printf(" No, use as outgroup species%3ld\n", outgrno); - } - printf(" L Lower-triangular data matrix? %s\n", - (lower ? "Yes" : "No")); - printf(" R Upper-triangular data matrix? %s\n", - (upper ? "Yes" : "No")); - printf(" S Subreplicates? %s\n", - (replicates ? "Yes" : "No")); - printf(" J Randomize input order of species?"); - if (jumble) - printf(" Yes (random number seed =%8ld)\n", inseed0); - else - printf(" No. 
Use input order\n"); - printf(" M Analyze multiple data sets?"); - if (mulsets) - printf(" Yes, %2ld sets\n", datasets); - else - printf(" No\n"); - printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n", - (ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)")); - printf(" 1 Print out the data at start of run %s\n", - (printdata ? "Yes" : "No")); - printf(" 2 Print indications of progress of run %s\n", - (progress ? "Yes" : "No")); - printf(" 3 Print out tree %s\n", - (treeprint ? "Yes" : "No")); - printf(" 4 Write out trees onto tree file? %s\n", - (trout ? "Yes" : "No")); - printf("\n\n Y to accept these or type the letter for one to change\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%c%*[^\n]", &ch); - getchar(); - if (ch == '\n') - ch = ' '; - uppercase(&ch); - if (ch == 'Y') - break; - if (strchr("NJOULRSM01234",ch) != NULL){ - switch (ch) { - - case 'J': - jumble = !jumble; - if (jumble) - initseed(&inseed, &inseed0, seed); - break; - - case 'L': - lower = !lower; - break; - - case 'O': - outgropt = !outgropt; - if (outgropt) - initoutgroup(&outgrno, spp); - else - outgrno = 1; - break; - - case 'R': - upper = !upper; - break; - - case 'S': - replicates = !replicates; - break; - - case 'N': - njoin = !njoin; - break; - - case 'M': - mulsets = !mulsets; - if (mulsets) - initdatasets(&datasets); - jumble = true; - if (jumble) - initseed(&inseed, &inseed0, seed); - break; - - case '0': - initterminal(&ibmpc, &ansi); - break; - - case '1': - printdata = !printdata; - break; - - case '2': - progress = !progress; - break; - - case '3': - treeprint = !treeprint; - break; - - case '4': - trout = !trout; - break; - } - } else - printf("Not a possible option!\n"); - countup(&loopcount, 100); - } -} /* getoptions */ - - -void allocrest() -{ - long i; - - x = (vector *)Malloc(spp*sizeof(vector)); - for (i = 0; i < spp; i++) - x[i] = (vector)Malloc(spp*sizeof(double)); - reps = (intvector *)Malloc(spp*sizeof(intvector)); - for (i = 0; i < spp; i++) - reps[i] = 
(intvector)Malloc(spp*sizeof(long)); - nayme = (naym *)Malloc(spp*sizeof(naym)); - enterorder = (long *)Malloc(spp*sizeof(long)); - cluster = (node **)Malloc(spp*sizeof(node *)); -} /* allocrest */ - - -void freerest() -{ - long i; - - for (i = 0; i < spp; i++) - free(x[i]); - free(x); - for (i = 0; i < spp; i++) - free(reps[i]); - free(reps); - free(nayme); - free(enterorder); - free(cluster); -} /* freerest */ - - -void doinit() -{ - /* initializes variables */ - node *p; - - inputnumbers2(&spp, &nonodes2, 2); - nonodes2 += (njoin ? 0 : 1); - getoptions(); - alloctree(&curtree.nodep, nonodes2+1); - p = curtree.nodep[nonodes2]->next->next; - curtree.nodep[nonodes2]->next = curtree.nodep[nonodes2]; - free(p); - allocrest(); - -} /* doinit */ - - -void inputoptions() -{ - /* read options information */ - - if (ith != 1) - samenumsp2(ith); - putc('\n', outfile); - if (njoin) - fprintf(outfile, " Neighbor-joining method\n"); - else - fprintf(outfile, " UPGMA method\n"); - fprintf(outfile, "\n Negative branch lengths allowed\n\n"); -} /* inputoptions */ - - -void describe(node *p, double height) -{ - /* print out information for one branch */ - long i; - node *q; - - q = p->back; - if (njoin) - fprintf(outfile, "%4ld ", q->index - spp); - else - fprintf(outfile, "%4ld ", q->index - spp); - if (p->tip) { - for (i = 0; i < nmlngth; i++) - putc(nayme[p->index - 1][i], outfile); - putc(' ', outfile); - } else { - if (njoin) - fprintf(outfile, "%4ld ", p->index - spp); - else { - fprintf(outfile, "%4ld ", p->index - spp); - } - } - if (njoin) - fprintf(outfile, "%12.5f\n", q->v); - else - fprintf(outfile, "%10.5f %10.5f\n", q->v, q->v+height); - if (!p->tip) { - describe(p->next->back, height+q->v); - describe(p->next->next->back, height+q->v); - } -} /* describe */ - - -void summarize() -{ - /* print out branch lengths etc. 
*/ - putc('\n', outfile); - if (njoin) { - fprintf(outfile, "remember:"); - if (outgropt) - fprintf(outfile, " (although rooted by outgroup)"); - fprintf(outfile, " this is an unrooted tree!\n"); - } - if (njoin) { - fprintf(outfile, "\nBetween And Length\n"); - fprintf(outfile, "------- --- ------\n"); - } else { - fprintf(outfile, "From To Length Height\n"); - fprintf(outfile, "---- -- ------ ------\n"); - } - describe(curtree.start->next->back, 0.0); - describe(curtree.start->next->next->back, 0.0); - if (njoin) - describe(curtree.start->back, 0.0); - fprintf(outfile, "\n\n"); -} /* summarize */ - - -void nodelabel(boolean isnode) -{ - if (isnode) - printf("node"); - else - printf("species"); -} /* nodelabel */ - - -void jointree() -{ - /* calculate the tree */ - long nc, nextnode, mini=0, minj=0, i, j, ia, ja, ii, jj, nude, iter; - double fotu2, total, tmin, dio, djo, bi, bj, bk, dmin=0, da; - long el[3]; - vector av; - intvector oc; - - double *R; /* added in revisions by Y. Ina */ - R = (double *)Malloc(spp * sizeof(double)); - - for (i = 0; i <= spp - 2; i++) { - for (j = i + 1; j < spp; j++) { - da = (x[i][j] + x[j][i]) / 2.0; - x[i][j] = da; - x[j][i] = da; - } - } - /* First initialization */ - fotu2 = spp - 2.0; - nextnode = spp + 1; - av = (vector)Malloc(spp*sizeof(double)); - oc = (intvector)Malloc(spp*sizeof(long)); - for (i = 0; i < spp; i++) { - av[i] = 0.0; - oc[i] = 1; - } - /* Enter the main cycle */ - if (njoin) - iter = spp - 3; - else - iter = spp - 1; - for (nc = 1; nc <= iter; nc++) { - for (j = 2; j <= spp; j++) { - for (i = 0; i <= j - 2; i++) - x[j - 1][i] = x[i][j - 1]; - } - tmin = 99999.0; - /* Compute sij and minimize */ - if (njoin) { /* many revisions by Y. Ina from here ... 
*/ - for (i = 0; i < spp; i++) - R[i] = 0.0; - for (ja = 2; ja <= spp; ja++) { - jj = enterorder[ja - 1]; - if (cluster[jj - 1] != NULL) { - for (ia = 0; ia <= ja - 2; ia++) { - ii = enterorder[ia]; - if (cluster[ii - 1] != NULL) { - R[ii - 1] += x[ii - 1][jj - 1]; - R[jj - 1] += x[ii - 1][jj - 1]; - } - } - } - } - } /* ... to here */ - for (ja = 2; ja <= spp; ja++) { - jj = enterorder[ja - 1]; - if (cluster[jj - 1] != NULL) { - for (ia = 0; ia <= ja - 2; ia++) { - ii = enterorder[ia]; - if (cluster[ii - 1] != NULL) { - if (njoin) { - total = fotu2 * x[ii - 1][jj - 1] - R[ii - 1] - R[jj - 1]; - /* this statement part of revisions by Y. Ina */ - } else - total = x[ii - 1][jj - 1]; - if (total < tmin) { - tmin = total; - mini = ii; - minj = jj; - } - } - } - } - } - /* compute lengths and print */ - if (njoin) { - dio = 0.0; - djo = 0.0; - for (i = 0; i < spp; i++) { - dio += x[i][mini - 1]; - djo += x[i][minj - 1]; - } - dmin = x[mini - 1][minj - 1]; - dio = (dio - dmin) / fotu2; - djo = (djo - dmin) / fotu2; - bi = (dmin + dio - djo) * 0.5; - bj = dmin - bi; - bi -= av[mini - 1]; - bj -= av[minj - 1]; - } else { - bi = x[mini - 1][minj - 1] / 2.0 - av[mini - 1]; - bj = x[mini - 1][minj - 1] / 2.0 - av[minj - 1]; - av[mini - 1] += bi; - } - if (progress) { - printf("Cycle %3ld: ", iter - nc + 1); - if (njoin) - nodelabel((boolean)(av[mini - 1] > 0.0)); - else - nodelabel((boolean)(oc[mini - 1] > 1.0)); - printf(" %ld (%10.5f) joins ", mini, bi); - if (njoin) - nodelabel((boolean)(av[minj - 1] > 0.0)); - else - nodelabel((boolean)(oc[minj - 1] > 1.0)); - printf(" %ld (%10.5f)\n", minj, bj); -#ifdef WIN32 - phyFillScreenColor(); -#endif - } - hookup(curtree.nodep[nextnode - 1]->next, cluster[mini - 1]); - hookup(curtree.nodep[nextnode - 1]->next->next, cluster[minj - 1]); - cluster[mini - 1]->v = bi; - cluster[minj - 1]->v = bj; - cluster[mini - 1]->back->v = bi; - cluster[minj - 1]->back->v = bj; - cluster[mini - 1] = curtree.nodep[nextnode - 1]; - cluster[minj - 1] 
= NULL; - nextnode++; - if (njoin) - av[mini - 1] = dmin * 0.5; - /* re-initialization */ - fotu2 -= 1.0; - for (j = 0; j < spp; j++) { - if (cluster[j] != NULL) { - if (njoin) { - da = (x[mini - 1][j] + x[minj - 1][j]) * 0.5; - if (mini - j - 1 < 0) - x[mini - 1][j] = da; - if (mini - j - 1 > 0) - x[j][mini - 1] = da; - } else { - da = x[mini - 1][j] * oc[mini - 1] + x[minj - 1][j] * oc[minj - 1]; - da /= oc[mini - 1] + oc[minj - 1]; - x[mini - 1][j] = da; - x[j][mini - 1] = da; - } - } - } - for (j = 0; j < spp; j++) { - x[minj - 1][j] = 0.0; - x[j][minj - 1] = 0.0; - } - oc[mini - 1] += oc[minj - 1]; - } - /* the last cycle */ - nude = 1; - for (i = 1; i <= spp; i++) { - if (cluster[i - 1] != NULL) { - el[nude - 1] = i; - nude++; - } - } - if (!njoin) { - curtree.start = cluster[el[0] - 1]; - curtree.start->back = NULL; - free(av); - free(oc); - return; - } - bi = (x[el[0] - 1][el[1] - 1] + x[el[0] - 1][el[2] - 1] - x[el[1] - 1] - [el[2] - 1]) * 0.5; - bj = x[el[0] - 1][el[1] - 1] - bi; - bk = x[el[0] - 1][el[2] - 1] - bi; - bi -= av[el[0] - 1]; - bj -= av[el[1] - 1]; - bk -= av[el[2] - 1]; - if (progress) { - printf("last cycle:\n"); - putchar(' '); - nodelabel((boolean)(av[el[0] - 1] > 0.0)); - printf(" %ld (%10.5f) joins ", el[0], bi); - nodelabel((boolean)(av[el[1] - 1] > 0.0)); - printf(" %ld (%10.5f) joins ", el[1], bj); - nodelabel((boolean)(av[el[2] - 1] > 0.0)); - printf(" %ld (%10.5f)\n", el[2], bk); -#ifdef WIN32 - phyFillScreenColor(); -#endif - } - hookup(curtree.nodep[nextnode - 1], cluster[el[0] - 1]); - hookup(curtree.nodep[nextnode - 1]->next, cluster[el[1] - 1]); - hookup(curtree.nodep[nextnode - 1]->next->next, cluster[el[2] - 1]); - cluster[el[0] - 1]->v = bi; - cluster[el[1] - 1]->v = bj; - cluster[el[2] - 1]->v = bk; - cluster[el[0] - 1]->back->v = bi; - cluster[el[1] - 1]->back->v = bj; - cluster[el[2] - 1]->back->v = bk; - curtree.start = cluster[el[0] - 1]->back; - free(av); - free(oc); - free(R); -} /* jointree */ - - -void maketree() 
-{ - /* construct the tree */ - long i ; - - inputdata(replicates, printdata, lower, upper, x, reps); - if (njoin && (spp < 3)) { - printf("\nERROR: Neighbor-Joining runs must have at least 3 species\n\n"); - exxit(-1); - } - if (progress) - putchar('\n'); - if (ith == 1) - setuptree(&curtree, nonodes2 + 1); - for (i = 1; i <= spp; i++) - enterorder[i - 1] = i; - if (jumble) - randumize(seed, enterorder); - for (i = 0; i < spp; i++) - cluster[i] = curtree.nodep[i]; - jointree(); - if (njoin) - curtree.start = curtree.nodep[outgrno - 1]->back; - printree(curtree.start, treeprint, njoin, (boolean)(!njoin)); - if (treeprint) - summarize(); - if (trout) { - col = 0; - if (njoin) - treeout(curtree.start, &col, 0.43429448222, njoin, curtree.start); - else - curtree.root = curtree.start, - treeoutr(curtree.start,&col,&curtree); - } - if (progress) { - printf("\nOutput written on file \"%s\"\n\n", outfilename); - if (trout) - printf("Tree written on file \"%s\"\n\n", outtreename); - } -} /* maketree */ - - -int main(int argc, Char *argv[]) -{ /* main program */ -#ifdef MAC - argc = 1; /* macsetup("Neighbor",""); */ - argv[0] = "Neighbor"; -#endif - init(argc, argv); - openfile(&infile,INFILE,"input file", "r",argv[0],infilename); - openfile(&outfile,OUTFILE,"output file", "w",argv[0],outfilename); - ibmpc = IBMCRT; - ansi = ANSICRT; - mulsets = false; - datasets = 1; - doinit(); - if (trout) - openfile(&outtree,OUTTREE,"output tree file", "w",argv[0],outtreename); - ith = 1; - while (ith <= datasets) { - if (datasets > 1) { - fprintf(outfile, "Data set # %ld:\n",ith); - if (progress) - printf("Data set # %ld:\n",ith); - } - inputoptions(); - maketree(); - if (eoln(infile) && (ith < datasets)) - scan_eoln(infile); - ith++; - } - FClose(infile); - FClose(outfile); - FClose(outtree); - freerest(); -#ifdef MAC - fixmacfile(outfilename); - fixmacfile(outtreename); -#endif - printf("Done.\n\n"); -#ifdef WIN32 - phyRestoreConsoleAttributes(); -#endif - return 0; -} - - - - - diff 
--git a/forester/archive/RIO/others/phylip_mod/src/phylip.c b/forester/archive/RIO/others/phylip_mod/src/phylip.c deleted file mode 100644 index 2c87e74..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/phylip.c +++ /dev/null @@ -1,2750 +0,0 @@ - -/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. - Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe, - and Dan Fineman. - Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. */ - -#ifdef OSX_CARBON -#include -#endif - -#include -#include -#ifdef WIN32 -#include -/* for console code (clear screen, text color settings) */ -CONSOLE_SCREEN_BUFFER_INFO savecsbi; -HANDLE hConsoleOutput; - -void phyClearScreen(); -void phySaveConsoleAttributes(); -void phySetConsoleAttributes(); -void phyRestoreConsoleAttributes(); -void phyFillScreenColor(); -#endif - -#include "phylip.h" - -#ifndef OLDC -static void crash_handler(int signum); - -#endif -#if defined(OSX_CARBON) && defined(__MWERKS__) -boolean fixedpath = false; -#endif -FILE *infile, *outfile, *intree, *intree2, *outtree, *weightfile, *catfile, *ancfile, *mixfile, *factfile; -long spp, words, bits; -boolean ibmpc, ansi, tranvsp; -naym *nayme; /* names of species */ - -static void crash_handler(int sig_num) -{ /* when we crash, lets print out something usefull */ - printf("ERROR: "); - switch(sig_num) { -#ifdef SIGSEGV - case SIGSEGV: - puts("This program has caused a Segmentation fault."); - break; -#endif /* SIGSEGV */ -#ifdef SIGFPE - case SIGFPE: - puts("This program has caused a Floating Point Exception"); - break; -#endif /* SIGFPE */ -#ifdef SIGILL - case SIGILL: - puts("This program has attempted an illegal instruction"); - break; -#endif /* SIGILL */ -#ifdef SIGPIPE - case SIGPIPE: - puts("This program tried to write to a broken pipe"); - break; -#endif /* SIGPIPE */ -#ifdef SIGBUS - case SIGBUS: - puts("This program had a bus 
error"); - break; -#endif /* SIGBUS */ - } - if (sig_num == SIGSEGV) { - puts( - " This may have been caused by an incorrectly formatted input file"); - puts( - " or input tree file. You should check those files carefully."); - puts(" If this seems to be a bug, please mail joe@gs.washington.edu"); - } - else { - puts(" Most likely, you have encountered a bug in the program."); - puts(" Since this seems to be a bug, please mail joe@gs.washington.edu"); - } - puts(" with the name of the program, your computer system type,"); - puts(" a full description of the problem, and with the input data file."); - puts(" (which should be in the body of the message, not as an Attachment)."); - -#ifdef WIN32 - puts ("Press Enter or Return to close program."); - puts(" You may have to press Enter or Return twice."); - getchar (); - getchar (); - phyRestoreConsoleAttributes(); -#endif - abort(); -} - - -void init(int argc, char** argv) -{ /* initialization routine for all programs - * anything done at the beginig for every program should be done here */ - - /* set up signal handler for - * segfault,floating point exception, illeagal instruction, bad pipe, bus error - * there are more signals that can cause a crash, but these are the most common - * even these aren't found on all machines. 
*/ -#ifdef SIGSEGV - signal(SIGSEGV, crash_handler); -#endif /* SIGSEGV */ -#ifdef SIGFPE - signal(SIGFPE, crash_handler); -#endif /* SIGFPE */ -#ifdef SIGILL - signal(SIGILL, crash_handler); -#endif /* SIGILL */ -#ifdef SIGPIPE - signal(SIGPIPE, crash_handler); -#endif /* SIGPIPE */ -#ifdef SIGBUS - signal(SIGBUS, crash_handler); -#endif /* SIGBUS */ - -#ifdef WIN32 - phySetConsoleAttributes(); - phyClearScreen(); -#endif - -} - -void scan_eoln(FILE *f) -{ /* eat everything to the end of line or eof*/ - char ch; - - while (!eoff(f) && !eoln(f)) - gettc(f); - if (!eoff(f)) - ch = gettc(f); -} - - -boolean eoff(FILE *f) -{ /* check for end of file */ - int ch; - - if (feof(f)) - return true; - ch = getc(f); - if (ch == EOF) { - ungetc(ch, f); - return true; - } - ungetc(ch, f); - return false; -} /*eoff*/ - - -boolean eoln(FILE *f) -{ /* check for end of line or eof*/ - register int ch; - - ch = getc(f); - if (ch == EOF) - return true; - ungetc(ch, f); - return ((ch == '\n') || (ch == '\r')); -} /*eoln*/ - - -int filexists(char *filename) -{ /* check whether file already exists */ - FILE *fp; - fp =fopen(filename,"rb"); - if (fp) { - fclose(fp); - return 1; - } else - return 0; -} /*filexists*/ - - -const char* get_command_name (const char *vektor) -{ /* returns the name of the program from vektor without the whole path */ - char *last_slash; - - /* Point to the last slash... 
*/ - last_slash = strrchr (vektor, DELIMITER); - - if (last_slash) - /* If there was a last slash, return the character after it */ - return last_slash + 1; - else - /* If not, return the vector */ - return vektor; - -} /*get_command_name*/ - - -void getstryng(char *fname) -{ /* read in a file name from stdin and take off newline if any */ - - fname = fgets(fname, 100, stdin); - if (strchr(fname, '\n') != NULL) - *strchr(fname, '\n') = '\0'; -} /* getstryng */ - - -void countup(long *loopcount, long maxcount) -{ /* count how many times this loop has tried to read data, bail out - if exceeds maxcount */ - - (*loopcount)++; - if ((*loopcount) >= maxcount) { - printf("\nERROR: Made %ld attempts to read input in loop. Aborting run.\n", - *loopcount); - exxit(-1); - } -} /* countup */ - - -void openfile(FILE **fp,const char *filename,const char *filedesc, - const char *mode,const char *application, char *perm) -{ /* open a file, testing whether it exists etc. */ - FILE *of; - char file[FNMLNGTH]; - char filemode[3]; - char input[FNMLNGTH]; - char ch; - const char *progname_without_path; - long loopcount, loopcount2; -#if defined(OSX_CARBON) && defined(__MWERKS__) - ProcessSerialNumber myProcess; - FSRef bundleLocation; - unsigned char bundlePath[FNMLNGTH]; - - if(!fixedpath){ - /* change path to the bundle location instead of root directory */ - GetCurrentProcess(&myProcess); - GetProcessBundleLocation(&myProcess, &bundleLocation); - FSRefMakePath(&bundleLocation, bundlePath, FNMLNGTH); - chdir((const char*)bundlePath); - chdir(".."); /* get out of the .app directory */ - - fixedpath = true; - } -#endif - - progname_without_path = get_command_name(application); - - strcpy(file,filename); - strcpy(filemode,mode); - strcat(filemode,"b"); - loopcount = 0; - while (1){ - if (filemode[0] == 'w' && filexists(file)){ - printf("\n%s: the file \"%s\" that you wanted to\n", - progname_without_path, file); - printf(" use as %s already exists.\n", filedesc); - printf(" Do you want 
to Replace it, Append to it,\n"); - printf(" write to a new File, or Quit?\n"); - loopcount2 = 0; - do { - printf(" (please type R, A, F, or Q) \n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - fgets(input, sizeof(input), stdin); - ch = input[0]; - uppercase(&ch); - countup(&loopcount2, 10); - } while (ch != 'A' && ch != 'R' && ch != 'F' && ch != 'Q'); - if (ch == 'Q') - exxit(-1); - if (ch == 'A') { - strcpy(filemode,"ab"); - continue; - } - else if (ch == 'F') { - file[0] = '\0'; - loopcount2 = 0; - while (file[0] =='\0') { - printf("Please enter a new file name> "); - getstryng(file); - countup(&loopcount2, 10); - } - strcpy(filemode,"wb"); - continue; - } - } - of = fopen(file,filemode); - if (of) - break; - else { - switch (filemode[0]){ - - case 'r': - printf("%s: can't find %s \"%s\"\n", progname_without_path, - filedesc, file); - file[0] = '\0'; - loopcount2 = 0; - while (file[0] =='\0'){ - printf("Please enter a new file name> "); - countup(&loopcount2, 10); - getstryng(file);} - break; - - case 'w': - case 'a': - printf("%s: can't write %s \"%s\"\n", progname_without_path, - filedesc, file); - file[0] = '\0'; - loopcount2 = 0; - while (file[0] =='\0'){ - printf("Please enter a new file name> "); - countup(&loopcount2, 10); - getstryng(file);} - continue; - default: - printf("There is some error in the call of openfile. Unknown mode.\n"); - exxit(-1); - } - } - countup(&loopcount, 20); - } - *fp = of; - if (perm != NULL) - strcpy(perm,file); -} /* openfile */ - - -void cleerhome() -{ /* home cursor and clear screen, if possible */ -#ifdef WIN32 - if(ibmpc || ansi){ - phyClearScreen(); - } else { - printf("\n\n"); - } -#else - printf("%s", ((ibmpc || ansi) ? 
("\033[2J\033[H") : "\n\n")); -#endif -} /* cleerhome */ - - -double randum(longer seed) -{ /* random number generator -- slow but machine independent - This is a multiplicative congruential 32-bit generator - x(t+1) = 1664525 * x(t) mod 2^32, one that passes the - Coveyou-Macpherson and Lehmer tests, see Knuth ACP vol. 2 - We here implement it representing each integer in base-64 - notation -- i.e. as an array of 6 six-bit chunks */ - long i, j, k, sum; - longer mult, newseed; - double x; - - mult[0] = 13; /* these four statements set the multiplier */ - mult[1] = 24; /* -- they are its "digits" in a base-64 */ - mult[2] = 22; /* notation: 1664525 = 13*64^3+24*64^2 */ - mult[3] = 6; /* +22*64+6 */ - for (i = 0; i <= 5; i++) - newseed[i] = 0; - for (i = 0; i <= 5; i++) { - sum = newseed[i]; - k = i; - if (i > 3) - k = 3; - for (j = 0; j <= k; j++) - sum += mult[j] * seed[i - j]; - newseed[i] = sum; - for (j = i; j <= 4; j++) { - newseed[j + 1] += newseed[j] / 64; - newseed[j] &= 63; - } - } - memcpy(seed, newseed, sizeof(longer)); - seed[5] &= 3; - x = 0.0; - for (i = 0; i <= 5; i++) - x = x / 64.0 + seed[i]; - x /= 4.0; - return x; -} /* randum */ - - -void randumize(longer seed, long *enterorder) -{ /* randomize input order of species */ - long i, j, k; - - for (i = 0; i < spp; i++) { - j = (long)(randum(seed) * (i+1)); - k = enterorder[j]; - enterorder[j] = enterorder[i]; - enterorder[i] = k; - } -} /* randumize */ - - -double normrand(longer seed) -{/* standardized Normal random variate */ - double x; - - x = randum(seed)+randum(seed)+randum(seed)+randum(seed) - + randum(seed)+randum(seed)+randum(seed)+randum(seed) - + randum(seed)+randum(seed)+randum(seed)+randum(seed)-6.0; - return(x); -} /* normrand */ - - -long readlong(const char *prompt) -{ /* read a long */ - long res, loopcount; - char string[100]; - - loopcount = 0; - do { - printf("%s",prompt); - getstryng(string); - if (sscanf(string,"%ld",&res) == 1) - break; - countup(&loopcount, 10); - } while 
(1); - return res; -} /* readlong */ - - -void uppercase(Char *ch) -{ /* convert ch to upper case */ - *ch = (islower (*ch) ? toupper(*ch) : (*ch)); -} /* uppercase */ - - -void initseed(long *inseed, long *inseed0, longer seed) -{ /* input random number seed */ - long i, loopcount; - - loopcount = 0; - do { - printf("Random number seed (must be odd)?\n"); - scanf("%ld%*[^\n]", inseed); - getchar(); - countup(&loopcount, 10); - } while (((*inseed) < 0) || ((*inseed) & 1) == 0); - *inseed0 = *inseed; - for (i = 0; i <= 5; i++) - seed[i] = 0; - i = 0; - do { - seed[i] = *inseed & 63; - *inseed /= 64; - i++; - } while (*inseed != 0); -} /*initseed*/ - - -void initjumble(long *inseed, long *inseed0, longer seed, long *njumble) -{ /* input number of jumblings for jumble option */ - long loopcount; - - initseed(inseed, inseed0, seed); - loopcount = 0; - do { - printf("Number of times to jumble?\n"); - scanf("%ld%*[^\n]", njumble); - getchar(); - countup(&loopcount, 10); - } while ((*njumble) < 1); -} /*initjumble*/ - - -void initoutgroup(long *outgrno, long spp) -{ /* input outgroup number */ - long loopcount; - boolean done; - - loopcount = 0; - do { - printf("Type number of the outgroup:\n"); - scanf("%ld%*[^\n]", outgrno); - getchar(); - done = (*outgrno >= 1 && *outgrno <= spp); - if (!done) { - printf("BAD OUTGROUP NUMBER: %ld\n", *outgrno); - printf(" Must be in range 1 - %ld\n", spp); - } - countup(&loopcount, 10); - } while (done != true); -} /*initoutgroup*/ - - -void initthreshold(double *threshold) -{ /* input threshold for threshold parsimony option */ - long loopcount; - boolean done; - - loopcount = 0; - do { - printf("What will be the threshold value?\n"); - scanf("%lf%*[^\n]", threshold); - getchar(); - done = (*threshold >= 1.0); - if (!done) - printf("BAD THRESHOLD VALUE: it must be greater than 1\n"); - else - *threshold = (long)(*threshold * 10.0 + 0.5) / 10.0; - countup(&loopcount, 10); - } while (done != true); -} /*initthreshold*/ - - -void 
initcatn(long *categs) -{ /* initialize category number for rate categories */ - long loopcount; - - loopcount = 0; - *categs = 0; - do { - printf("Number of categories (1-%d)?\n", maxcategs); - scanf("%ld%*[^\n]", categs); - getchar(); - countup(&loopcount, 10); - } while (*categs > maxcategs || *categs < 1); -} /*initcatn*/ - - -void initcategs(long categs, double *rate) -{ /* initialize category rates for HMM rates */ - long i, loopcount, scanned; - char line[100], rest[100]; - boolean done; - - loopcount = 0; - for (;;){ - printf("Rate for each category? (use a space to separate)\n"); - getstryng(line); - done = true; - for (i = 0; i < categs; i++){ - scanned = sscanf(line,"%lf %[^\n]", &rate[i],rest); - if ((scanned < 2 && i < (categs - 1)) || - (scanned < 1 && i == (categs - 1))){ - printf("Please enter exactly %ld values.\n",categs); - done = false; - break; - } - strcpy(line,rest); - } - if (done) - break; - countup(&loopcount, 100); - } -} /*initcategs*/ - - -void initprobcat(long categs, double *probsum, double *probcat) -{ /* input probabilities of rate categores for HMM rates */ - long i, loopcount, scanned; - boolean done; - char line[100], rest[100]; - - loopcount = 0; - do { - printf("Probability for each category?"); - printf(" (use a space to separate)\n"); - getstryng(line); - done = true; - for (i = 0; i < categs; i++){ - scanned = sscanf(line,"%lf %[^\n]",&probcat[i],rest); - if ((scanned < 2 && i < (categs - 1)) || - (scanned < 1 && i == (categs - 1))){ - done = false; - printf("Please enter exactly %ld values.\n",categs); - break;} - strcpy(line,rest); - } - if (!done) - continue; - *probsum = 0.0; - for (i = 0; i < categs; i++) - *probsum += probcat[i]; - if (fabs(1.0 - (*probsum)) > 0.001) { - done = false; - printf("Probabilities must add up to"); - printf(" 1.0, plus or minus 0.001.\n"); - } - countup(&loopcount, 100); - } while (!done); -} /*initprobcat*/ - - -void lgr(long m, double b, raterootarray lgroot) -{ /* For use by initgammacat. 
Get roots of m-th Generalized Laguerre - polynomial, given roots of (m-1)-th, these are to be - stored in lgroot[m][] */ - long i; - double upper, lower, x, y; - boolean dwn; /* is function declining in this interval? */ - - if (m == 1) { - lgroot[1][1] = 1.0+b; - } else { - dwn = true; - for (i=1; i<=m; i++) { - if (i < m) { - if (i == 1) - lower = 0.0; - else - lower = lgroot[m-1][i-1]; - upper = lgroot[m-1][i]; - } else { /* i == m, must search above */ - lower = lgroot[m-1][i-1]; - x = lgroot[m-1][m-1]; - do { - x = 2.0*x; - y = glaguerre(m, b,x); - } while ((dwn && (y > 0.0)) || ((!dwn) && (y < 0.0))); - upper = x; - } - while (upper-lower > 0.000000001) { - x = (upper+lower)/2.0; - if (glaguerre(m, b, x) > 0.0) { - if (dwn) - lower = x; - else - upper = x; - } else { - if (dwn) - upper = x; - else - lower = x; - } - } - lgroot[m][i] = (lower+upper)/2.0; - dwn = !dwn; /* switch for next one */ - } - } -} /* lgr */ - - -double logfac (long n) -{ /* log(n!) values were calculated with Mathematica - with a precision of 30 digits */ - long i; - double x; - - switch (n) - { - case 0: - return 0.; - case 1: - return 0.; - case 2: - return 0.693147180559945309417232121458; - case 3: - return 1.791759469228055000812477358381; - case 4: - return 3.1780538303479456196469416013; - case 5: - return 4.78749174278204599424770093452; - case 6: - return 6.5792512120101009950601782929; - case 7: - return 8.52516136106541430016553103635; - case 8: - return 10.60460290274525022841722740072; - case 9: - return 12.80182748008146961120771787457; - case 10: - return 15.10441257307551529522570932925; - case 11: - return 17.50230784587388583928765290722; - case 12: - return 19.98721449566188614951736238706; - default: - x = 19.98721449566188614951736238706; - for (i = 13; i <= n; i++) - x += log(i); - return x; - } -} - - -double glaguerre(long m, double b, double x) -{ /* Generalized Laguerre polynomial computed recursively. 
- For use by initgammacat */ - long i; - double gln, glnm1, glnp1; /* L_n, L_(n-1), L_(n+1) */ - - if (m == 0) - return 1.0; - else { - if (m == 1) - return 1.0 + b - x; - else { - gln = 1.0+b-x; - glnm1 = 1.0; - for (i=2; i <= m; i++) { - glnp1 = ((2*(i-1)+b+1.0-x)*gln - (i-1+b)*glnm1)/i; - glnm1 = gln; - gln = glnp1; - } - return gln; - } - } -} /* glaguerre */ - - -void initlaguerrecat(long categs, double alpha, double *rate, double *probcat) -{ /* calculate rates and probabilities to approximate Gamma distribution - of rates with "categs" categories and shape parameter "alpha" using - rates and weights from Generalized Laguerre quadrature */ - long i; - raterootarray lgroot; /* roots of GLaguerre polynomials */ - double f, x, xi, y; - - alpha = alpha - 1.0; - lgroot[1][1] = 1.0+alpha; - for (i = 2; i <= categs; i++) - lgr(i, alpha, lgroot); /* get roots for L^(a)_n */ - /* here get weights */ - /* Gamma weights are (1+a)(1+a/2) ... (1+a/n)*x_i/((n+1)^2 [L_{n+1}^a(x_i)]^2) */ - f = 1; - for (i = 1; i <= categs; i++) - f *= (1.0+alpha/i); - for (i = 1; i <= categs; i++) { - xi = lgroot[categs][i]; - y = glaguerre(categs+1, alpha, xi); - x = f*xi/((categs+1)*(categs+1)*y*y); - rate[i-1] = xi/(1.0+alpha); - probcat[i-1] = x; - } -} /* initlaguerrecat */ - - -double hermite(long n, double x) -{ /* calculates hermite polynomial with degree n and parameter x */ - /* seems to be unprecise for n>13 -> root finder does not converge*/ - double h1 = 1.; - double h2 = 2. * x; - double xx = 2. * x; - long i; - - for (i = 1; i < n; i++) { - xx = 2. * x * h2 - 2. 
* (i) * h1; - h1 = h2; - h2 = xx; - } - return xx; -} /* hermite */ - - -void root_hermite(long n, double *hroot) -{ /* find roots of Hermite polynmials */ - long z; - long ii; - long start; - - if (n % 2 == 0) { - start = n/2; - z = 1; - } else { - start = n/2 + 1; - z=2; - hroot[start-1] = 0.0; - } - for (ii = start; ii < n; ii++) { /* search only upwards*/ - hroot[ii] = halfroot(hermite,n,hroot[ii-1]+EPSILON, 1./n); - hroot[start - z] = -hroot[ii]; - z++; - } -} /* root_hermite */ - - -double halfroot(double (*func)(long m, double x), long n, double startx, - double delta) -{ /* searches from the bound (startx) only in one direction - (by positive or negative delta, which results in - other-bound=startx+delta) - delta should be small. - (*func) is a function with two arguments */ - double xl; - double xu; - double xm; - double fu; - double fl; - double fm = 100000.; - double gradient; - boolean dwn; - - /* decide if we search above or below startx and escapes to trace back - to the starting point that most often will be - the root from the previous calculation */ - if (delta < 0) { - xu = startx; - xl = xu + delta; - } else { - xl = startx; - xu = xl + delta; - } - delta = fabs(delta); - fu = (*func)(n, xu); - fl = (*func)(n, xl); - gradient = (fl-fu)/(xl-xu); - while(fabs(fm) > EPSILON) { /* is root outside of our bracket?*/ - if ((fu<0.0 && fl<0.0) || (fu>0.0 && fl > 0.0)) { - xu += delta; - fu = (*func)(n, xu); - fl = (*func)(n, xl); - gradient = (fl-fu)/(xl-xu); - dwn = (gradient < 0.0) ? true : false; - } else { - xm = xl - fl / gradient; - fm = (*func)(n, xm); - if (dwn) { - if (fm > 0.) { - xl = xm; - fl = fm; - } else { - xu = xm; - fu = fm; - } - } else { - if (fm > 0.) 
{ - xu = xm; - fu = fm; - } else { - xl = xm; - fl = fm; - } - } - gradient = (fl-fu)/(xl-xu); - } - } - return xm; -} /* halfroot */ - - -void hermite_weight(long n, double * hroot, double * weights) -{ - /* calculate the weights for the hermite polynomial at the roots - using formula from Abramowitz and Stegun chapter 25.4.46 p.890 */ - long i; - double hr2; - double numerator; - - numerator = exp(0.6931471805599 * ( n-1.) + logfac(n)) / (n*n); - for (i = 0; i < n; i++) { - hr2 = hermite(n-1, hroot[i]); - weights[i] = numerator / (hr2*hr2); - } -} /* hermiteweight */ - - -void inithermitcat(long categs, double alpha, double *rate, double *probcat) -{ /* calculates rates and probabilities */ - long i; - double *hroot; - double std; - - std = SQRT2 /sqrt(alpha); - hroot = (double *) Malloc((categs+1) * sizeof(double)); - root_hermite(categs, hroot); /* calculate roots */ - hermite_weight(categs, hroot, probcat); /* set weights */ - for (i=0; i= 100.0) - inithermitcat(categs, alpha, rate, probcat); - else - initlaguerrecat(categs, alpha, rate, probcat); -} /* initgammacat */ - - -void inithowmany(long *howmanny, long howoften) -{/* input how many cycles */ - long loopcount; - - loopcount = 0; - do { - printf("How many cycles of %4ld trees?\n", howoften); - scanf("%ld%*[^\n]", howmanny); - getchar(); - countup(&loopcount, 10); - } while (*howmanny <= 0); -} /*inithowmany*/ - - - -void inithowoften(long *howoften) -{ /* input how many trees per cycle */ - long loopcount; - - loopcount = 0; - do { - printf("How many trees per cycle?\n"); - scanf("%ld%*[^\n]", howoften); - getchar(); - countup(&loopcount, 10); - } while (*howoften <= 0); -} /*inithowoften*/ - - -void initlambda(double *lambda) -{ /* input patch length parameter for autocorrelated HMM rates */ - long loopcount; - - loopcount = 0; - do { - printf("Mean block length of sites having the same rate (greater than 1)?\n"); - scanf("%lf%*[^\n]", lambda); - getchar(); - countup(&loopcount, 10); - } while (*lambda 
<= 1.0); - *lambda = 1.0 / *lambda; -} /*initlambda*/ - - -void initfreqs(double *freqa, double *freqc, double *freqg, double *freqt) -{ /* input frequencies of the four bases */ - char input[100]; - long scanned, loopcount; - - printf("Base frequencies for A, C, G, T/U (use blanks to separate)?\n"); - loopcount = 0; - do { - getstryng(input); - scanned = sscanf(input,"%lf%lf%lf%lf%*[^\n]", freqa, freqc, freqg, freqt); - if (scanned == 4) - break; - else - printf("Please enter exactly 4 values.\n"); - countup(&loopcount, 100); - } while (1); -} /* initfreqs */ - - -void initratio(double *ttratio) -{ /* input transition/transversion ratio */ - long loopcount; - - loopcount = 0; - do { - printf("Transition/transversion ratio?\n"); - scanf("%lf%*[^\n]", ttratio); - getchar(); - countup(&loopcount, 10); - } while (*ttratio < 0.0); -} /* initratio */ - - -void initpower(double *power) -{ - printf("New power?\n"); - scanf("%lf%*[^\n]", power); - getchar(); -} /*initpower*/ - - -void initdatasets(long *datasets) -{ - /* handle multi-data set option */ - long loopcount; - boolean done; - - loopcount = 0; - do { - printf("How many data sets?\n"); - scanf("%ld%*[^\n]", datasets); - getchar(); - done = (*datasets > 1); - if (!done) - printf("Bad data sets number: it must be greater than 1\n"); - countup(&loopcount, 10); - } while (!done); -} /* initdatasets */ - - -void justweights(long *datasets) -{ - /* handle multi-data set option by weights */ - long loopcount; - boolean done; - - loopcount = 0; - do { - printf("How many sets of weights?\n"); - scanf("%ld%*[^\n]", datasets); - getchar(); - done = (*datasets >= 1); - if (!done) - printf("BAD NUMBER: it must be greater than 1\n"); - countup(&loopcount, 10); - } while (!done); -} /* justweights */ - - -void initterminal(boolean *ibmpc, boolean *ansi) -{ - /* handle terminal option */ - if (*ibmpc) { - *ibmpc = false; - *ansi = true; - } else if (*ansi) - *ansi = false; - else - *ibmpc = true; -} /*initterminal*/ - - -void 
initnumlines(long *screenlines) -{ - long loopcount; - - loopcount = 0; - do { - *screenlines = readlong("Number of lines on screen?\n"); - countup(&loopcount, 10); - } while (*screenlines <= 12); -} /*initnumlines*/ - - -void initbestrees(bestelm *bestrees, long maxtrees, boolean glob) -{ - /* initializes either global or local field of each array in bestrees */ - long i; - - if (glob) - for (i = 0; i < maxtrees; i++) - bestrees[i].gloreange = false; - else - for (i = 0; i < maxtrees; i++) - bestrees[i].locreange = false; -} /* initbestrees */ - - -void newline(FILE *filename, long i, long j, long k) -{ - /* go to new line if i is a multiple of j, indent k spaces */ - long m; - - if ((i - 1) % j != 0 || i <= 1) - return; - putc('\n', filename); - for (m = 1; m <= k; m++) - putc(' ', filename); -} /* newline */ - - -void inputnumbersold(long *spp, long *chars, long *nonodes, long n) -{ - /* input the numbers of species and of characters */ - - if (fscanf(infile, "%ld%ld", spp, chars) != 2 || *spp <= 0 || *chars <= 0) { - printf( - "ERROR: Unable to read the number of species or characters in data set\n"); - printf( - "The input file is incorrect (perhaps it was not saved text only).\n"); - } - *nonodes = *spp * 2 - n; -} /* inputnumbersold */ - - -void inputnumbers(long *spp, long *chars, long *nonodes, long n) -{ - /* input the numbers of species and of characters */ - - if (fscanf(infile, "%ld%ld", spp, chars) != 2 || *spp <= 0 || *chars <= 0) { - printf( - "ERROR: Unable to read the number of species or characters in data set\n"); - printf( - "The input file is incorrect (perhaps it was not saved text only).\n"); - } - *nonodes = *spp * 2 - n; -} /* inputnumbers */ - - -void inputnumbers2(long *spp, long *nonodes, long n) -{ - /* read species number */ - - if (fscanf(infile, "%ld", spp) != 1 || *spp <= 0) { - printf("ERROR: Unable to read the number of species in data set\n"); - printf( - "The input file is incorrect (perhaps it was not saved text only).\n"); - 
} - fprintf(outfile, "\n%4ld Populations\n", *spp); - *nonodes = *spp * 2 - n; -} /* inputnumbers2 */ - - -void inputnumbers3(long *spp, long *chars) -{ - /* input the numbers of species and of characters */ - - if (fscanf(infile, "%ld%ld", spp, chars) != 2 || *spp <= 0 || *chars <= 0) { - printf( - "ERROR: Unable to read the number of species or characters in data set\n"); - printf( - "The input file is incorrect (perhaps it was not saved text only).\n"); - exxit(-1); - } -} /* inputnumbers3 */ - - -void samenumsp(long *chars, long ith) -{ - /* check if spp is same as the first set in other data sets */ - long cursp, curchs; - - if (eoln(infile)) - scan_eoln(infile); - fscanf(infile, "%ld%ld", &cursp, &curchs); - if (cursp != spp) { - printf( - "\n\nERROR: Inconsistent number of species in data set %ld\n\n", ith); - exxit(-1); - } - *chars = curchs; -} /* samenumsp */ - - -void samenumsp2(long ith) -{ - /* check if spp is same as the first set in other data sets */ - long cursp; - - if (eoln(infile)) - scan_eoln(infile); - if (fscanf(infile, "%ld", &cursp) != 1) { - printf("\n\nERROR: Unable to read number of species in data set %ld\n", - ith); - printf( - "The input file is incorrect (perhaps it was not saved text only).\n"); - exxit(-1); - } - if (cursp != spp) { - printf( - "\n\nERROR: Inconsistent number of species in data set %ld\n\n", ith); - exxit(-1); - } -} /* samenumsp2 */ - - -void readoptions(long *extranum, const char *options) -{ /* read option characters from input file */ - Char ch; - - while (!(eoln(infile))) { - ch = gettc(infile); - uppercase(&ch); - if (strchr(options, ch) != NULL) - (* extranum)++; - else if (!(ch == ' ' || ch == '\t')) { - printf("BAD OPTION CHARACTER: %c\n", ch); - exxit(-1); - } - } - scan_eoln(infile); -} /* readoptions */ - - -void matchoptions(Char *ch, const char *options) -{ /* match option characters to those in auxiliary options line */ - - *ch = gettc(infile); - uppercase(ch); - if (strchr(options, *ch) == NULL) { - 
printf("ERROR: Incorrect auxiliary options line"); - printf(" which starts with %c\n", *ch); - exxit(-1); - } -} /* matchoptions */ - - -void inputweightsold(long chars, steptr weight, boolean *weights) -{ - Char ch; - int i; - - for (i = 1; i < nmlngth ; i++) - getc(infile); - - for (i = 0; i < chars; i++) { - do { - if (eoln(infile)) - scan_eoln(infile); - ch = gettc(infile); - if (ch == '\n') - ch = ' '; - } while (ch == ' '); - weight[i] = 1; - if (isdigit(ch)) - weight[i] = ch - '0'; - else if (isalpha(ch)) { - uppercase(&ch); - weight[i] = ch - 'A' + 10; - } else { - printf("\n\nERROR: Bad weight character: %c\n\n", ch); - exxit(-1); - } - } - scan_eoln(infile); - *weights = true; -} /*inputweightsold*/ - - -void inputweights(long chars, steptr weight, boolean *weights) -{ - /* input the character weights, 0-9 and A-Z for weights 0 - 35 */ - Char ch; - long i; - - for (i = 0; i < chars; i++) { - do { - if (eoln(weightfile)) - scan_eoln(weightfile); - ch = gettc(weightfile); - if (ch == '\n') - ch = ' '; - } while (ch == ' '); - weight[i] = 1; - if (isdigit(ch)) - weight[i] = ch - '0'; - else if (isalpha(ch)) { - uppercase(&ch); - weight[i] = ch - 'A' + 10; - } else { - printf("\n\nERROR: Bad weight character: %c\n\n", ch); - exxit(-1); - } - } - scan_eoln(weightfile); - *weights = true; -} /* inputweights */ - - -void inputweights2(long a, long b, long *weightsum, - steptr weight, boolean *weights, const char *prog) -{ - /* input the character weights, 0 or 1 */ - Char ch; - long i; - - *weightsum = 0; - for (i = a; i < b; i++) { - do { - if (eoln(weightfile)) - scan_eoln(weightfile); - ch = gettc(weightfile); - } while (ch == ' '); - weight[i] = 1; - if (ch == '0' || ch == '1') - weight[i] = ch - '0'; - else { - printf("\n\nERROR: Bad weight character: %c -- ", ch); - printf("weights in %s must be 0 or 1\n", prog); - exxit(-1); - } - *weightsum += weight[i]; - } - *weights = true; - scan_eoln(weightfile); -} /* inputweights2 */ - - -void printweights(FILE 
*filename, long inc, long chars, - steptr weight, const char *letters) -{ - /* print out the weights of sites */ - long i, j; - boolean letterweights; - - letterweights = false; - for (i = 0; i < chars; i++) - if (weight[i] > 9) - letterweights = true; - fprintf(filename, "\n %s are weighted as follows:",letters); - if (letterweights) - fprintf(filename, " (A = 10, B = 11, etc.)\n"); - else - putc('\n', filename); - for (i = 0; i < chars; i++) { - if (i % 60 == 0) { - putc('\n', filename); - for (j = 1; j <= nmlngth + 3; j++) - putc(' ', filename); - } - if (weight[i+inc] < 10) - fprintf(filename, "%ld", weight[i + inc]); - else - fprintf(filename, "%c", 'A'-10+(int)weight[i + inc]); - if ((i+1) % 5 == 0 && (i+1) % 60 != 0) - putc(' ', filename); - } - fprintf(filename, "\n\n"); -} /* printweights */ - - -void inputcategs(long a, long b, steptr category, long categs,const char *prog) -{ - /* input the categories, 1-9 */ - Char ch; - long i; - - for (i = a; i < b; i++) { - do { - if (eoln(catfile)) - scan_eoln(catfile); - ch = gettc(catfile); - } while (ch == ' '); - if ((ch >= '1') && (ch <= ('0'+categs))) - category[i] = ch - '0'; - else { - printf("\n\nERROR: Bad category character: %c", ch); - printf(" -- categories in %s are currently 1-%ld\n", prog, categs); - exxit(-1); - } - } - scan_eoln(catfile); -} /* inputcategs */ - - -void printcategs(FILE *filename, long chars, steptr category, - const char *letters) -{ - /* print out the sitewise categories */ - long i, j; - - fprintf(filename, "\n %s are:\n",letters); - for (i = 0; i < chars; i++) { - if (i % 60 == 0) { - putc('\n', filename); - for (j = 1; j <= nmlngth + 3; j++) - putc(' ', filename); - } - fprintf(filename, "%ld", category[i]); - if ((i+1) % 10 == 0 && (i+1) % 60 != 0) - putc(' ', filename); - } - fprintf(filename, "\n\n"); -} /* printcategs */ - - -void inputfactors(long chars, Char *factor, boolean *factors) -{ - /* reads the factor symbols */ - long i; - - for (i = 0; i < (chars); i++) { - if 
(eoln(factfile)) - scan_eoln(factfile); - factor[i] = gettc(factfile); - if (factor[i] == '\n') - factor[i] = ' '; - } - scan_eoln(factfile); - *factors = true; -} /* inputfactors */ - - -void printfactors(FILE *filename, long chars, Char *factor, const char *letters) -{ - /* print out list of factor symbols */ - long i; - - fprintf(filename, "Factors%s:\n\n", letters); - for (i = 1; i <= nmlngth - 5; i++) - putc(' ', filename); - for (i = 1; i <= (chars); i++) { - newline(filename, i, 55, nmlngth + 3); - putc(factor[i - 1], filename); - if (i % 5 == 0) - putc(' ', filename); - } - putc('\n', filename); -} /* printfactors */ - - -void headings(long chars, const char *letters1, const char *letters2) -{ - long i, j; - - putc('\n', outfile); - j = nmlngth + (chars + (chars - 1) / 10) / 2 - 5; - if (j < nmlngth - 1) - j = nmlngth - 1; - if (j > 37) - j = 37; - fprintf(outfile, "Name"); - for (i = 1; i <= j; i++) - putc(' ', outfile); - fprintf(outfile, "%s\n", letters1); - fprintf(outfile, "----"); - for (i = 1; i <= j; i++) - putc(' ', outfile); - fprintf(outfile, "%s\n\n", letters2); -} /* headings */ - - -void initname(long i) -{ - /* read in species name */ - long j; - - for (j = 0; j < nmlngth; j++) { - if (eoff(infile) | eoln(infile)){ - printf("\n\nERROR: end-of-line or end-of-file"); - printf(" in the middle of species name for species %ld\n\n", i+1); - exxit(-1); - } - nayme[i][j] = gettc(infile); - if ((nayme[i][j] == '(') || (nayme[i][j] == ')') || (nayme[i][j] == ':') - || (nayme[i][j] == ',') || (nayme[i][j] == ';') || (nayme[i][j] == '[') - || (nayme[i][j] == ']')) { - printf("\nERROR: Species name may not contain characters ( ) : ; , [ ] \n"); - printf(" In name of species number %ld there is character %c\n\n", - i+1, nayme[i][j]); - exxit(-1); - } - } -} /* initname */ - - -void findtree(boolean *found,long *pos,long nextree,long *place,bestelm *bestrees) -{ - /* finds tree given by array place in array bestrees by binary search */ - /* used by dnacomp, 
dnapars, dollop, mix, & protpars */ - long i, lower, upper; - boolean below, done; - - below = false; - lower = 1; - upper = nextree - 1; - (*found) = false; - while (!(*found) && lower <= upper) { - (*pos) = (lower + upper) / 2; - i = 3; - done = false; - while (!done) { - done = (i > spp); - if (!done) - done = (place[i - 1] != bestrees[(*pos) - 1].btree[i - 1]); - if (!done) - i++; - } - (*found) = (i > spp); - if (*found) - break; - below = (place[i - 1] < bestrees[(*pos )- 1].btree[i - 1]); - if (below) - upper = (*pos) - 1; - else - lower = (*pos) + 1; - } - if (!(*found) && !below) - (*pos)++; -} /* findtree */ - - -void addtree(long pos,long *nextree,boolean collapse,long *place,bestelm *bestrees) -{ - /* puts tree from array place in its proper position in array bestrees */ - /* used by dnacomp, dnapars, dollop, mix, & protpars */ - long i; - - for (i = *nextree - 1; i >= pos; i--){ - memcpy(bestrees[i].btree, bestrees[i - 1].btree, spp * sizeof(long)); - bestrees[i].gloreange = bestrees[i - 1].gloreange; - bestrees[i - 1].gloreange = false; - bestrees[i].locreange = bestrees[i - 1].locreange; - bestrees[i - 1].locreange = false; - bestrees[i].collapse = bestrees[i - 1].collapse; - } - for (i = 0; i < spp; i++) - bestrees[pos - 1].btree[i] = place[i]; - bestrees[pos - 1].collapse = collapse; - (*nextree)++; -} /* addtree */ - - -long findunrearranged(bestelm *bestrees, long nextree, boolean glob) -{ - /* finds bestree with either global or local field false */ - long i; - - if (glob) { - for (i = 0; i < nextree - 1; i++) - if (!bestrees[i].gloreange) - return i; - } else { - for (i = 0; i < nextree - 1; i++) - if (!bestrees[i].locreange) - return i; - } - return -1; -} /* findunrearranged */ - - -boolean torearrange(bestelm *bestrees, long nextree) -{ /* sees if any best tree is yet to be rearranged */ - - if (findunrearranged(bestrees, nextree, true) >= 0) - return true; - else if (findunrearranged(bestrees, nextree, false) >= 0) - return true; - else - 
return false; -} /* torearrange */ - - -void reducebestrees(bestelm *bestrees, long *nextree) -{ - /* finds best trees with collapsible branches and deletes them */ - long i, j; - - i = 0; - j = *nextree - 2; - do { - while (!bestrees[i].collapse && i < *nextree - 1) i++; - while (bestrees[j].collapse && j >= 0) j--; - if (i < j) { - memcpy(bestrees[i].btree, bestrees[j].btree, spp * sizeof(long)); - bestrees[i].gloreange = bestrees[j].gloreange; - bestrees[i].locreange = bestrees[j].locreange; - bestrees[i].collapse = false; - bestrees[j].collapse = true; - } - } while (i < j); - *nextree = i + 1; -} /* reducebestrees */ - - -void shellsort(double *a, long *b, long n) -{ /* Shell sort keeping a, b in same order */ - /* used by dnapenny, dolpenny, & penny */ - long gap, i, j, itemp; - double rtemp; - - gap = n / 2; - while (gap > 0) { - for (i = gap + 1; i <= n; i++) { - j = i - gap; - while (j > 0) { - if (a[j - 1] > a[j + gap - 1]) { - rtemp = a[j - 1]; - a[j - 1] = a[j + gap - 1]; - a[j + gap - 1] = rtemp; - itemp = b[j - 1]; - b[j - 1] = b[j + gap - 1]; - b[j + gap - 1] = itemp; - } - j -= gap; - } - } - gap /= 2; - } -} /* shellsort */ - - -void getch(Char *c, long *parens, FILE *treefile) -{ /* get next nonblank character */ - - do { - if (eoln(treefile)) - scan_eoln(treefile); - (*c) = gettc(treefile); - - if ((*c) == '\n' || (*c) == '\t') - (*c) = ' '; - } while ( *c == ' ' && !eoff(treefile) ); - if ((*c) == '(') - (*parens)++; - if ((*c) == ')') - (*parens)--; -} /* getch */ - - -void getch2(Char *c, long *parens) -{ /* get next nonblank character */ - do { - if (eoln(intree)) - scan_eoln(intree); - *c = gettc(intree); - if (*c == '\n' || *c == '\t') - *c = ' '; - } while (!(*c != ' ' || eoff(intree))); - if (*c == '(') - (*parens)++; - if (*c == ')') - (*parens)--; -} /* getch2 */ - - -void findch(Char c, Char *ch, long which) -{ /* scan forward until find character c */ - boolean done; - long dummy_parens; - done = false; - while (!done) { - if (c == 
',') { - if (*ch == '(' || *ch == ')' || *ch == ';') { - printf( - "\n\nERROR in user tree %ld: unmatched parenthesis or missing comma\n\n", - which); - exxit(-1); - } else if (*ch == ',') - done = true; - } else if (c == ')') { - if (*ch == '(' || *ch == ',' || *ch == ';') { - printf("\n\nERROR in user tree %ld: ", which); - printf("unmatched parenthesis or non-bifurcated node\n\n"); - exxit(-1); - } else { - if (*ch == ')') - done = true; - } - } else if (c == ';') { - if (*ch != ';') { - printf("\n\nERROR in user tree %ld: ", which); - printf("unmatched parenthesis or missing semicolon\n\n"); - exxit(-1); - } else - done = true; - } - if (*ch != ')' && done) - continue; - getch(ch, &dummy_parens, intree); - } -} /* findch */ - - -void findch2(Char c, long *lparens, long *rparens, Char *ch) -{ /* skip forward in user tree until find character c */ - boolean done; - long dummy_parens; - done = false; - while (!done) { - if (c == ',') { - if (*ch == '(' || *ch == ')' || *ch == ':' || *ch == ';') { - printf("\n\nERROR in user tree: "); - printf("unmatched parenthesis, missing comma"); - printf(" or non-trifurcated base\n\n"); - exxit(-1); - } else if (*ch == ',') - done = true; - } else if (c == ')') { - if (*ch == '(' || *ch == ',' || *ch == ':' || *ch == ';') { - printf( - "\n\nERROR in user tree: unmatched parenthesis or non-bifurcated node\n\n"); - exxit(-1); - } else if (*ch == ')') { - (*rparens)++; - if ((*lparens) > 0 && (*lparens) == (*rparens)) { - if ((*lparens) == spp - 2) { - getch(ch, &dummy_parens, intree); - if (*ch != ';') { - printf( "\n\nERROR in user tree: "); - printf("unmatched parenthesis or missing semicolon\n\n"); - exxit(-1); - } - } - } - done = true; - } - } - if (*ch != ')' && done) - continue; - if (*ch == ')') - getch(ch, &dummy_parens, intree); - } -} /* findch2 */ - - -void processlength(double *valyew, double *divisor, Char *ch, - boolean *minusread, FILE *treefile, long *parens) -{ /* read a branch length from a treefile */ - long 
digit, ordzero; - boolean pointread; - - ordzero = '0'; - *minusread = false; - pointread = false; - *valyew = 0.0; - *divisor = 1.0; - getch(ch, parens, treefile); - digit = (long)(*ch - ordzero); - while ( ((digit <= 9) && (digit >= 0)) || *ch == '.' || *ch == '-') { - if (*ch == '.' ) - pointread = true; - else if (*ch == '-' ) - *minusread = true; - else { - *valyew = *valyew * 10.0 + digit; - if (pointread) - *divisor *= 10.0; - } - getch(ch, parens, treefile); - digit = (long)(*ch - ordzero); - } - if (*minusread) - *valyew = -(*valyew); -} /* processlength */ - - -void writename(long start, long n, long *enterorder) -{ /* write species name and number in entry order */ - long i, j; - - for (i = start; i < start+n; i++) { - printf(" %3ld. ", i+1); - for (j = 0; j < nmlngth; j++) - putchar(nayme[enterorder[i] - 1][j]); - putchar('\n'); - fflush(stdout); - } -} /* writename */ - - -void memerror() -{ - printf("Error allocating memory\n"); - exxit(-1); -} /* memerror */ - - -void odd_malloc(long x) -{ /* error message if attempt to malloc too little or too much memory */ - printf ("ERROR: a function asked for an inappropriate amount of memory:"); - printf (" %ld bytes\n", x); - printf (" This can mean one of two things:\n"); - printf (" 1. The input file is incorrect"); - printf (" (perhaps it was not saved as Text Only),\n"); - printf (" 2. 
There is a bug in the program.\n"); - printf (" Please check your input file carefully.\n"); - printf (" If it seems to be a bug, please mail joe@gs.washington.edu\n"); - printf (" with the name of the program, your computer system type,\n"); - printf (" a full description of the problem, and with the input data file.\n"); - printf (" (which should be in the body of the message, not as an Attachment).\n"); - - /* abort() can be used to crash */ - - exxit(-1); -} - - -MALLOCRETURN *mymalloc(long x) -{ /* wrapper for malloc, allowing error message if too little, too much */ - MALLOCRETURN *new_block; - - if ((x <= 0) || - (x > TOO_MUCH_MEMORY)) - odd_malloc(x); - - new_block = (MALLOCRETURN *)calloc(1,x); - - if (!new_block) { - memerror(); - return (MALLOCRETURN *) new_block; - } else - return (MALLOCRETURN *) new_block; -} /* mymalloc */ - - -void gnu(node **grbg, node **p) -{ /* this and the following are do-it-yourself garbage collectors. - Make a new node or pull one off the garbage list */ - - if (*grbg != NULL) { - *p = *grbg; - *grbg = (*grbg)->next; - } else - *p = (node *)Malloc(sizeof(node)); - - (*p)->back = NULL; - (*p)->next = NULL; - (*p)->tip = false; - (*p)->times_in_tree = 0.0; - (*p)->r = 0.0; - (*p)->theta = 0.0; - (*p)->x = NULL; - (*p)->protx = NULL; /* for the sake of proml */ -} /* gnu */ - - -void chuck(node **grbg, node *p) -{ /* collect garbage on p -- put it on front of garbage list */ - p->back = NULL; - p->next = *grbg; - *grbg = p; -} /* chuck */ - - -void zeronumnuc(node *p, long endsite) -{ - long i,j; - - for (i = 0; i < endsite; i++) - for (j = (long)A; j <= (long)O; j++) - p->numnuc[i][j] = 0; -} /* zeronumnuc */ - - -void zerodiscnumnuc(node *p, long endsite) -{ - long i,j; - - for (i = 0; i < endsite; i++) - for (j = (long)zero; j <= (long)seven; j++) - p->discnumnuc[i][j] = 0; -} /* zerodiscnumnuc */ - - -void allocnontip(node *p, long *zeros, long endsite) -{ /* allocate an interior node */ - /* used by dnacomp, dnapars, & 
dnapenny */ - - p->numsteps = (steptr)Malloc(endsite*sizeof(long)); - p->oldnumsteps = (steptr)Malloc(endsite*sizeof(long)); - p->base = (baseptr)Malloc(endsite*sizeof(long)); - p->oldbase = (baseptr)Malloc(endsite*sizeof(long)); - p->numnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); - memcpy(p->base, zeros, endsite*sizeof(long)); - memcpy(p->numsteps, zeros, endsite*sizeof(long)); - memcpy(p->oldbase, zeros, endsite*sizeof(long)); - memcpy(p->oldnumsteps, zeros, endsite*sizeof(long)); - zeronumnuc(p, endsite); -} /* allocnontip */ - - -void allocdiscnontip(node *p, long *zeros, unsigned char *zeros2, long endsite) -{ /* allocate an interior node */ - /* used by pars */ - - p->numsteps = (steptr)Malloc(endsite*sizeof(long)); - p->oldnumsteps = (steptr)Malloc(endsite*sizeof(long)); - p->discbase = (discbaseptr)Malloc(endsite*sizeof(unsigned char)); - p->olddiscbase = (discbaseptr)Malloc(endsite*sizeof(unsigned char)); - p->discnumnuc = (discnucarray *)Malloc(endsite*sizeof(discnucarray)); - memcpy(p->discbase, zeros2, endsite*sizeof(unsigned char)); - memcpy(p->numsteps, zeros, endsite*sizeof(long)); - memcpy(p->olddiscbase, zeros2, endsite*sizeof(unsigned char)); - memcpy(p->oldnumsteps, zeros, endsite*sizeof(long)); - zerodiscnumnuc(p, endsite); -} /* allocdiscnontip */ - - -void allocnode(node **anode, long *zeros, long endsite) -{ /* allocate a node */ - /* used by dnacomp, dnapars, & dnapenny */ - - *anode = (node *)Malloc(sizeof(node)); - allocnontip(*anode, zeros, endsite); -} /* allocnode */ - - -void allocdiscnode(node **anode, long *zeros, unsigned char *zeros2, - long endsite) -{ /* allocate a node */ - /* used by pars */ - - *anode = (node *)Malloc(sizeof(node)); - allocdiscnontip(*anode, zeros, zeros2, endsite); -} /* allocdiscnontip */ - - -void gnutreenode(node **grbg, node **p, long i, long endsite, long *zeros) -{ /* this and the following are do-it-yourself garbage collectors. 
- Make a new node or pull one off the garbage list */ - - if (*grbg != NULL) { - *p = *grbg; - *grbg = (*grbg)->next; - memcpy((*p)->numsteps, zeros, endsite*sizeof(long)); - memcpy((*p)->oldnumsteps, zeros, endsite*sizeof(long)); - memcpy((*p)->base, zeros, endsite*sizeof(long)); - memcpy((*p)->oldbase, zeros, endsite*sizeof(long)); - zeronumnuc(*p, endsite); - } else - allocnode(p, zeros, endsite); - (*p)->back = NULL; - (*p)->next = NULL; - (*p)->tip = false; - (*p)->visited = false; - (*p)->index = i; - (*p)->numdesc = 0; - (*p)->sumsteps = 0.0; -} /* gnutreenode */ - - -void gnudisctreenode(node **grbg, node **p, long i, - long endsite, long *zeros, unsigned char *zeros2) -{ /* this and the following are do-it-yourself garbage collectors. - Make a new node or pull one off the garbage list */ - - if (*grbg != NULL) { - *p = *grbg; - *grbg = (*grbg)->next; - memcpy((*p)->numsteps, zeros, endsite*sizeof(long)); - memcpy((*p)->oldnumsteps, zeros, endsite*sizeof(long)); - memcpy((*p)->discbase, zeros2, endsite*sizeof(unsigned char)); - memcpy((*p)->olddiscbase, zeros2, endsite*sizeof(unsigned char)); - zerodiscnumnuc(*p, endsite); - } else - allocdiscnode(p, zeros, zeros2, endsite); - (*p)->back = NULL; - (*p)->next = NULL; - (*p)->tip = false; - (*p)->visited = false; - (*p)->index = i; - (*p)->numdesc = 0; - (*p)->sumsteps = 0.0; -} /* gnudisctreenode */ - - -void chucktreenode(node **grbg, node *p) -{ /* collect garbage on p -- put it on front of garbage list */ - - p->back = NULL; - p->next = *grbg; - *grbg = p; -} /* chucktreenode */ - - -void setupnode(node *p, long i) -{ /* initialization of node pointers, variables */ - - p->next = NULL; - p->back = NULL; - p->times_in_tree = (double) i * 1.0; - p->index = i; - p->tip = false; -} /* setupnode */ - - -long count_sibs (node *p) -{ /* Count the number of nodes in a ring, return the total number of */ - /* nodes excluding the one passed into the function (siblings) */ - node *q; - long return_int = 0; - - if 
(p->tip) { - printf ("Error: the function count_sibs called on a tip. This is a bug.\n"); - exxit (-1); - } - - q = p->next; - while (q != p) { - if (q == NULL) { - printf ("Error: a loop of nodes was not closed.\n"); - exxit (-1); - } else { - return_int++; - q = q->next; - } - } - - return return_int; -} /* count_sibs */ - - -void inittrav (node *p) -{ /* traverse to set pointers uninitialized on inserting */ - long i, num_sibs; - node *sib_ptr; - - if (p == NULL) - return; - if (p->tip) - return; - num_sibs = count_sibs (p); - sib_ptr = p; - for (i=0; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_ptr->initialized = false; - inittrav(sib_ptr->back); - } -} /* inittrav */ - - -void commentskipper(FILE ***intree, long *bracket) -{ /* skip over comment bracket contents in reading tree */ - char c; - - c = gettc(**intree); - - while (c != ']') { - - if(feof(**intree)) { - printf("\n\nERROR: Unmatched comment brackets\n\n"); - exxit(-1); - } - - if(c == '[') { - (*bracket)++; - commentskipper(intree, bracket); - } - c = gettc(**intree); - } - (*bracket)--; -} /* commentskipper */ - - -long countcomma(FILE **treefile, long *comma) -{ - /* Modified by Dan F. 11/10/96 */ - - /* The next line inserted so this function leaves the file pointing - to where it found it, not just re-winding it. */ - long orig_position = ftell (*treefile); - - Char c; - long lparen = 0; - long bracket = 0; - (*comma) = 0; - - - for (;;){ - c = getc(*treefile); - if (feof(*treefile)) - break; - if (c == ';') - break; - if (c == ',') - (*comma)++; - if (c == '(') - lparen++; - if (c == '[') { - bracket++; - commentskipper(&treefile, &bracket); - } - } - - /* Don't just rewind, */ - /* rewind (*treefile); */ - /* Re-set to where it pointed when the function was called */ - - fseek (*treefile, orig_position, SEEK_SET); - - return lparen + (*comma); -} /*countcomma*/ -/* countcomma rewritten so it passes back both lparen+comma to allocate nodep - and a pointer to the comma variable. 
This allows the tree to know how many - species exist, and the tips to be placed in the front of the nodep array */ - - -long countsemic(FILE **treefile) -{ /* Used to determine the number of user trees. Return - either a: the number of semicolons in the file outside comments - or b: the first integer in the file */ - Char c; - long return_val, semic = 0; - long bracket = 0; - - /* Eat all whitespace */ - c = gettc(*treefile); - while ((c == ' ') || - (c == '\t') || - (c == '\n')) { - c = gettc(*treefile); - } - - /* Then figure out if the first non-white character is a digit; if - so, return it */ - if (isdigit (c)) { - ungetc(c, *treefile); - fscanf((*treefile), "%ld", &return_val); - } else { - - /* Loop past all characters, count the number of semicolons - outside of comments */ - for (;;){ - c = fgetc(*treefile); - if (feof(*treefile)) - break; - if (c == ';') - semic++; - if (c == '[') { - bracket++; - commentskipper(&treefile, &bracket); - } - } - return_val = semic; - } - - rewind (*treefile); - return return_val; -} /* countsemic */ - - -void hookup(node *p, node *q) -{ /* hook together two nodes */ - - p->back = q; - q->back = p; -} /* hookup */ - - -void link_trees(long local_nextnum, long nodenum, long local_nodenum, - pointarray nodep) -{ - if(local_nextnum == 0) - hookup(nodep[nodenum],nodep[local_nodenum]); - else if(local_nextnum == 1) - hookup(nodep[nodenum], nodep[local_nodenum]->next); - else if(local_nextnum == 2) - hookup(nodep[nodenum],nodep[local_nodenum]->next->next); - else - printf("Error in Link_Trees()"); -} /* link_trees() */ - - -void allocate_nodep(pointarray *nodep, FILE **treefile, long *precalc_tips) -{ /* pre-compute space and allocate memory for nodep */ - - long numnodes; /* returns number commas & ( */ - long numcom = 0; /* returns number commas */ - - numnodes = countcomma(treefile, &numcom) + 1; - *nodep = (pointarray)Malloc(2*numnodes*sizeof(node *)); - - (*precalc_tips) = numcom + 1; /* this will be used in placing the - 
tip nodes in the front region of - nodep. Used for species check? */ -} /* allocate_nodep -plc */ - - -void malloc_pheno (node *p, long endsite, long rcategs) -{ /* Allocate the phenotype arrays; used by dnaml */ - long i; - - p->x = (phenotype)Malloc(endsite*sizeof(ratelike)); - p->underflows = Malloc(endsite * sizeof(double)); - for (i = 0; i < endsite; i++) - p->x[i] = (ratelike)Malloc(rcategs*sizeof(sitelike)); -} /* malloc_pheno */ - - -void malloc_ppheno (node *p,long endsite, long rcategs) -{ - /* Allocate the phenotype arrays; used by proml */ - long i; - - p->protx = (pphenotype)Malloc(endsite*sizeof(pratelike)); - p->underflows = Malloc(endsite*sizeof(double)); - - for (i = 0; i < endsite; i++) - p->protx[i] = (pratelike)Malloc(rcategs*sizeof(psitelike)); -} /* malloc_ppheno */ - - -long take_name_from_tree (Char *ch, Char *str, FILE *treefile) -{ - /* This loop takes in the name from the tree. - Return the length of the name string. */ - - long name_length = 0; - - do { - if ((*ch) == '_') - (*ch) = ' '; - str[name_length++] = (*ch); - if (eoln(treefile)) - scan_eoln(treefile); - (*ch) = gettc(treefile); - if (*ch == '\n') - *ch = ' '; - } while ((*ch) != ':' && (*ch) != ',' && (*ch) != ')' && - (*ch) != '[' && (*ch) != ';' && name_length <= MAXNCH); - return name_length; -} /* take_name_from_tree */ - - -void match_names_to_data (Char *str, pointarray treenode, node **p, long spp) -{ - /* This loop matches names taken from treefile to indexed names in - the data file */ - - boolean found; - long i, n; - - n = 1; - do { - found = true; - for (i = 0; i < nmlngth; i++) { - found = (found && ((str[i] == nayme[n - 1][i]) || - (((nayme[n - 1][i] == '_') && (str[i] == ' ')) || - ((nayme[n - 1][i] == ' ') && (str[i] == '\0'))))); - } - - if (found) - *p = treenode[n - 1]; - else - n++; - - } while (!(n > spp || found)); - - if (n > spp) { - printf("\n\nERROR: Cannot find species: "); - for (i = 0; (str[i] != '\0') && (i < MAXNCH); i++) - putchar(str[i]); - 
printf(" in data file\n\n"); - exxit(-1); - } -} /* match_names_to_data */ - - -void addelement(node **p, node *q, Char *ch, long *parens, FILE *treefile, - pointarray treenode, boolean *goteof, boolean *first, pointarray nodep, - long *nextnode, long *ntips, boolean *haslengths, node **grbg, - initptr initnode,boolean unifok, long maxnodes) -{ - /* Recursive procedure adds nodes to user-defined tree - This is the main (new) tree-reading procedure */ - - node *pfirst; - long i, len = 0, nodei = 0; - boolean notlast; - Char str[MAXNCH]; - node *r; - long furs = 0; - - if ((*ch) == '(') { - (*nextnode)++; /* get ready to use new interior node */ - nodei = *nextnode; /* do what needs to be done at bottom */ - if ( maxnodes != -1 && nodei > maxnodes) { - printf("ERROR in input tree file: Attempting to allocate too\n"); - printf("many nodes. This is usually caused by a unifurcation."); - printf("To use this tree with this program use Retree to read\n"); - printf("and write this tree.\n"); - exxit(-1); - } - (*initnode)(p, grbg, q, len, nodei, ntips, - parens, bottom, treenode, nodep, str, ch, treefile); - pfirst = (*p); - notlast = true; - while (notlast) { /* loop through immediate descendants */ - furs++; - (*initnode)(&(*p)->next, grbg, q, - len, nodei, ntips, parens, nonbottom, treenode, - nodep, str, ch, treefile); - /* ... 
doing what is done before each */ - r = (*p)->next; - getch(ch, parens, treefile); /* look for next character */ - - /* handle blank names */ - if((*ch) == ',' || (*ch) == ':'){ - ungetc((*ch), treefile); - *ch = 0; - } else if((*ch)==')'){ - ungetc((*ch), treefile); - (*parens)++; - *ch = 0; - } - - addelement(&(*p)->next->back, (*p)->next, ch, parens, treefile, - treenode, goteof, first, nodep, nextnode, ntips, - haslengths, grbg, initnode,unifok,maxnodes); - - (*initnode)(&r, grbg, q, len, nodei, ntips, - parens, hslength, treenode, nodep, str, ch, treefile); - /* do what is done after each about length */ - pfirst->numdesc++; /* increment number of descendants */ - *p = r; /* make r point back to p */ - - if ((*ch) == ')') { - notlast = false; - do { - getch(ch, parens, treefile); - } while ((*ch) != ',' && (*ch) != ')' && - (*ch) != '[' && (*ch) != ';' && (*ch) != ':'); - } - } - if ( furs <= 1 && !unifok ) { - printf("ERROR in input tree file: A Unifurcation was detetected.\n"); - printf("To use this tree with this program use retree to read and"); - printf(" write this tree\n"); - exxit(-1); - } - - (*p)->next = pfirst; - (*p) = pfirst; - - } else if ((*ch) != ')') { /* if it's a species name */ - for (i = 0; i < MAXNCH; i++) /* fill string with nulls */ - str[i] = '\0'; - - len = take_name_from_tree (ch, str, treefile); /* get the name */ - - if ((*ch) == ')') - (*parens)--; /* decrement count of open parentheses */ - (*initnode)(p, grbg, q, len, nodei, ntips, - parens, tip, treenode, nodep, str, ch, treefile); - /* do what needs to be done at a tip */ - } else - getch(ch, parens, treefile); - if (q != NULL) - hookup(q, (*p)); /* now hook up */ - (*initnode)(p, grbg, q, len, nodei, ntips, - parens, iter, treenode, nodep, str, ch, treefile); - /* do what needs to be done to variable iter */ - if ((*ch) == ':') - (*initnode)(p, grbg, q, len, nodei, ntips, - parens, length, treenode, nodep, str, ch, treefile); - /* do what needs to be done with length */ - 
else if ((*ch) != ';' && (*ch) != '[') - (*initnode)(p, grbg, q, len, nodei, ntips, - parens, hsnolength, treenode, nodep, str, ch, treefile); - /* ... or what needs to be done when no length */ - if ((*ch) == '[') - (*initnode)(p, grbg, q, len, nodei, ntips, - parens, treewt, treenode, nodep, str, ch, treefile); - /* ... for processing a tree weight */ - else if ((*ch) == ';') /* ... and at end of tree */ - (*initnode)(p, grbg, q, len, nodei, ntips, - parens, unittrwt, treenode, nodep, str, ch, treefile); -} /* addelement */ - - -void treeread (FILE *treefile, node **root, pointarray treenode, - boolean *goteof, boolean *first, pointarray nodep, - long *nextnode, boolean *haslengths, node **grbg, initptr initnode, - boolean unifok,long maxnodes) -{ - /* read in user-defined tree and set it up */ - char ch; - long parens = 0; - long ntips = 0; - - (*goteof) = false; - (*nextnode) = spp; - - /* eat blank lines */ - while (eoln(treefile) && !eoff(treefile)) - scan_eoln(treefile); - - if (eoff(treefile)) { - (*goteof) = true; - return; - } - - getch(&ch, &parens, treefile); - - while (ch != '(') { - /* Eat everything in the file (i.e. 
digits, tabs) until you - encounter an open-paren */ - getch(&ch, &parens, treefile); - } - (*haslengths) = true; - addelement(root, NULL, &ch, &parens, treefile, - treenode, goteof, first, nodep, nextnode, &ntips, - haslengths, grbg, initnode,unifok,maxnodes); - - /* Eat blank lines and end of current line*/ - do { - scan_eoln(treefile); - } - while (eoln(treefile) && !eoff(treefile)); - - (*first) = false; - if (parens != 0) { - printf("\n\nERROR in tree file: unmatched parentheses\n\n"); - exxit(-1); - } -} /* treeread */ - - -void addelement2(node *q, Char *ch, long *parens, FILE *treefile, - pointarray treenode, boolean lngths, double *trweight, boolean *goteof, - long *nextnode, long *ntips, long no_species, boolean *haslengths, - boolean unifok,long maxnodes) -{ - /* recursive procedure adds nodes to user-defined tree - -- old-style bifurcating-only version */ - node *pfirst = NULL, *p; - long i, len, current_loop_index; - boolean notlast, minusread; - Char str[MAXNCH]; - double valyew, divisor; - long furs = 0; - - if ((*ch) == '(') { - - current_loop_index = (*nextnode) + spp; - (*nextnode)++; - - if ( maxnodes != -1 && current_loop_index > maxnodes) { - printf("ERROR in intree file: Attempting to allocate too many nodes\n"); - printf("This is usually caused by a unifurcation. 
To use this\n"); - printf("intree with this program use retree to read and write\n"); - printf("this tree.\n"); - exxit(-1); - } - /* This is an assignment of an interior node */ - p = treenode[current_loop_index]; - pfirst = p; - notlast = true; - while (notlast) { - furs++; - /* This while loop goes through a circle (triad for - bifurcations) of nodes */ - p = p->next; - /* added to ensure that non base nodes in loops have indices */ - p->index = current_loop_index + 1; - - getch(ch, parens, treefile); - - addelement2(p, ch, parens, treefile, treenode, lngths, trweight, - goteof, nextnode, ntips, no_species, haslengths,unifok,maxnodes); - - if ((*ch) == ')') { - notlast = false; - do { - getch(ch, parens, treefile); - } while ((*ch) != ',' && (*ch) != ')' && - (*ch) != '[' && (*ch) != ';' && (*ch) != ':'); - } - } - if ( furs <= 1 && !unifok ) { - printf("ERROR in intree file: A Unifurcation was detetected.\n"); - printf("To use this intree with this program use retree to read and"); - printf(" write this tree\n"); - exxit(-1); - } - - } else if ((*ch) != ')') { - for (i = 0; i < MAXNCH; i++) - str[i] = '\0'; - len = take_name_from_tree (ch, str, treefile); - match_names_to_data (str, treenode, &p, spp); - pfirst = p; - if ((*ch) == ')') - (*parens)--; - (*ntips)++; - strncpy (p->nayme, str, len); - } else - getch(ch, parens, treefile); - - if ((*ch) == '[') { /* getting tree weight from last comment field */ - if (!eoln(treefile)) { - fscanf(treefile, "%lf", trweight); - getch(ch, parens, treefile); - if (*ch != ']') { - printf("\n\nERROR: Missing right square bracket\n\n"); - exxit(-1); - } - else { - getch(ch, parens, treefile); - if (*ch != ';') { - printf("\n\nERROR: Missing semicolon after square brackets\n\n"); - exxit(-1); - } - } - } - } - else if ((*ch) == ';') { - (*trweight) = 1.0 ; - if (!eoln(treefile)) - printf("WARNING: tree weight set to 1.0\n"); - } - else - (*haslengths) = ((*haslengths) && q == NULL); - - if (q != NULL) - hookup(q, pfirst); - 
- if ((*ch) == ':') { - processlength(&valyew, &divisor, ch, - &minusread, treefile, parens); - if (q != NULL) { - if (!minusread) - q->oldlen = valyew / divisor; - else - q->oldlen = 0.0; - if (lngths) { - q->v = valyew / divisor; - q->back->v = q->v; - q->iter = false; - q->back->iter = false; - q->back->iter = false; - } - } - } - -} /* addelement2 */ - - -void treeread2 (FILE *treefile, node **root, pointarray treenode, - boolean lngths, double *trweight, boolean *goteof, - boolean *haslengths, long *no_species,boolean unifok,long maxnodes) -{ - /* read in user-defined tree and set it up - -- old-style bifurcating-only version */ - char ch; - long parens = 0; - long ntips = 0; - long nextnode; - - (*goteof) = false; - nextnode = 0; - - /* Eats all blank lines at start of file */ - while (eoln(treefile) && !eoff(treefile)) - scan_eoln(treefile); - - if (eoff(treefile)) { - (*goteof) = true; - return; - } - - getch(&ch, &parens, treefile); - - while (ch != '(') { - /* Eat everything in the file (i.e. 
digits, tabs) until you - encounter an open-paren */ - getch(&ch, &parens, treefile); - } - - addelement2(NULL, &ch, &parens, treefile, treenode, lngths, trweight, - goteof, &nextnode, &ntips, (*no_species), haslengths,unifok,maxnodes); - (*root) = treenode[*no_species]; - - /*eat blank lines */ - while (eoln(treefile) && !eoff(treefile)) - scan_eoln(treefile); - - (*root)->oldlen = 0.0; - - if (parens != 0) { - printf("\n\nERROR in tree file: unmatched parentheses\n\n"); - exxit(-1); - } -} /* treeread2 */ - - -void exxit(int exitcode) -{ -#ifdef WIN32 - if (exitcode == 0) -#endif - exit (exitcode); -#ifdef WIN32 - else { - puts ("Hit Enter or Return to close program."); - puts(" You may have to hit Enter or Return twice."); - getchar (); - getchar (); - phyRestoreConsoleAttributes(); - exit (exitcode); - } -#endif -} /* exxit */ - - -char gettc(FILE* file) -{ /* catch eof's so that other functions not expecting an eof - * won't have to worry about it */ - int ch; - - ch=getc(file); - - if (ch == EOF ) { - puts("Unexpected End of File"); - exxit(-1); - } - - if ( ch == '\r' ) { - ch = getc(file); - if ( ch != '\n' ) - ungetc(ch,file); - ch = '\n'; - } - return ch; -} /* gettc */ - -void unroot(tree *t, long nonodes) -{ - /* used by fitch, restml and contml */ - if (t->start->back == NULL) { - if (t->start->next->back->tip) - t->start = t->start->next->next->back; - else t->start = t->start->next->back; - } - if (t->start->next->back == NULL) { - if (t->start->back->tip) - t->start = t->start->next->next->back; - else t->start = t->start->back; - } - if (t->start->next->next->back == NULL) { - if (t->start->back->tip) - t->start = t->start->next->back; - else t->start = t->start->back; - } - - - unroot_r(t->start,t->nodep,nonodes); - unroot_r(t->start->back, t->nodep, nonodes); -} - -void unroot_here(node* root, node** nodep, long nonodes) -{ - node* tmpnode; - double newl; - /* used by unroot */ - /* assumes bifurcation this is ok in the programs that use it */ - 
- - newl = root->next->oldlen + root->next->next->oldlen; - root->next->back->oldlen = newl; - root->next->next->back->oldlen = newl; - - newl = root->next->v + root->next->next->v; - root->next->back->v = newl; - root->next->next->back->v = newl; - - root->next->back->back=root->next->next->back; - root->next->next->back->back = root->next->back; - - while ( root->index != nonodes ) { - tmpnode = nodep[ root->index ]; - nodep[root->index] = root; - root->index++; - root->next->index++; - root->next->next->index++; - nodep[root->index - 2] = tmpnode; - tmpnode->index--; - tmpnode->next->index--; - tmpnode->next->next->index--; - } -} - -void unroot_r(node* p, node** nodep, long nonodes) -{ - /* used by unroot */ - node *q; - - if ( p->tip) return; - - q = p->next; - while ( q != p ) { - if (q->back == NULL) - unroot_here(q,nodep,nonodes); - else unroot_r(q->back,nodep,nonodes); - q = q->next; - } -} - -void clear_connections(tree *t, long nonodes) -{ - long i; - for ( i = 0 ; i < nonodes ; i++) { - if ( i > spp) { - t->nodep[i]->next->back = NULL; - t->nodep[i]->next->v = 0; - t->nodep[i]->next->next->back = NULL; - t->nodep[i]->next->next->v = 0; - } - t->nodep[i]->back = NULL; - t->nodep[i]->v = 0; - } -} - -#ifdef WIN32 -void phySaveConsoleAttributes() -{ - GetConsoleScreenBufferInfo( hConsoleOutput, &savecsbi ); -} /* PhySaveConsoleAttributes */ - - -void phySetConsoleAttributes() -{ - hConsoleOutput = GetStdHandle(STD_OUTPUT_HANDLE); - - phySaveConsoleAttributes(); - - SetConsoleTextAttribute(hConsoleOutput, - BACKGROUND_GREEN | BACKGROUND_BLUE | BACKGROUND_INTENSITY); -} /* phySetConsoleAttributes */ - - -void phyRestoreConsoleAttributes() -{ - COORD coordScreen = { 0, 0 }; - DWORD cCharsWritten; - DWORD dwConSize; - - dwConSize = savecsbi.dwSize.X * savecsbi.dwSize.Y; - - SetConsoleTextAttribute(hConsoleOutput, savecsbi.wAttributes); - - FillConsoleOutputAttribute( hConsoleOutput, savecsbi.wAttributes, - dwConSize, coordScreen, &cCharsWritten ); -} /* 
phyRestoreConsoleAttributes */ - - -void phyFillScreenColor() -{ - COORD coordScreen = { 0, 0 }; - DWORD cCharsWritten; - CONSOLE_SCREEN_BUFFER_INFO csbi; /* to get buffer info */ - DWORD dwConSize; - - GetConsoleScreenBufferInfo( hConsoleOutput, &csbi ); - dwConSize = csbi.dwSize.X * csbi.dwSize.Y; - - FillConsoleOutputAttribute( hConsoleOutput, csbi.wAttributes, - dwConSize, coordScreen, &cCharsWritten ); -} /* PhyFillScreenColor */ - - -void phyClearScreen() -{ - COORD coordScreen = { 0, 0 }; /* here's where we'll home the - cursor */ - DWORD cCharsWritten; - CONSOLE_SCREEN_BUFFER_INFO csbi; /* to get buffer info */ - DWORD dwConSize; /* number of character cells in - the current buffer */ - - /* get the number of character cells in the current buffer */ - - GetConsoleScreenBufferInfo( hConsoleOutput, &csbi ); - dwConSize = csbi.dwSize.X * csbi.dwSize.Y; - - /* fill the entire screen with blanks */ - - FillConsoleOutputCharacter( hConsoleOutput, (TCHAR) ' ', - dwConSize, coordScreen, &cCharsWritten ); - - /* get the current text attribute */ - - GetConsoleScreenBufferInfo( hConsoleOutput, &csbi ); - - /* now set the buffer's attributes accordingly */ - - FillConsoleOutputAttribute( hConsoleOutput, csbi.wAttributes, - dwConSize, coordScreen, &cCharsWritten ); - - /* put the cursor at (0, 0) */ - - SetConsoleCursorPosition( hConsoleOutput, coordScreen ); - return; -} /* PhyClearScreen */ -#endif - diff --git a/forester/archive/RIO/others/phylip_mod/src/phylip.h b/forester/archive/RIO/others/phylip_mod/src/phylip.h deleted file mode 100644 index 205d9de..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/phylip.h +++ /dev/null @@ -1,607 +0,0 @@ -/*Modified by Christian Zmasek. Use at your own risk.*/ - - -#ifndef _PHYLIP_H_ -#define _PHYLIP_H_ - -/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. - Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe, - Mike Palczewski, Doug Buxton and Dan Fineman. 
- Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. */ - -#define VERSION "3.65" - -/* machine-specific stuff: - based on a number of factors in the library stdlib.h, we will try - to determine what kind of machine/compiler this program is being - built on. However, it doesn't always succeed. However, if you have - ANSI conforming C, it will probably work. - - We will try to figure out machine type - based on defines in stdio, and compiler-defined things as well.: */ - -#include -#include -#ifdef WIN32 -#include - -void phyClearScreen(void); -void phySaveConsoleAttributes(void); -void phySetConsoleAttributes(void); -void phyRestoreConsoleAttributes(void); -void phyFillScreenColor(void); - -#endif - -#ifdef GNUDOS -#define DJGPP -#define DOS -#endif - -#ifdef THINK_C -#define MAC -#endif -#ifdef __MWERKS__ -#ifndef WIN32 -#define MAC -#endif -#endif - -#ifdef __CMS_OPEN -#define CMS -#define EBCDIC true -#define INFILE "infile data" -#define OUTFILE "outfile data" -#define FONTFILE "fontfile data" -#define PLOTFILE "plotfile data" -#define INTREE "intree data" -#define INTREE2 "intree data 2" -#define OUTTREE "outtree data" -#define CATFILE "categories data" -#define WEIGHTFILE "weights data" -#define ANCFILE "ancestors data" -#define MIXFILE "mixture data" -#define FACTFILE "factors data" -#else -#define EBCDIC false -#define INFILE "infile" -#define OUTFILE "outfile" -#define FONTFILE "fontfile" /* on unix this might be /usr/local/lib/fontfile */ -#define PLOTFILE "plotfile" -#define INTREE "intree" -#define INTREE2 "intree2" -#define OUTTREE "outtree" -#define CATFILE "categories" -#define WEIGHTFILE "weights" -#define ANCFILE "ancestors" -#define MIXFILE "mixture" -#define FACTFILE "factors" -#endif - -#ifdef L_ctermid /* try and detect for sysV or V7. 
*/ -#define SYSTEM_FIVE -#endif - -#ifdef sequent -#define SYSTEM_FIVE -#endif - -#ifndef SYSTEM_FIVE -#include -# if defined(_STDLIB_H_) || defined(_H_STDLIB) || defined(H_SCCSID) || defined(unix) -# define UNIX -# define MACHINE_TYPE "BSD Unix C" -# endif -#endif - - -#ifdef __STDIO_LOADED -#define VMS -#define MACHINE_TYPE "VAX/VMS C" -#endif - -#ifdef __WATCOMC__ -#define QUICKC -#define WATCOM -#define DOS -#include "graph.h" -#endif -/* watcom-c has graphics library calls that are almost identical to * - * quick-c, so the "QUICKC" symbol name stays. */ - - -#ifdef _QC -#define MACHINE_TYPE "MS-DOS / Quick C" -#define QUICKC -#include "graph.h" -#define DOS -#endif - -#ifdef _DOS_MODE -#define MACHINE_TYPE "MS-DOS /Microsoft C " -#define DOS /* DOS is always defined if on a DOS machine */ -#define MSC /* MSC is defined for microsoft C */ -#endif - -#ifdef __MSDOS__ /* TURBO c compiler, ONLY (no other DOS C compilers) */ -#define DOS -#define TURBOC -#include -#include -#endif - -#ifdef DJGPP /* DJ Delorie's original gnu C/C++ port */ -#include -#endif - -#ifndef MACHINE_TYPE -#define MACHINE_TYPE "ANSI C" -#endif - -#ifdef DOS -#define MALLOCRETURN void -#else -#define MALLOCRETURN void -#endif -#ifdef VMS -#define signed /* signed doesn't exist in VMS */ -#endif - -/* default screen types */ -/* if on a DOS but not a Windows system can use IBM PC screen controls */ -#ifdef DOS -#ifndef WIN32 -#define IBMCRT true -#define ANSICRT false -#endif -#endif -/* if on a Mac cannot use screen controls */ -#ifdef MAC -#define IBMCRT false -#define ANSICRT false -#endif -/* if on a Windows system can use IBM PC screen controls */ -#ifdef WIN32 -#define IBMCRT true -#define ANSICRT false -#endif -/* otherwise, let's assume we are on a Linux or Unix system - with ANSI terminal controls */ -#ifndef MAC -#ifndef DOS -#ifndef WIN32 -#define IBMCRT false -#define ANSICRT true -#endif -#endif -#endif - -#ifdef DJGPP -#undef MALLOCRETURN -#define MALLOCRETURN void -#endif - - 
-/* includes: */ -#ifdef UNIX -#include -#else -#include -#endif - -#include -#include - -#ifdef MAC -#ifdef DRAW -#include "interface.h" -#else -#include "macface.h" -#endif -#define getch gettch -#endif - -/* directory delimiters */ -#ifdef MAC -#define DELIMITER ':' -#else -#ifdef WIN32 -#define DELIMITER '\\' -#else -#define DELIMITER '/' -#endif -#endif - - -#define FClose(file) if (file) fclose(file) ; file=NULL -#define Malloc(x) mymalloc((long)x) - -typedef void *Anyptr; -#define Signed signed -#define Const const -#define Volatile volatile -#define Char char /* Characters (not bytes) */ -#define Static static /* Private global funcs and vars */ -#define Local static /* Nested functions */ - -typedef unsigned char boolean; - -#define true 1 -#define false 0 -#define SETBITS 31 - -MALLOCRETURN *mymalloc(long); - -#define FNMLNGTH 200 /* length of array to store a file name */ -#define MAXNCH 26 /*changed from to 20 to 26 by CZ 2006-07-28 */ -#define nmlngth 26 /*changed from to 10 to 26 by CZ 2006-07-28 */ /* number of characters in species name */ -#define maxcategs 9 /* maximum number of site types */ -#define maxcategs2 11 /* maximum number of site types + 2 */ -#define point "." -#define pointe '.' 
-#define down 2 -#define MAXSHIMOTREES 100 - -#define smoothings 4 /* number of passes through smoothing algorithm */ -#define iterations 4 /* number of iterates for each branch */ -#define epsilon 0.0001 /* small number used in makenewv */ -#define EPSILON 0.00001 /* small number used in hermite root-finding */ -#define initialv 0.1 /* starting branch length unless otherwise */ -#define over 60 /* maximum width all branches of tree on screen */ -#define SQRTPI 1.7724538509055160273 -#define SQRT2 1.4142135623730950488 - -#define NLRSAVES 5 /* number of views that need to be saved during local * - * rearrangement */ - -typedef long *steptr; -typedef long longer[6]; -typedef char naym[MAXNCH]; -typedef long *bitptr; -typedef double raterootarray[maxcategs2][maxcategs2]; - -typedef struct bestelm { - long *btree; - boolean gloreange; - boolean locreange; - boolean collapse; -} bestelm; - -extern FILE *infile, *outfile, *intree, *intree2, *outtree, - *weightfile, *catfile, *ancfile, *mixfile, *factfile; -extern long spp, words, bits; -extern boolean ibmpc, ansi, tranvsp; -extern naym *nayme; /* names of species */ - - -#define ebcdic EBCDIC - -typedef Char plotstring[MAXNCH]; - -/* Approx. 1GB, used to test for memory request errors */ -#define TOO_MUCH_MEMORY 1000000000 - - -/* The below pre-processor commands define the type used to store - group arrays. We can't use #elif for metrowerks, so we use - cascaded if statements */ -#include - -/* minimum double we feel safe with, anything less will be considered - underflow */ -#define MIN_DOUBLE 10e-100 - -/* K&R says that there should be a plus in front of the number, but no - machine we've seen actually uses one; we'll include it just in - case. 
*/ -#define MAX_32BITS 2147483647 -#define MAX_32BITS_PLUS +2147483647 - -/* If ints are 4 bytes, use them */ -#if INT_MAX == MAX_32BITS -typedef int group_type; - -#else - #if INT_MAX == MAX_32BITS_PLUS - typedef int group_type; - - #else - /* Else, if longs are 4 bytes, use them */ - #if LONG_MAX == MAX_32BITS - typedef long group_type; - - #else - #if LONG_MAX == MAX_32BITS_PLUS - typedef long group_type; - - /* Default to longs */ - #else - typedef long group_type; - #endif - - #endif - #endif -#endif - -/* for many programs */ - -#define maxuser 1000 /* maximum number of user-defined trees */ - -typedef Char **sequence; - -typedef enum { - A, C, G, T, O -} bases; - -typedef enum { - alanine, arginine, asparagine, aspartic, cysteine, - glutamine, glutamic, glycine, histidine, isoleucine, - leucine, lysine, methionine, phenylalanine, proline, - serine, threonine, tryptophan, tyrosine, valine -} acids; - -/* for Pars */ - -typedef enum { - zero = 0, one, two, three, four, five, six, seven -} discbases; - -/* for Protpars */ - -typedef enum { - ala, arg, asn, asp, cys, gln, glu, gly, his, ileu, leu, lys, met, phe, pro, - ser1, ser2, thr, trp, tyr, val, del, stop, asx, glx, ser, unk, quest -} aas; - -typedef double sitelike[(long)T - (long)A + 1]; /* used in dnaml, dnadist */ -typedef double psitelike[(long)valine - (long)alanine + 1]; - /* used in proml */ - -typedef long *baseptr; /* baseptr used in dnapars, dnacomp & dnapenny */ -typedef long *baseptr2; /* baseptr used in dnamove */ -typedef unsigned char *discbaseptr; /* discbaseptr used in pars */ -typedef sitelike *ratelike; /* used in dnaml ... 
*/ -typedef psitelike *pratelike; /* used in proml */ -typedef ratelike *phenotype; /* phenotype used in dnaml, dnamlk, dnadist */ -typedef pratelike *pphenotype; /* phenotype used in proml */ -typedef double *sitelike2; -typedef sitelike2 *phenotype2; /* phenotype2 used in restml */ -typedef double *phenotype3; /* for continuous char programs */ - -typedef double *vector; /* used in distance programs */ - -typedef long nucarray[(long)O - (long)A + 1]; -typedef long discnucarray[(long)seven - (long)zero + 1]; - -typedef enum { nocollap, tocollap, undefined } collapstates; - -typedef enum { bottom, nonbottom, hslength, tip, iter, length, - hsnolength, treewt, unittrwt } initops; - - -typedef double **transmatrix; -typedef transmatrix *transptr; /* transptr used in restml */ - -typedef long sitearray[3]; -typedef sitearray *seqptr; /* seqptr used in protpars */ - -typedef struct node { - struct node *next, *back; - plotstring nayme; - long naymlength, tipsabove, index; - double times_in_tree; /* Previously known as cons_index */ - double xcoord, ycoord; - long long_xcoord, long_ycoord; /* for use in cons. 
*/ - double oldlen, length, r, theta, oldtheta, width, depth, - tipdist, lefttheta, righttheta; - group_type *nodeset; /* used by accumulate -plc */ - long ymin, ymax; /* used by printree -plc */ - boolean haslength; /* haslength used in dnamlk */ - boolean iter; /* iter used in dnaml, fitch & restml */ - boolean initialized; /* initialized used in dnamlk & restml */ - long branchnum; /* branchnum used in restml */ - phenotype x; /* x used in dnaml, dnamlk, dnadist */ - phenotype2 x2; /* x2 used in restml */ - phenotype3 view; /* contml etc */ - pphenotype protx; /* protx used in proml */ - aas *seq; /* the sequence used in protpars */ - seqptr siteset; /* temporary storage for aa's used in protpars*/ - double v, deltav, ssq; /* ssq used only in contrast */ - double bigv; /* bigv used in contml */ - double tyme, oldtyme; /* used in dnamlk */ - double t; /* time in kitsch */ - boolean sametime; /* bookkeeps scrunched nodes in kitsch */ - double weight; /* weight of node used by scrunch in kitsch */ - boolean processed; /* used by evaluate in kitsch */ - boolean deleted; /* true if node is deleted (retree) */ - boolean hasname; /* true if tip has a name (retree) */ - double beyond; /* distance beyond this node to most distant tip */ - /* (retree) */ - boolean deadend; /* true if no undeleted nodes beyond this node */ - /* (retree) */ - boolean onebranch; /* true if there is one undeleted node beyond */ - /* this node (retree) */ - struct node *onebranchnode; - /* if there is, a pointer to that node (retree)*/ - double onebranchlength; /* if there is, the distance from here to there*/ - /* (retree) */ - boolean onebranchhaslength; /* true if there is a valid combined length*/ - /* from here to there (retree) */ - collapstates collapse; /* used in dnapars & dnacomp */ - boolean tip; - boolean bottom; /* used in dnapars & dnacomp, disc char */ - boolean visited; /* used in dnapars & dnacomp disc char */ - baseptr base; /* the sequence in dnapars/comp/penny */ - 
discbaseptr discbase; /* the sequence in pars */ - baseptr2 base2; /* the sequence in dnamove */ - baseptr oldbase; /* record previous sequence */ - discbaseptr olddiscbase; /* record previous sequence */ - long numdesc; /* number of immediate descendants */ - nucarray *numnuc; /* bookkeeps number of nucleotides */ - discnucarray *discnumnuc; /* bookkeeps number of nucleotides */ - steptr numsteps; /* bookkeeps steps */ - steptr oldnumsteps; /* record previous steps */ - double sumsteps; /* bookkeeps sum of steps */ - nucarray cumlengths; /* bookkeeps cummulative minimum lengths */ - discnucarray disccumlengths; /* bookkeeps cummulative minimum lengths */ - nucarray numreconst; /* bookkeeps number of reconstructions */ - discnucarray discnumreconst; /* bookkeeps number of reconstructions */ - vector d, w; /* for distance matrix programs */ - double dist; /* dist used in fitch */ - bitptr stateone, statezero; /* discrete char programs */ - long maxpos; /* maxpos used in Clique */ - Char state; /* state used in Dnamove, Dolmove & Move */ - double* underflows; /* used to record underflow */ -} node; - -typedef node **pointarray; - -typedef struct tree { - pointarray nodep; - double likelihood; - transptr trans; /* restml */ - long *freetrans; /* restml */ - long transindex; /* restml */ - node *start; - node *root; -} tree; - -typedef void (*initptr)(node **, node **, node *, long, long, - long *, long *, initops, pointarray, - pointarray, Char *, Char *, FILE *); - -#ifndef OLDC -/* function prototypes */ -void scan_eoln(FILE *); -boolean eoff(FILE *); -boolean eoln(FILE *); -int filexists(char *); -const char* get_command_name (const char *); -void getstryng(char *); -void openfile(FILE **,const char *,const char *,const char *,const char *, - char *); -void cleerhome(void); -void loopcount(long *, long); -double randum(longer); -void randumize(longer, long *); -double normrand(longer); -long readlong(const char *); - -void uppercase(Char *); -void initseed(long *, 
long *, longer); -void initjumble(long *, long *, longer, long *); -void initoutgroup(long *, long); -void initthreshold(double *); -void initcatn(long *); -void initcategs(long, double *); -void initprobcat(long, double *, double *); -double logfac (long); -double halfroot(double (*func)(long , double), long, double, double); -double hermite(long, double); -void initlaguerrecat(long, double, double *, double *); -void root_hermite(long, double *); -void hermite_weight(long, double *, double *); -void inithermitcat(long, double, double *, double *); -void lgr(long, double, raterootarray); -double glaguerre(long, double, double); -void initgammacat(long, double, double *, double *); -void inithowmany(long *, long); -void inithowoften(long *); - -void initlambda(double *); -void initfreqs(double *, double *, double *, double *); -void initratio(double *); -void initpower(double *); -void initdatasets(long *); -void justweights(long *); -void initterminal(boolean *, boolean *); -void initnumlines(long *); -void initbestrees(bestelm *, long, boolean); -void newline(FILE *, long, long, long); - -void inputnumbers(long *, long *, long *, long); -void inputnumbersold(long *, long *, long *, long); -void inputnumbers2(long *, long *, long n); -void inputnumbers3(long *, long *); -void samenumsp(long *, long); -void samenumsp2(long); -void readoptions(long *, const char *); -void matchoptions(Char *, const char *); -void inputweights(long, steptr, boolean *); -void inputweightsold(long, steptr, boolean *); -void inputweights2(long, long, long *, steptr, boolean *, const char *); -void printweights(FILE *, long, long, steptr, const char *); - -void inputcategs(long, long, steptr, long, const char *); -void printcategs(FILE *, long, steptr, const char *); -void inputfactors(long, Char *, boolean *); -void inputfactorsnew(long, Char *, boolean *); -void printfactors(FILE *, long, Char *, const char *); -void headings(long, const char *, const char *); -void initname(long); 
-void findtree(boolean *,long *,long,long *,bestelm *); -void addtree(long,long *,boolean,long *,bestelm *); -long findunrearranged(bestelm *, long, boolean); -boolean torearrange(bestelm *, long); - -void reducebestrees(bestelm *, long *); -void shellsort(double *, long *, long); -void getch(Char *, long *, FILE *); -void getch2(Char *, long *); -void findch(Char, Char *, long); -void findch2(Char, long *, long *, Char *); -void findch3(Char, Char *, long, long); -void processlength(double *,double *,Char *,boolean *,FILE *,long *); -void writename(long, long, long *); -void memerror(void); - -void odd_malloc(long); - -void gnu(node **, node **); -void chuck(node **, node *); -void zeronumnuc(node *, long); -void zerodiscnumnuc(node *, long); -void allocnontip(node *, long *, long); -void allocdiscnontip(node *, long *, unsigned char *, long ); -void allocnode(node **, long *, long); -void allocdiscnode(node **, long *, unsigned char *, long ); -void gnutreenode(node **, node **, long, long, long *); -void gnudisctreenode(node **, node **, long , long, long *, - unsigned char *); - -void chucktreenode(node **, node *); -void setupnode(node *, long); -long count_sibs (node *); -void inittrav (node *); -void commentskipper(FILE ***, long *); -long countcomma(FILE **, long *); -long countsemic(FILE **); -void hookup(node *, node *); -void link_trees(long, long , long, pointarray); -void allocate_nodep(pointarray *, FILE **, long *); - -void malloc_pheno(node *, long, long); -void malloc_ppheno(node *, long, long); -long take_name_from_tree (Char *, Char *, FILE *); -void match_names_to_data (Char *, pointarray, node **, long); -void addelement(node **, node *, Char *, long *, FILE *, pointarray, - boolean *, boolean *, pointarray, long *, long *, boolean *, - node **, initptr,boolean,long); -void treeread (FILE *, node **, pointarray, boolean *, boolean *, - pointarray, long *, boolean *, node **, initptr,boolean,long); -void addelement2(node *, Char *, long *, FILE 
*, pointarray, boolean, - double *, boolean *, long *, long *, long, boolean *,boolean, - long); -void treeread2 (FILE *, node **, pointarray, boolean, double *, - boolean *, boolean *, long *,boolean,long); -void exxit (int); -void countup(long *loopcount, long maxcount); -char gettc(FILE* file); -void unroot_r(node* p,node ** nodep, long nonodes); -void unroot(tree* t,long nonodes); -void unroot_here(node* root, node** nodep, long nonodes); -void clear_connections(tree *t, long nonodes); -void init(int argc, char** argv); -#endif /* OLDC */ -#endif /* _PHYLIP_H_ */ diff --git a/forester/archive/RIO/others/phylip_mod/src/proml.c b/forester/archive/RIO/others/phylip_mod/src/proml.c deleted file mode 100644 index 0302897..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/proml.c +++ /dev/null @@ -1,3202 +0,0 @@ - -#include "phylip.h" -#include "seq.h" - -/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. - Written by Joseph Felsenstein, Lucas Mix, Akiko Fuseki, Sean Lamont, - Andrew Keeffe, Dan Fineman, and Patrick Colacurcio. - Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. 
*/ - - -typedef long vall[maxcategs]; -typedef double contribarr[maxcategs]; - -#ifndef OLDC -/* function prototypes */ -void init_protmats(void); -void getoptions(void); -void makeprotfreqs(void); -void allocrest(void); -void doinit(void); -void inputoptions(void); -void input_protdata(long); -void makeweights(void); -void prot_makevalues(long, pointarray, long, long, sequence, steptr); -void prot_inittable(void); - -void alloc_pmatrix(long); -void getinput(void); -void inittravtree(node *); -void prot_nuview(node *); -void prot_slopecurv(node *, double, double *, double *, double *); -void makenewv(node *); -void update(node *); -void smooth(node *); -void make_pmatrix(double **, double **, double **, long, double, - double, double *, double **); -double prot_evaluate(node *, boolean); - -void treevaluate(void); -void promlcopy(tree *, tree *, long, long); -void proml_re_move(node **, node **); -void insert_(node *, node *, boolean); -void addtraverse(node *, node *, boolean); -void rearrange(node *, node *); -void proml_coordinates(node *, double, long *, double *); -void proml_printree(void); -void sigma(node *, double *, double *, double *); -void describe(node *); - -void prot_reconstr(node *, long); -void rectrav(node *, long, long); -void summarize(void); -void initpromlnode(node **, node **, node *, long, long, long *, long *, - initops, pointarray, pointarray, Char *, Char *, FILE *); -void dnaml_treeout(node *); -void buildnewtip(long, tree *); -void buildsimpletree(tree *); -void free_all_protx (long, pointarray); -void maketree(void); -void clean_up(void); -void globrearrange(void); -void proml_unroot(node* root, node** nodep, long nonodes) ; -void reallocsites(void); -void prot_freetable(void); -void free_pmatrix(long sib); -void alloclrsaves(void); -void freelrsaves(void); -void resetlrsaves(void); -/* function prototypes */ -#endif - - -long rcategs; -boolean haslengths; -long oldendsite=0; - -Char infilename[100], outfilename[100], intreename[100], 
outtreename[100], - catfilename[100], weightfilename[100]; -double *rate, *rrate, *probcat; -long nonodes2, sites, weightsum, categs, - datasets, ith, njumble, jumb; -long inseed, inseed0, parens; -boolean global, jumble, weights, trout, usertree, inserting = false, - ctgry, rctgry, auto_, hypstate, progress, mulsets, justwts, firstset, - improve, smoothit, polishing, lngths, gama, invar, usepmb, usepam, usejtt; -tree curtree, bestree, bestree2, priortree; -node *qwhere, *grbg, *addwhere; -double cv, alpha, lambda, invarfrac, bestyet; -long *enterorder; -steptr aliasweight; -contribarr *contribution, like, nulike, clai; -double **term, **slopeterm, **curveterm; -longer seed; -char *progname; -char aachar[26]="ARNDCQEGHILKMFPSTWYVBZX?*-"; -node **lrsaves; - -/* Local variables for maketree, propagated globally for c version: */ -long k, nextsp, numtrees, maxwhich, mx, mx0, mx1, shimotrees; -double dummy, maxlogl; -boolean succeeded, smoothed; -double **l0gf; -double *l0gl; -double **tbl; -Char ch, ch2; -long col; -vall *mp; - - -/* Variables introduced to allow for protein probability calculations */ -long max_num_sibs; /* maximum number of siblings used in a */ - /* nuview calculation. determines size */ - /* final size of pmatrices */ -double *eigmat; /* eig matrix variable */ -double **probmat; /* prob matrix variable */ -double ****dpmatrix; /* derivative of pmatrix */ -double ****ddpmatrix; /* derivative of xpmatrix */ -double *****pmatrices; /* matrix of probabilities of protien */ - /* conversion. The 5 subscripts refer */ - /* to sibs, rcategs, categs, final and */ - /* initial states, respectively. 
*/ -double freqaa[20]; /* amino acid frequencies */ - -/* this JTT matrix decomposition thanks to Elisabeth Tillier */ -static double jtteigmat[] = -{0.0, -0.7031123, -0.6484345, -0.6086499, -0.5514432, --0.772664, -0.8643413, -1.0620756, -0.9965552, -1.1671808, --1.2222418,-0.4589201, -1.3103714, -1.4048038, -0.3170582, --0.347935, -1.5311677, -1.6021194, -1.7991454, -1.8911888}; - -static double jttprobmat[20][20] = -{{0.076999996, 0.051000003, 0.043000004, 0.051999998, 0.019999996, 0.041, - 0.061999994, 0.073999997, 0.022999999, 0.052000004, 0.090999997, 0.058999988, - 0.024000007, 0.04, 0.050999992, 0.069, 0.059000006, 0.014000008, 0.032000004, - 0.066000005}, - {0.015604455, -0.068062363, 0.020106264, 0.070723273, 0.011702977, 0.009674053, - 0.074000798, -0.169750458, 0.005560808, -0.008208636, -0.012305869, - -0.063730179, -0.005674643, -0.02116828, 0.104586169, 0.016480839, 0.016765139, - 0.005936994, 0.006046367, -0.0082877}, - {-0.049778281, -0.007118197, 0.003801272, 0.070749616, 0.047506147, - 0.006447017, 0.090522425, -0.053620432, -0.008508175, 0.037170603, - 0.051805545, 0.015413608, 0.019939916, -0.008431976, -0.143511376, - -0.052486072, -0.032116542, -0.000860626, -0.02535993, 0.03843545}, - {-0.028906423, 0.092952047, -0.009615343, -0.067870117, 0.031970392, - 0.048338335, -0.054396304, -0.135916654, 0.017780083, 0.000129242, - 0.031267424, 0.116333586, 0.007499746, -0.032153596, 0.033517051, - -0.013719269, -0.00347293, -0.003291821, -0.02158326, -0.008862168}, - {0.037181176, -0.023106564, -0.004482225, -0.029899635, 0.118139633, - -0.032298569, -0.04683198, 0.05566988, -0.012622847, 0.002023096, - -0.043921088, -0.04792557, -0.003452711, -0.037744513, 0.020822974, - 0.036580187, 0.02331425, -0.004807711, -0.017504496, 0.01086673}, - {0.044754061, -0.002503471, 0.019452517, -0.015611487, -0.02152807, - -0.013131425, -0.03465365, -0.047928912, 0.020608851, 0.067843095, - -0.122130014, 0.002521499, 0.013021646, -0.082891087, -0.061590119, - 
0.016270856, 0.051468938, 0.002079063, 0.081019713, 0.082927944}, - {0.058917882, 0.007320741, 0.025278141, 0.000357541, -0.002831285, - -0.032453034, -0.010177288, -0.069447924, -0.034467324, 0.011422358, - -0.128478324, 0.04309667, -0.015319944, 0.113302422, -0.035052393, - 0.046885372, 0.06185183, 0.00175743, -0.06224497, 0.020282093}, - {-0.014562092, 0.022522921, -0.007094389, 0.03480089, -0.000326144, - -0.124039037, 0.020577906, -0.005056454, -0.081841576, -0.004381786, - 0.030826152, 0.091261631, 0.008878828, -0.02829487, 0.042718836, - -0.011180886, -0.012719227, -0.000753926, 0.048062375, -0.009399129}, - {0.033789571, -0.013512235, 0.088010984, 0.017580292, -0.006608005, - -0.037836971, -0.061344686, -0.034268357, 0.018190209, -0.068484614, - 0.120024744, -0.00319321, -0.001349477, -0.03000546, -0.073063759, - 0.081912399, 0.0635245, 0.000197, -0.002481798, -0.09108114}, - {-0.113947615, 0.019230545, 0.088819683, 0.064832765, 0.001801467, - -0.063829682, -0.072001633, 0.018429333, 0.057465965, 0.043901014, - -0.048050874, -0.001705918, 0.022637173, 0.017404665, 0.043877902, - -0.017089594, -0.058489485, 0.000127498, -0.029357194, 0.025943972}, - {0.01512923, 0.023603725, 0.006681954, 0.012360216, -0.000181447, - -0.023011838, -0.008960024, -0.008533239, 0.012569835, 0.03216118, - 0.061986403, -0.001919083, -0.1400832, -0.010669741, -0.003919454, - -0.003707024, -0.026806029, -0.000611603, -0.001402648, 0.065312824}, - {-0.036405351, 0.020816769, 0.011408213, 0.019787053, 0.038897829, - 0.017641789, 0.020858533, -0.006067252, 0.028617353, -0.064259496, - -0.081676567, 0.024421823, -0.028751676, 0.07095096, -0.024199434, - -0.007513119, -0.028108766, -0.01198095, 0.111761119, -0.076198809}, - {0.060831772, 0.144097327, -0.069151377, 0.023754576, -0.003322955, - -0.071618574, 0.03353154, -0.02795295, 0.039519769, -0.023453968, - -0.000630308, -0.098024591, 0.017672997, 0.003813378, -0.009266499, - -0.011192111, 0.016013873, -0.002072968, -0.010022044, 
-0.012526904}, - {-0.050776604, 0.092833081, 0.044069596, 0.050523021, -0.002628417, - 0.076542572, -0.06388631, -0.00854892, -0.084725311, 0.017401063, - -0.006262541, -0.094457679, -0.002818678, -0.0044122, -0.002883973, - 0.028729685, -0.004961596, -0.001498627, 0.017994575, -0.000232779}, - {-0.01894566, -0.007760205, -0.015160993, -0.027254587, 0.009800903, - -0.013443561, -0.032896517, -0.022734138, -0.001983861, 0.00256111, - 0.024823166, -0.021256768, 0.001980052, 0.028136263, -0.012364384, - -0.013782446, -0.013061091, 0.111173981, 0.021702122, 0.00046654}, - {-0.009444193, -0.042106824, -0.02535015, -0.055125574, 0.006369612, - -0.02945416, -0.069922064, -0.067221068, -0.003004999, 0.053624311, - 0.128862984, -0.057245803, 0.025550508, 0.087741073, -0.001119043, - -0.012036202, -0.000913488, -0.034864475, 0.050124813, 0.055534723}, - {0.145782464, -0.024348311, -0.031216873, 0.106174443, 0.00202862, - 0.02653866, -0.113657267, -0.00755018, 0.000307232, -0.051241158, - 0.001310685, 0.035275877, 0.013308898, 0.002957626, -0.002925034, - -0.065362319, -0.071844582, 0.000475894, -0.000112419, 0.034097762}, - {0.079840455, 0.018769331, 0.078685899, -0.084329807, -0.00277264, - -0.010099754, 0.059700608, -0.019209715, -0.010442992, -0.042100476, - -0.006020556, -0.023061786, 0.017246106, -0.001572858, -0.006703785, - 0.056301316, -0.156787357, -0.000303638, 0.001498195, 0.051363455}, - {0.049628261, 0.016475144, 0.094141653, -0.04444633, 0.005206131, - -0.001827555, 0.02195624, 0.013066683, -0.010415582, -0.022338403, - 0.007837197, -0.023397671, -0.002507095, 0.005177694, 0.017109561, - -0.202340113, 0.069681441, 0.000120736, 0.002201146, 0.004670849}, - {0.089153689, 0.000233354, 0.010826822, -0.004273519, 0.001440618, - 0.000436077, 0.001182351, -0.002255508, -0.000700465, 0.150589876, - -0.003911914, -0.00050154, -0.004564983, 0.00012701, -0.001486973, - -0.018902754, -0.054748555, 0.000217377, -0.000319302, -0.162541651}}; - -/* this PMB matrix 
decomposition due to Elisabeth Tillier */ -static double pmbeigmat[20] = -{0.0000001586972220,-1.8416770496147100, -1.6025046986139100,-1.5801012515121300, --1.4987794099715900,-1.3520794233801900,-1.3003469390479700,-1.2439503327631300, --1.1962574080244200,-1.1383730501367500,-1.1153278910708000,-0.4934843510654760, --0.5419014550215590,-0.9657997830826700,-0.6276075673757390,-0.6675927795018510, --0.6932641383465870,-0.8897872681859630,-0.8382698977371710,-0.8074694642446040}; - -static double pmbprobmat[20][20] = -{{0.0771762457248147,0.0531913844998640,0.0393445076407294,0.0466756566755510, -0.0286348361997465,0.0312327748383639,0.0505410248721427,0.0767106611472993, -0.0258916271688597,0.0673140562194124,0.0965705469252199,0.0515979465932174, -0.0250628079438675,0.0503492018628350,0.0399908189418273,0.0641898881894471, -0.0517539616710987,0.0143507440546115,0.0357994592438322,0.0736218495862984}, -{0.0368263046116572,-0.0006728917107827,0.0008590805287740,-0.0002764255356960, -0.0020152937187455,0.0055743720652960,0.0003213317669367,0.0000449190281568, --0.0004226254397134,0.1805040629634510,-0.0272246813586204,0.0005904606533477, --0.0183743200073889,-0.0009194625608688,0.0008173657533167,-0.0262629806302238, -0.0265738757209787,0.0002176606241904,0.0021315644838566,-0.1823229927207580}, -{-0.0194800075560895,0.0012068088610652,-0.0008803318319596,-0.0016044273960017, --0.0002938633803197,-0.0535796754602196,0.0155163896648621,-0.0015006360762140, -0.0021601372013703,0.0268513218744797,-0.1085292493742730,0.0149753083138452, -0.1346457366717310,-0.0009371698759829,0.0013501708044116,0.0346352293103622, --0.0276963770242276,0.0003643142783940,0.0002074817333067,-0.0174108903914110}, -{0.0557839400850153,0.0023271577185437,0.0183481103396687,0.0023339480096311, -0.0002013267015151,-0.0227406863569852,0.0098644845475047,0.0064721276774396, -0.0001389408104210,-0.0473713878768274,-0.0086984445005797,0.0026913674934634, 
-0.0283724052562196,0.0001063665179457,0.0027442574779383,-0.1875312134708470, -0.1279864877057640,0.0005103347834563,0.0003155113168637,0.0081451082759554}, -{0.0037510125027265,0.0107095920636885,0.0147305410328404,-0.0112351252180332, --0.0001500408626446,-0.1523450933729730,0.0611532413339872,-0.0005496748939503, -0.0048714378736644,-0.0003826320053999,0.0552010244407311,0.0482555671001955, --0.0461664995115847,-0.0021165008617978,-0.0004574454232187,0.0233755883688949, --0.0035484915422384,0.0009090698422851,0.0013840637687758,-0.0073895139302231}, -{-0.0111512564930024,0.1025460064723080,0.0396772456883791,-0.0298408501361294, --0.0001656742634733,-0.0079876311843289,0.0712644184507945,-0.0010780604625230, --0.0035880882043592,0.0021070399334252,0.0016716329894279,-0.1810123023850110, -0.0015141703608724,-0.0032700852781804,0.0035503782441679,0.0118634302028026, -0.0044561606458028,-0.0001576678495964,0.0023470722225751,-0.0027457045397157}, -{0.1474525743949170,-0.0054432538500293,0.0853848892349828,-0.0137787746207348, --0.0008274830358513,0.0042248844582553,0.0019556229305563,-0.0164191435175148, --0.0024501858854849,0.0120908948084233,-0.0381456105972653,0.0101271614855119, --0.0061945941321859,0.0178841099895867,-0.0014577779202600,-0.0752120602555032, --0.1426985695849920,0.0002862275078983,-0.0081191734261838,0.0313401149422531}, -{0.0542034611735289,-0.0078763926211829,0.0060433542506096,0.0033396210615510, -0.0013965072374079,0.0067798903832256,-0.0135291136622509,-0.0089982442731848, --0.0056744537593887,-0.0766524225176246,0.1881210263933930,-0.0065875518675173, -0.0416627569300375,-0.0953804133524747,-0.0012559228448735,0.0101622644292547, --0.0304742453119050,0.0011702318499737,0.0454733434783982,-0.1119239362388150}, -{0.1069409037912470,0.0805064400880297,-0.1127352030714600,0.1001181253523260, --0.0021480427488769,-0.0332884841459003,-0.0679837575848452,-0.0043812841356657, 
-0.0153418716846395,-0.0079441315103188,-0.0121766182046363,-0.0381127991037620, --0.0036338726532673,0.0195324059593791,-0.0020165963699984,-0.0061222685010268, --0.0253761448771437,-0.0005246410999057,-0.0112205170502433,0.0052248485517237}, -{-0.0325247648326262,0.0238753651653669,0.0203684886605797,0.0295666232678825, --0.0003946714764213,-0.0157242718469554,-0.0511737848084862,0.0084725632040180, --0.0167068828528921,0.0686962159427527,-0.0659702890616198,-0.0014289912494271, --0.0167000964093416,-0.1276689083678200,0.0036575057830967,-0.0205958145531018, -0.0000368919612829,0.0014413626622426,0.1064360941926030,0.0863372661517408}, -{-0.0463777468104402,0.0394712148670596,0.1118686750747160,0.0440711686389031, --0.0026076286506751,-0.0268454015202516,-0.1464943067133240,-0.0137514051835380, --0.0094395514284145,-0.0144124844774228,0.0249103379323744,-0.0071832157138676, -0.0035592787728526,0.0415627419826693,0.0027040097365669,0.0337523666612066, -0.0316121324137152,-0.0011350177559026,-0.0349998884574440,-0.0302651879823361}, -{0.0142360925194728,0.0413145623127025,0.0324976427846929,0.0580930922002398, --0.0586974207121084,0.0202001168873069,0.0492204086749069,0.1126593173463060, -0.0116620013776662,-0.0780333711712066,-0.1109786767320410,0.0407775100936731, --0.0205013161312652,-0.0653458585025237,0.0347351829703865,0.0304448983224773, -0.0068813748197884,-0.0189002309261882,-0.0334507528405279,-0.0668143558699485}, -{-0.0131548829657936,0.0044244322828034,-0.0050639951827271,-0.0038668197633889, --0.1536822386530220,0.0026336969165336,0.0021585651200470,-0.0459233839062969, -0.0046854727140565,0.0393815434593599,0.0619554007991097,0.0027456299925622, -0.0117574347936383,0.0373018612990383,0.0024818527553328,-0.0133956606027299, --0.0020457128424105,0.0154178819990401,0.0246524142683911,0.0275363065682921}, -{-0.1542307272455030,0.0364861558267547,-0.0090880407008181,0.0531673937889863, 
-0.0157585615170580,0.0029986538457297,0.0180194047699875,0.0652152443589317, -0.0266842840376180,0.0388457366405908,0.0856237634510719,0.0126955778952183, -0.0099593861698250,-0.0013941794862563,0.0294065511237513,-0.1151906949298290, --0.0852991447389655,0.0028699120202636,-0.0332087026659522,0.0006811857297899}, -{0.0281300736924501,-0.0584072081898638,-0.0178386569847853,-0.0536470338171487, --0.0186881656029960,-0.0240008730656106,-0.0541064820498883,0.2217137098936020, --0.0260500001542033,0.0234505236798375,0.0311127151218573,-0.0494139126682672, -0.0057093465049849,0.0124937286655911,-0.0298322975915689,0.0006520211333102, --0.0061018680727128,-0.0007081999479528,-0.0060523759094034,0.0215845995364623}, -{0.0295321046399105,-0.0088296411830544,-0.0065057049917325,-0.0053478115612781, --0.0100646496794634,-0.0015473619084872,0.0008539960632865,-0.0376381933046211, --0.0328135588935604,0.0672161874239480,0.0667626853916552,-0.0026511651464901, -0.0140451514222062,-0.0544836996133137,0.0427485157912094,0.0097455780205802, -0.0177309072915667,-0.0828759701187452,-0.0729504795471370,0.0670731961252313}, -{0.0082646581043963,-0.0319918630534466,-0.0188454445200422,-0.0374976353856606, -0.0037131290686848,-0.0132507796987883,-0.0306958830735725,-0.0044119395527308, --0.0140786756619672,-0.0180512599925078,-0.0208243802903953,-0.0232202769398931, --0.0063135878270273,0.0110442171178168,0.1824538048228460,-0.0006644614422758, --0.0069909097436659,0.0255407650654681,0.0099119399501151,-0.0140911517070698}, -{0.0261344441524861,-0.0714454044548650,0.0159436926233439,0.0028462736216688, --0.0044572637889080,-0.0089474834434532,-0.0177570282144517,-0.0153693244094452, -0.1160919467206400,0.0304911481385036,0.0047047513411774,-0.0456535116423972, -0.0004491494948617,-0.0767108879444462,-0.0012688533741441,0.0192445965934123, -0.0202321954782039,0.0281039933233607,-0.0590403018490048,0.0364080426546883}, 
-{0.0115826306265004,0.1340228176509380,-0.0236200652949049,-0.1284484655137340, --0.0004742338006503,0.0127617346949511,-0.0428560878860394,0.0060030732454125, -0.0089182609926781,0.0085353834972860,0.0048464809638033,0.0709740071429510, -0.0029940462557054,-0.0483434904493132,-0.0071713680727884,-0.0036840391887209, -0.0031454003250096,0.0246243550241551,-0.0449551277644180,0.0111449232769393}, -{0.0140356721886765,-0.0196518236826680,0.0030517022326582,0.0582672093364850, --0.0000973895685457,0.0021704767224292,0.0341806268602705,-0.0152035987563018, --0.0903198657739177,0.0259623214586925,0.0155832497882743,-0.0040543568451651, -0.0036477631918247,-0.0532892744763217,-0.0142569373662724,0.0104500681408622, -0.0103483945857315,0.0679534422398752,-0.0768068882938636,0.0280289727046158}} -; - - -static double pameigmat[] = {0.0, -0.2350753691875762, -0.2701991863800379, - -0.2931612442853115, -0.4262492032364507, -0.5395980482561625, - -0.7141172690079523, -0.7392844756151318, -0.7781761342200766, - -0.810032066366362, -0.875299712761124, -1.048227332164386, - -1.109594097332267, -1.298616073142234, -1.342036228188581, - -1.552599145527578, -1.658762802054814, -1.74893445623765, - -1.933280832903272, -2.206353522613025}; - -static double pamprobmat[20][20] = - {{0.087683339901135, 0.04051291829598762, 0.04087846315185977, - 0.04771603459744777, 0.03247095396561266, 0.03784612688594957, - 0.0504933695604875, 0.0898249006830755, 0.03285885059543713, - 0.0357514442352119, 0.0852464099207521, 0.07910313444070642, - 0.01488243946396588, 0.04100101908956829, 0.05158026947089499, - 0.06975497205982451, 0.05832757042475474, 0.00931264523877807, - 0.03171540880870517, 0.06303972920984541}, - {0.01943453646811026, -0.004492574160484092, 0.007694891061220776, - 0.01278399096887701, 0.0106157418450234, 0.007542140341575122, - 0.01326994069032819, 0.02615565199894889, 0.003123125764490066, - 0.002204507682495444, -0.004782898215768979, 0.01204241965177619, - 
0.0007847400096924341, -0.03043626073172116, 0.01221202591902536, - 0.01100527004684405, 0.01116495631339549, -0.0925364931988571, - -0.02622065387931562, 0.00843494142432107}, - {0.01855357100209072, 0.01493642835763868, 0.0127983090766285, - 0.0200533250704364, -0.1681898360107787, 0.01551657969909255, - 0.02128060163107209, 0.03100633591848964, 0.00845480845269879, - 0.000927149370785571, 0.00937207565817036, 0.03490557769673472, - 0.00300443019551563, -0.02590837220264415, 0.01329376859943192, - 0.006854110889741407, 0.01102593860528263, 0.003360844186685888, - -0.03459712356647764, 0.003351477369404443}, - {0.02690642688200102, 0.02131745801890152, 0.0143626616005213, - 0.02405101425725929, 0.05041008641436849, 0.01430925051050233, - 0.02362114036816964, 0.04688381789373886, 0.005250115453626377, - -0.02040112168595516, -0.0942720776915669, 0.03773004996758644, - -0.00822831940782616, -0.1164872809439224, 0.02286281877257392, - 0.02849551240669926, 0.01468856796295663, 0.02377110964207936, - -0.094380545436577, -0.02089068498518036}, - {0.00930172577225213, 0.01493463068441099, 0.020186920775608, - 0.02892154953912524, -0.01224593358361567, 0.01404228329986624, - 0.02671186617119041, 0.04537535161795231, 0.02229995804098249, - -0.04635704133961575, -0.1966910360247138, 0.02796648065439046, - -0.02263484732621436, 0.0440490503242072, 0.01148782948302166, - 0.01989170531824069, 0.001306805142981245, -0.005676690969116321, - 0.07680476281625202, -0.07967537039721849}, - {0.06602274245435476, -0.0966661981471856, -0.005241648783844579, - 0.00859135188171146, -0.007762129660943368, -0.02888965572526196, - 0.003592291525888222, 0.1668410669287673, -0.04082039290551406, - 0.005233775047553415, -0.01758244726137135, -0.1493955762326898, - -0.00855819137835548, 0.004211419253492328, 0.01929306335052688, - 0.03008056746359405, 0.0190444422412472, 0.005577189741419315, - 0.0000874156155112068, 0.02634091459108298}, - {0.01933897472880726, 0.05874583569377844, 
-0.02293534606228405, - -0.07206314017962175, -0.004580681581546643, -0.0628814337610561, - -0.0850783812795136, 0.07988417636610614, -0.0852798990133397, - 0.01649047166155952, -0.05416647263757423, 0.1089834536254064, - 0.005093403979413865, 0.02520300254161142, 0.0005951431406455604, - 0.02441251821224675, 0.02796099482240553, -0.002574933994926502, - -0.007172237553012804, 0.03002455129086954}, - {0.04041118479094272, -0.002476225672095412, -0.01494505811263243, - -0.03759443758599911, -0.00892246902492875, -0.003634714029239211, - -0.03085671837973749, -0.126176309029931, 0.005814031139083794, - 0.01313561962646063, -0.04760487162503322, -0.0490563712725484, - -0.005082243450421558, -0.01213634309383557, 0.1806666927079249, - 0.02111663336185495, 0.02963486860587087, -0.0000175020101657785, - 0.01197155383597686, 0.0357526792184636}, - {-0.01184769557720525, 0.01582776076338872, -0.006570708266564639, - -0.01471915653734024, 0.00894343616503608, 0.00562664968033149, - -0.01465878888356943, 0.05365282692645818, 0.00893509735776116, - -0.05879312944436473, 0.0806048683392995, -0.007722897986905326, - -0.001819943882718859, 0.0942535573077267, 0.07483883782251654, - 0.004354639673913651, -0.02828804845740341, -0.001318222184691827, - -0.07613149604246563, -0.1251675867732172}, - {0.00834167031558193, -0.01509357596974962, 0.007098172811092488, - 0.03127677418040319, 0.001992448468465455, 0.00915441566808454, - 0.03430175973499201, -0.0730648147535803, -0.001402707145575659, - 0.04780949194330815, -0.1115035603461273, -0.01292297197609604, - -0.005056270550868528, 0.1112053349612027, -0.03801929822379964, - -0.001191241001736563, 0.01872874622910247, 0.0005314214903865993, - -0.0882576318311789, 0.07607183599610171}, - {-0.01539460099727769, 0.04988596184297883, -0.01187240760647617, - -0.06987843637091853, -0.002490472846497859, 0.01009857892494956, - -0.07473588067847209, 0.0906009925879084, 0.1243612446505172, - 0.02152806401345371, -0.03504879644860233, 
-0.06680752427613573, - -0.005574485153629651, 0.001518282948127752, -0.01999168507510701, - -0.01478606199529457, -0.02203749419458996, -0.00132680708294333, - -0.01137505997867614, 0.05332658773667142}, - {-0.06104378736432388, 0.0869446603393548, -0.03298331234537257, - 0.03128515657456024, 0.003906358569208259, 0.03578694104193928, - 0.06241936133189683, 0.06182827284921748, -0.05566564263245907, - 0.02640868588189002, -0.01349751243059039, -0.05507866642582638, - -0.006671347738489326, -0.001470096466016046, 0.05185743641479938, - -0.07494697511168257, -0.1175185439057584, -0.001188074094105709, - 0.00937934805737347, 0.05024773745437657}, - {-0.07252555582124737, -0.116554459356382, 0.003605361887406413, - -0.00836518656029184, 0.004615715410745561, 0.005105376617651312, - -0.00944938657024391, 0.05602449420950007, 0.02722719610561933, - 0.01959357494748446, -0.0258655103753962, 0.1440733975689835, - 0.01446782819722976, 0.003718896062070054, 0.05825843045655135, - -0.06230154142733073, -0.07833704962300169, 0.003160836143568724, - -0.001169873777936648, 0.03471745590503304}, - {-0.03204352258752698, 0.01019272923862322, 0.04509668708733181, - 0.05756522429120813, -0.0004601149081726732, -0.0984718150777423, - -0.01107826100664925, -0.005680277810520585, 0.01962359392320817, - 0.01550006899131986, 0.05143956925922197, 0.02462476682588468, - -0.0888843861002653, -0.00171553583659411, 0.01606331750661664, - 0.001176847743518958, -0.02070972978912828, -0.000341523293579971, - -0.002654732745607882, 0.02075709428885848}, - {0.03595199666430258, -0.02800219615234468, -0.04341570015493925, - -0.0748275906176658, 0.0001051403676377422, 0.1137431321746627, - 0.005852087565974318, 0.003443037513847801, -0.02481931657706633, - -0.003651181839831423, 0.03195794176786321, 0.04135411406392523, - -0.07562030263210619, 0.001769332364699, -0.01984381173403915, - -0.005029750745010152, 0.02649253902476472, 0.000518085571702734, - 0.001062936684474851, 0.01295950668914449}, - 
{-0.16164552322896, -0.0006050035060464324, 0.0258380054414968, - 0.003188424740960557, -0.0002058911341821877, 0.03157555987384681, - -0.01678913462596107, 0.03096216145389774, -0.0133791110666919, - 0.1125249625204277, -0.00769017706442472, -0.02653938062180483, - -0.002555329863523985, -0.00861833362947954, 0.01775148884754278, - 0.02529310679774722, 0.0826243417011238, -0.0001036728183032624, - 0.001963562313294209, -0.0935900561309786}, - {0.1652394174588469, -0.002814245280784351, -0.0328982001821263, - -0.02000104712964131, 0.0002208121995725443, -0.02733462178511839, - 0.02648078162927627, -0.01788316626401427, 0.01630747623755998, - 0.1053849023838147, -0.005447706553811218, 0.01810876922536839, - -0.001808914710282444, -0.007687912115607397, -0.01332593672114388, - -0.02110750894891371, -0.07456116592983384, 0.000219072589592394, - 0.001270886972191055, -0.1083616930749109}, - {0.02453279389716254, -0.005820072356487439, 0.100260287284095, - 0.01277522280305745, -0.003184943445296999, 0.05814689527984152, - -0.0934012278200201, -0.03017986487349484, -0.03136625380994165, - 0.00988668352785117, -0.00358900410973142, -0.02017443675004764, - 0.000915384582922184, -0.001460963415183106, -0.01370112443251124, - 0.1130040979284457, -0.1196161771323699, -0.0005800211204222045, - -0.0006153403201024954, 0.00416806428223025}, - {-0.0778089244252535, -0.007055161182430869, -0.0349307504860869, - -0.0811915584276571, -0.004689825871599125, -0.03726108871471753, - 0.1072225647141469, -0.00917015113070944, 0.01381628985996913, - -0.00123227881492089, 0.001815954515275675, 0.005708744099349901, - -0.0001448985044877925, -0.001306578795561384, -0.006992743514185243, - 0.1744720240732789, -0.05353628497814023, -0.0007613684227234787, - -0.0003550282315997644, 0.01340106423804634}, - {-0.0159527329868513, -0.007622151568160798, -0.1389875105184963, - 0.1165051999914764, -0.002217810389087748, 0.01550003226513692, - -0.07427664222230566, -0.003371438498619264, 
0.01385754771325365, - 0.004759020167383304, 0.001624078805220564, 0.02011638303109029, - -0.001717827082842178, -0.0007424036708598594, -0.003978884451898934, - 0.0866418927301209, -0.01280817739158123, -0.00023039242454603, - 0.002309205802479111, 0.0005926106991001195}}; - - -void init_protmats() -{ - long l, m; - - eigmat = (double *) Malloc (20 * sizeof(double)); - for (l = 0; l <= 19; l++) - if (usejtt) - eigmat[l] = jtteigmat[l]; - else { - if (usepmb) - eigmat[l] = pmbeigmat[l]; - else - eigmat[l] = pameigmat[l]; - } - probmat = (double **) Malloc (20 * sizeof(double *)); - for (l = 0; l <= 19; l++) - for (m= 0; m <= 19; m++) - if (usejtt) - probmat[l] = jttprobmat[l]; - else { - if (usepmb) - probmat[l] = pmbprobmat[l]; - else - probmat[l] = pamprobmat[l]; - } -} /* init_protmats */ - - -void getoptions() -{ - /* interactively set options */ - long i, loopcount, loopcount2; - Char ch; - boolean didchangecat, didchangercat; - double probsum; - - fprintf(outfile, "\nAmino acid sequence Maximum Likelihood"); - fprintf(outfile, " method, version %s\n\n",VERSION); - putchar('\n'); - ctgry = false; - didchangecat = false; - rctgry = false; - didchangercat = false; - categs = 1; - rcategs = 1; - auto_ = false; - gama = false; - global = false; - hypstate = false; - improve = false; - invar = false; - jumble = false; - njumble = 1; - lngths = false; - lambda = 1.0; - outgrno = 1; - outgropt = false; - trout = true; - usertree = false; - weights = false; - printdata = false; - progress = true; - treeprint = true; - usejtt = true; - usepmb = false; - usepam = false; - interleaved = true; - loopcount = 0; - for (;;){ - cleerhome(); - printf("Amino acid sequence Maximum Likelihood"); - printf(" method, version %s\n\n",VERSION); - printf("Settings for this run:\n"); - printf(" U Search for best tree? %s\n", - (usertree ? "No, use user trees in input file" : "Yes")); - if (usertree) { - printf(" L Use lengths from user trees? %s\n", - (lngths ? 
"Yes" : "No")); - } - printf(" P JTT, PMB or PAM probability model? %s\n", - usejtt ? "Jones-Taylor-Thornton" : - usepmb ? "Henikoff/Tillier PMB" : "Dayhoff PAM"); - printf(" C One category of sites?"); - if (!ctgry || categs == 1) - printf(" Yes\n"); - else - printf(" %ld categories of sites\n", categs); - printf(" R Rate variation among sites?"); - if (!rctgry) - printf(" constant rate of change\n"); - else { - if (gama) - printf(" Gamma distributed rates\n"); - else { - if (invar) - printf(" Gamma+Invariant sites\n"); - else - printf(" user-defined HMM of rates\n"); - } - printf(" A Rates at adjacent sites correlated?"); - if (!auto_) - printf(" No, they are independent\n"); - else - printf(" Yes, mean block length =%6.1f\n", 1.0 / lambda); - } - printf(" W Sites weighted? %s\n", - (weights ? "Yes" : "No")); - if (!usertree) { - printf(" S Speedier but rougher analysis? %s\n", - (improve ? "No, not rough" : "Yes")); - printf(" G Global rearrangements? %s\n", - (global ? "Yes" : "No")); - } - if (!usertree) { - printf(" J Randomize input order of sequences?"); - if (jumble) - printf(" Yes (seed =%8ld,%3ld times)\n", inseed0, njumble); - else - printf(" No. Use input order\n"); - } - printf(" O Outgroup root? %s%3ld\n", - (outgropt ? "Yes, at sequence number" : - "No, use as outgroup species"),outgrno); - printf(" M Analyze multiple data sets?"); - if (mulsets) - printf(" Yes, %2ld %s\n", datasets, - (justwts ? "sets of weights" : "data sets")); - else - printf(" No\n"); - printf(" I Input sequences interleaved? %s\n", - (interleaved ? "Yes" : "No, sequential")); - printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n", - (ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)")); - printf(" 1 Print out the data at start of run %s\n", - (printdata ? "Yes" : "No")); - printf(" 2 Print indications of progress of run %s\n", - (progress ? "Yes" : "No")); - printf(" 3 Print out tree %s\n", - (treeprint ? "Yes" : "No")); - printf(" 4 Write out trees onto tree file? %s\n", - (trout ? 
"Yes" : "No")); - printf(" 5 Reconstruct hypothetical sequences? %s\n", - (hypstate ? "Yes" : "No")); - printf("\n Y to accept these or type the letter for one to change\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%c%*[^\n]", &ch); - getchar(); - if (ch == '\n') - ch = ' '; - uppercase(&ch); - if (ch == 'Y') - break; - if (strchr("UPLCRAWSGJOMI012345",ch) != NULL){ - switch (ch) { - - case 'C': - ctgry = !ctgry; - if (ctgry) { - printf("\nSitewise user-assigned categories:\n\n"); - initcatn(&categs); - if (rate){ - free(rate); - } - rate = (double *) Malloc(categs * sizeof(double)); - didchangecat = true; - initcategs(categs, rate); - } - break; - - case 'P': - if (usejtt) { - usejtt = false; - usepmb = true; - } else { - if (usepmb) { - usepmb = false; - usepam = true; - } else { - usepam = false; - usejtt = true; - } - } - break; - - case 'R': - if (!rctgry) { - rctgry = true; - gama = true; - } else { - if (gama) { - gama = false; - invar = true; - } else { - if (invar) - invar = false; - else - rctgry = false; - } - } - break; - - case 'A': - auto_ = !auto_; - if (auto_) - initlambda(&lambda); - break; - - case 'W': - weights = !weights; - break; - - case 'S': - improve = !improve; - break; - - case 'G': - global = !global; - break; - - case 'J': - jumble = !jumble; - if (jumble) - initjumble(&inseed, &inseed0, seed, &njumble); - else njumble = 1; - break; - - case 'L': - lngths = !lngths; - break; - - case 'O': - outgropt = !outgropt; - if (outgropt) - initoutgroup(&outgrno, spp); - break; - - case 'U': - usertree = !usertree; - break; - - case 'M': - mulsets = !mulsets; - if (mulsets) { - printf("Multiple data sets or multiple weights?"); - loopcount2 = 0; - do { - printf(" (type D or W)\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%c%*[^\n]", &ch2); - getchar(); - if (ch2 == '\n') - ch2 = ' '; - uppercase(&ch2); - countup(&loopcount2, 10); - } while ((ch2 != 'W') && (ch2 != 'D')); - justwts = (ch2 == 'W'); - if (justwts) - 
justweights(&datasets); - else - initdatasets(&datasets); - if (!jumble) { - jumble = true; - initjumble(&inseed, &inseed0, seed, &njumble); - } - } - break; - - case 'I': - interleaved = !interleaved; - break; - - case '0': - initterminal(&ibmpc, &ansi); - break; - - case '1': - printdata = !printdata; - break; - - case '2': - progress = !progress; - break; - - case '3': - treeprint = !treeprint; - break; - - case '4': - trout = !trout; - break; - - case '5': - hypstate = !hypstate; - break; - } - } else - printf("Not a possible option!\n"); - countup(&loopcount, 100); - } - if (gama || invar) { - loopcount = 0; - do { - printf( -"\nCoefficient of variation of substitution rate among sites (must be positive)\n"); - printf( - " In gamma distribution parameters, this is 1/(square root of alpha)\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%lf%*[^\n]", &cv); - getchar(); - countup(&loopcount, 10); - } while (cv <= 0.0); - alpha = 1.0 / (cv * cv); - } - if (!rctgry) - auto_ = false; - if (rctgry) { - printf("\nRates in HMM"); - if (invar) - printf(" (including one for invariant sites)"); - printf(":\n"); - initcatn(&rcategs); - if (probcat){ - free(probcat); - free(rrate); - } - probcat = (double *) Malloc(rcategs * sizeof(double)); - rrate = (double *) Malloc(rcategs * sizeof(double)); - didchangercat = true; - if (gama) - initgammacat(rcategs, alpha, rrate, probcat); - else { - if (invar) { - loopcount = 0; - do { - printf("Fraction of invariant sites?\n"); - scanf("%lf%*[^\n]", &invarfrac); - getchar(); - countup (&loopcount, 10); - } while ((invarfrac <= 0.0) || (invarfrac >= 1.0)); - initgammacat(rcategs-1, alpha, rrate, probcat); - for (i = 0; i < rcategs-1; i++) - probcat[i] = probcat[i]*(1.0-invarfrac); - probcat[rcategs-1] = invarfrac; - rrate[rcategs-1] = 0.0; - } else { - initcategs(rcategs, rrate); - initprobcat(rcategs, &probsum, probcat); - } - } - } - if (!didchangercat){ - rrate = (double *) Malloc(rcategs*sizeof(double)); - probcat = 
(double *) Malloc(rcategs*sizeof(double)); - rrate[0] = 1.0; - probcat[0] = 1.0; - } - if (!didchangecat) { - rate = (double *) Malloc(categs*sizeof(double)); - rate[0] = 1.0; - } - init_protmats(); -} /* getoptions */ - - -void makeprotfreqs() -{ - /* calculate amino acid frequencies based on eigmat */ - long i, mineig; - - mineig = 0; - for (i = 0; i <= 19; i++) - if (fabs(eigmat[i]) < fabs(eigmat[mineig])) - mineig = i; - memcpy(freqaa, probmat[mineig], 20 * sizeof(double)); - for (i = 0; i <= 19; i++) - freqaa[i] = fabs(freqaa[i]); -} /* makeprotfreqs */ - -void reallocsites() -{ - long i; - for (i = 0; i < spp; i++) - y[i] = (Char *) Malloc(sites*sizeof(Char)); - - free(category); - free(weight); - free(alias); - free(ally); - free(location); - free(aliasweight); - - category = (long *) Malloc(sites*sizeof(long)); - weight = (long *) Malloc(sites*sizeof(long)); - alias = (long *) Malloc(sites*sizeof(long)); - ally = (long *) Malloc(sites*sizeof(long)); - location = (long *) Malloc(sites*sizeof(long)); - aliasweight = (long *) Malloc(sites*sizeof(long)); - for (i = 0; i < sites; i++) - category[i] = 1; - for (i = 0; i < sites; i++) - weight[i] = 1; - makeweights(); -} - -void allocrest() -{ - long i; - - y = (Char **) Malloc(spp*sizeof(Char *)); - for (i = 0; i < spp; i++) - y[i] = (Char *) Malloc(sites*sizeof(Char)); - nayme = (naym *) Malloc(spp*sizeof(naym)); - enterorder = (long *) Malloc(spp*sizeof(long)); - category = (long *) Malloc(sites*sizeof(long)); - weight = (long *) Malloc(sites*sizeof(long)); - alias = (long *) Malloc(sites*sizeof(long)); - ally = (long *) Malloc(sites*sizeof(long)); - location = (long *) Malloc(sites*sizeof(long)); - aliasweight = (long *) Malloc(sites*sizeof(long)); -} /* allocrest */ - - -void doinit() -{ /* initializes variables */ - inputnumbers(&spp, &sites, &nonodes2, 1); - getoptions(); - if (!usertree) - nonodes2--; - makeprotfreqs(); - if (printdata) - fprintf(outfile, "%2ld species, %3ld sites\n", spp, sites); - 
alloctree(&curtree.nodep, nonodes2, usertree); - allocrest(); - if (usertree) - return; - alloctree(&bestree.nodep, nonodes2, 0); - alloctree(&priortree.nodep, nonodes2, 0); - if (njumble <= 1) - return; - alloctree(&bestree2.nodep, nonodes2, 0); -} /* doinit */ - - -void inputoptions() -{ - long i; - - if (!firstset) { - samenumsp(&sites, ith); - reallocsites(); - } - if (firstset) { - for (i = 0; i < sites; i++) - category[i] = 1; - for (i = 0; i < sites; i++) - weight[i] = 1; - } - if (justwts || weights) - inputweights(sites, weight, &weights); - weightsum = 0; - for (i = 0; i < sites; i++) - weightsum += weight[i]; - if ((ctgry && categs > 1) && (firstset || !justwts)) { - inputcategs(0, sites, category, categs, "ProML"); - if (printdata) - printcategs(outfile, sites, category, "Site categories"); - } - if (weights && printdata) - printweights(outfile, 0, sites, weight, "Sites"); - fprintf(outfile, "%s model of amino acid change\n\n", - (usejtt ? "Jones-Taylor-Thornton" : - usepmb ? "Henikoff/Tillier PMB" : "Dayhoff PAM")); -} /* inputoptions */ - - -void input_protdata(long chars) -{ - /* input the names and sequences for each species */ - /* used by proml */ - long i, j, k, l, basesread, basesnew; - Char charstate; - boolean allread, done; - - if (printdata) - headings(chars, "Sequences", "---------"); - basesread = 0; - basesnew = 0; - allread = false; - while (!(allread)) { - /* eat white space -- if the separator line has spaces on it*/ - do { - charstate = gettc(infile); - } while (charstate == ' ' || charstate == '\t'); - ungetc(charstate, infile); - if (eoln(infile)) - scan_eoln(infile); - i = 1; - while (i <= spp) { - if ((interleaved && basesread == 0) || !interleaved) - initname(i - 1); - j = (interleaved) ? 
basesread : 0; - done = false; - while (!done && !eoff(infile)) { - if (interleaved) - done = true; - while (j < chars && !(eoln(infile) || eoff(infile))) { - charstate = gettc(infile); - if (charstate == '\n' || charstate == '\t') - charstate = ' '; - if (charstate == ' ' || (charstate >= '0' && charstate <= '9')) - continue; - uppercase(&charstate); - if ((strchr("ABCDEFGHIKLMNPQRSTVWXYZ*?-", charstate)) == NULL) { - printf("ERROR: bad amino acid: %c at position %ld of species %ld\n", - charstate, j+1, i); - if (charstate == '.') { - printf(" Periods (.) may not be used as gap characters.\n"); - printf(" The correct gap character is (-)\n"); - } - exxit(-1); - } - j++; - y[i - 1][j - 1] = charstate; - } - if (interleaved) - continue; - if (j < chars) - scan_eoln(infile); - else if (j == chars) - done = true; - } - if (interleaved && i == 1) - basesnew = j; - - scan_eoln(infile); - - if ((interleaved && j != basesnew) || - (!interleaved && j != chars)) { - printf("ERROR: SEQUENCES OUT OF ALIGNMENT AT POSITION %ld.\n", j); - exxit(-1); - } - i++; - } - - if (interleaved) { - basesread = basesnew; - allread = (basesread == chars); - } else - allread = (i > spp); - } - if (!printdata) - return; - for (i = 1; i <= ((chars - 1) / 60 + 1); i++) { - for (j = 1; j <= spp; j++) { - for (k = 0; k < nmlngth; k++) - putc(nayme[j - 1][k], outfile); - fprintf(outfile, " "); - l = i * 60; - if (l > chars) - l = chars; - for (k = (i - 1) * 60 + 1; k <= l; k++) { - if (j > 1 && y[j - 1][k - 1] == y[0][k - 1]) - charstate = '.'; - else - charstate = y[j - 1][k - 1]; - putc(charstate, outfile); - if (k % 10 == 0 && k % 60 != 0) - putc(' ', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); -} /* input_protdata */ - - -void makeweights() -{ - /* make up weights vector to avoid duplicate computations */ - long i; - - for (i = 1; i <= sites; i++) { - alias[i - 1] = i; - ally[i - 1] = 0; - aliasweight[i - 1] = weight[i - 1]; - location[i - 1] = 0; 
- } - sitesort2 (sites, aliasweight); - sitecombine2(sites, aliasweight); - sitescrunch2(sites, 1, 2, aliasweight); - for (i = 1; i <= sites; i++) { - if (aliasweight[i - 1] > 0) - endsite = i; - } - for (i = 1; i <= endsite; i++) { - location[alias[i - 1] - 1] = i; - ally[alias[i - 1] - 1] = alias[i - 1]; - } - term = (double **) Malloc(endsite * sizeof(double *)); - for (i = 0; i < endsite; i++) - term[i] = (double *) Malloc(rcategs * sizeof(double)); - slopeterm = (double **) Malloc(endsite * sizeof(double *)); - for (i = 0; i < endsite; i++) - slopeterm[i] = (double *) Malloc(rcategs * sizeof(double)); - curveterm = (double **) Malloc(endsite * sizeof(double *)); - for (i = 0; i < endsite; i++) - curveterm[i] = (double *) Malloc(rcategs * sizeof(double)); - mp = (vall *) Malloc(sites*sizeof(vall)); - contribution = (contribarr *) Malloc(endsite*sizeof(contribarr)); -} /* makeweights */ - - -void prot_makevalues(long categs, pointarray treenode, long endsite, - long spp, sequence y, steptr alias) -{ - /* set up fractional likelihoods at tips */ - /* a version of makevalues2 found in seq.c */ - /* used by proml */ - long i, j, k, l; - long b; - - for (k = 0; k < endsite; k++) { - j = alias[k]; - for (i = 0; i < spp; i++) { - for (l = 0; l < categs; l++) { - memset(treenode[i]->protx[k][l], 0, sizeof(double)*20); - switch (y[i][j - 1]) { - - case 'A': - treenode[i]->protx[k][l][0] = 1.0; - break; - - case 'R': - treenode[i]->protx[k][l][(long)arginine - (long)alanine] = 1.0; - break; - - case 'N': - treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0; - break; - - case 'D': - treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0; - break; - - case 'C': - treenode[i]->protx[k][l][(long)cysteine - (long)alanine] = 1.0; - break; - - case 'Q': - treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0; - break; - - case 'E': - treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0; - break; - - case 'G': - 
treenode[i]->protx[k][l][(long)glycine - (long)alanine] = 1.0; - break; - - case 'H': - treenode[i]->protx[k][l][(long)histidine - (long)alanine] = 1.0; - break; - - case 'I': - treenode[i]->protx[k][l][(long)isoleucine - (long)alanine] = 1.0; - break; - - case 'L': - treenode[i]->protx[k][l][(long)leucine - (long)alanine] = 1.0; - break; - - case 'K': - treenode[i]->protx[k][l][(long)lysine - (long)alanine] = 1.0; - break; - - case 'M': - treenode[i]->protx[k][l][(long)methionine - (long)alanine] = 1.0; - break; - - case 'F': - treenode[i]->protx[k][l][(long)phenylalanine - (long)alanine] = 1.0; - break; - - case 'P': - treenode[i]->protx[k][l][(long)proline - (long)alanine] = 1.0; - break; - - case 'S': - treenode[i]->protx[k][l][(long)serine - (long)alanine] = 1.0; - break; - - case 'T': - treenode[i]->protx[k][l][(long)threonine - (long)alanine] = 1.0; - break; - - case 'W': - treenode[i]->protx[k][l][(long)tryptophan - (long)alanine] = 1.0; - break; - - case 'Y': - treenode[i]->protx[k][l][(long)tyrosine - (long)alanine] = 1.0; - break; - - case 'V': - treenode[i]->protx[k][l][(long)valine - (long)alanine] = 1.0; - break; - - case 'B': - treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0; - treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0; - break; - - case 'Z': - treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0; - treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0; - break; - - case 'X': /* unknown aa */ - for (b = 0; b <= 19; b++) - treenode[i]->protx[k][l][b] = 1.0; - break; - - case '?': /* unknown aa */ - for (b = 0; b <= 19; b++) - treenode[i]->protx[k][l][b] = 1.0; - break; - - case '*': /* stop codon symbol */ - for (b = 0; b <= 19; b++) - treenode[i]->protx[k][l][b] = 1.0; - break; - - case '-': /* deletion event-absent data or aa */ - for (b = 0; b <= 19; b++) - treenode[i]->protx[k][l][b] = 1.0; - break; - } - } - } - } -} /* prot_makevalues */ - - -void free_pmatrix(long sib) -{ - long j,k,l; - 
- for (j = 0; j < rcategs; j++) { - for (k = 0; k < categs; k++) { - for (l = 0; l < 20; l++) - free(pmatrices[sib][j][k][l]); - free(pmatrices[sib][j][k]); - } - free(pmatrices[sib][j]); - } - free(pmatrices[sib]); -} - -void alloc_pmatrix(long sib) -{ - /* Allocate memory for a new pmatrix. Called iff num_sibs>max_num_sibs */ - long j, k, l; - double ****temp_matrix; - - temp_matrix = (double ****) Malloc (rcategs * sizeof(double ***)); - for (j = 0; j < rcategs; j++) { - temp_matrix[j] = (double ***) Malloc(categs * sizeof(double **)); - for (k = 0; k < categs; k++) { - temp_matrix[j][k] = (double **) Malloc(20 * sizeof (double *)); - for (l = 0; l < 20; l++) - temp_matrix[j][k][l] = (double *) Malloc(20 * sizeof(double)); - } - } - pmatrices[sib] = temp_matrix; - max_num_sibs++; -} /* alloc_pmatrix */ - -void prot_freetable() -{ - long i,j,k,l; - for (j = 0; j < rcategs; j++) { - for (k = 0; k < categs; k++) { - for (l = 0; l < 20; l++) - free(ddpmatrix[j][k][l]); - free(ddpmatrix[j][k]); - } - free(ddpmatrix[j]); - } - free(ddpmatrix); - - for (j = 0; j < rcategs; j++) { - for (k = 0; k < categs; k++) { - for (l = 0; l < 20; l++) - free(dpmatrix[j][k][l]); - free(dpmatrix[j][k]); - } - free(dpmatrix[j]); - } - free(dpmatrix); - - - for (j = 0; j < rcategs; j++) - free(tbl[j]); - free(tbl); - - for ( i = 0 ; i < max_num_sibs ; i++ ) - free_pmatrix(i); - free(pmatrices); -} - -void prot_inittable() -{ - /* Define a lookup table. Precompute values and print them out in tables */ - /* Allocate memory for the pmatrices, dpmatices and ddpmatrices */ - long i, j, k, l; - double sumrates; - - /* Allocate memory for pmatrices, the array of pointers to pmatrices */ - - pmatrices = (double *****) Malloc ( spp * sizeof(double ****)); - - /* Allocate memory for the first 2 pmatrices, the matrix of conversion */ - /* probabilities, but only once per run (aka not on the second jumble. 
*/ - - alloc_pmatrix(0); - alloc_pmatrix(1); - - /* Allocate memory for one dpmatrix, the first derivative matrix */ - - dpmatrix = (double ****) Malloc( rcategs * sizeof(double ***)); - for (j = 0; j < rcategs; j++) { - dpmatrix[j] = (double ***) Malloc( categs * sizeof(double **)); - for (k = 0; k < categs; k++) { - dpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *)); - for (l = 0; l < 20; l++) - dpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double)); - } - } - - /* Allocate memory for one ddpmatrix, the second derivative matrix */ - ddpmatrix = (double ****) Malloc( rcategs * sizeof(double ***)); - for (j = 0; j < rcategs; j++) { - ddpmatrix[j] = (double ***) Malloc( categs * sizeof(double **)); - for (k = 0; k < categs; k++) { - ddpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *)); - for (l = 0; l < 20; l++) - ddpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double)); - } - } - - /* Allocate memory and assign values to tbl, the matrix of possible rates*/ - - tbl = (double **) Malloc( rcategs * sizeof(double *)); - for (j = 0; j < rcategs; j++) - tbl[j] = (double *) Malloc( categs * sizeof(double)); - - for (j = 0; j < rcategs; j++) - for (k = 0; k < categs; k++) - tbl[j][k] = rrate[j]*rate[k]; - - sumrates = 0.0; - for (i = 0; i < endsite; i++) { - for (j = 0; j < rcategs; j++) - sumrates += aliasweight[i] * probcat[j] - * tbl[j][category[alias[i] - 1] - 1]; - } - sumrates /= (double)sites; - for (j = 0; j < rcategs; j++) - for (k = 0; k < categs; k++) { - tbl[j][k] /= sumrates; - } - - if(jumb > 1) - return; - - if (gama) { - fprintf(outfile, "\nDiscrete approximation to gamma distributed rates\n"); - fprintf(outfile, - " Coefficient of variation of rates = %f (alpha = %f)\n", - cv, alpha); - } - if (rcategs > 1) { - fprintf(outfile, "\nStates in HMM Rate of change Probability\n\n"); - for (i = 0; i < rcategs; i++) - if (probcat[i] < 0.0001) - fprintf(outfile, "%9ld%16.3f%20.6f\n", i+1, rrate[i], probcat[i]); - else if (probcat[i] < 
0.001) - fprintf(outfile, "%9ld%16.3f%19.5f\n", i+1, rrate[i], probcat[i]); - else if (probcat[i] < 0.01) - fprintf(outfile, "%9ld%16.3f%18.4f\n", i+1, rrate[i], probcat[i]); - else - fprintf(outfile, "%9ld%16.3f%17.3f\n", i+1, rrate[i], probcat[i]); - putc('\n', outfile); - if (auto_) - fprintf(outfile, - "Expected length of a patch of sites having the same rate = %8.3f\n", - 1/lambda); - putc('\n', outfile); - } - if (categs > 1) { - fprintf(outfile, "\nSite category Rate of change\n\n"); - for (k = 0; k < categs; k++) - fprintf(outfile, "%9ld%16.3f\n", k+1, rate[k]); - } - if ((rcategs > 1) || (categs >> 1)) - fprintf(outfile, "\n\n"); -} /* prot_inittable */ - - -void getinput() -{ - /* reads the input data */ - if (!justwts || firstset) - inputoptions(); - if (!justwts || firstset) - input_protdata(sites); - if ( !firstset ) freelrsaves(); - makeweights(); - alloclrsaves(); - setuptree2(curtree); - if (!usertree) { - setuptree2(bestree); - setuptree2(priortree); - if (njumble > 1) - setuptree2(bestree2); - } - prot_allocx(nonodes2, rcategs, curtree.nodep, usertree); - if (!usertree) { - prot_allocx(nonodes2, rcategs, bestree.nodep, 0); - prot_allocx(nonodes2, rcategs, priortree.nodep, 0); - if (njumble > 1) - prot_allocx(nonodes2, rcategs, bestree2.nodep, 0); - } - prot_makevalues(rcategs, curtree.nodep, endsite, spp, y, alias); -} /* getinput */ - - -void inittravtree(node *p) -{ - /* traverse tree to set initialized and v to initial values */ - node* q; - - p->initialized = false; - p->back->initialized = false; - if ((!lngths) || p->iter) { - p->v = initialv; - p->back->v = initialv; - } - - if ( !p->tip ) { - q = p->next; - while ( q != p ) { - inittravtree(q->back); - q = q->next; - } - } -} /* inittravtree */ - - -void prot_nuview(node *p) -{ - long i, j, k, l, m, num_sibs, sib_index; - node *sib_ptr, *sib_back_ptr; - psitelike prot_xx, x2; - double lw, prod7; - double **pmat; - double maxx; - double correction; - - /* Figure out how many siblings the 
current node has */ - /* and be sure that pmatrices is large enough */ - num_sibs = count_sibs(p); - for (i = 0; i < num_sibs; i++) - if (pmatrices[i] == NULL) - alloc_pmatrix(i); - - /* Recursive calls, should be called for all children */ - sib_ptr = p; - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - - if (!sib_back_ptr->tip && - !sib_back_ptr->initialized) - prot_nuview(sib_back_ptr); - } - - /* Make pmatrices for all possible combinations of category, rcateg */ - /* and sib */ - sib_ptr = p; /* return to p */ - for (sib_index=0; sib_index < num_sibs; sib_index++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - - lw = sib_back_ptr->v; - - for (j = 0; j < rcategs; j++) - for (k = 0; k < categs; k++) - make_pmatrix(pmatrices[sib_index][j][k], NULL, NULL, 0, lw, - tbl[j][k], eigmat, probmat); - } - - for (i = 0; i < endsite; i++) { - maxx = 0; - correction = 0; - - k = category[alias[i]-1] - 1; - for (j = 0; j < rcategs; j++) { - - /* initialize to 1 all values of prot_xx */ - for (m = 0; m <= 19; m++) - prot_xx[m] = 1; - - sib_ptr = p; /* return to p */ - /* loop through all sibs and calculate likelihoods for all possible*/ - /* amino acid combinations */ - for (sib_index=0; sib_index < num_sibs; sib_index++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - - if ( j == 0) - correction += sib_back_ptr->underflows[i]; - - memcpy(x2, sib_back_ptr->protx[i][j], sizeof(psitelike)); - pmat = pmatrices[sib_index][j][k]; - for (m = 0; m <= 19; m++) { - prod7 = 0; - for (l = 0; l <= 19; l++) - prod7 += (pmat[m][l] * x2[l]); - prot_xx[m] *= prod7; - if ( prot_xx[m] > maxx && sib_index == (num_sibs - 1)) - maxx = prot_xx[m]; - } - } - /* And the final point of this whole function: */ - memcpy(p->protx[i][j], prot_xx, sizeof(psitelike)); - } - p->underflows[i] = 0; - if ( maxx < MIN_DOUBLE ) - fix_protx(p,i,maxx,rcategs); - p->underflows[i] += correction; - } - - p->initialized = true; -} /* 
prot_nuview */ - - -void prot_slopecurv(node *p,double y,double *like,double *slope,double *curve) -{ - /* compute log likelihood, slope and curvature at node p */ - long i, j, k, l, m, lai; - double sum, sumc, sumterm, lterm, sumcs, sumcc, sum2, slope2, curve2; - double frexm = 0; /* frexm = freqaa[m]*x1[m] */ - /* frexml = frexm*x2[l] */ - double prod4m, prod5m, prod6m; /* elements of prod4-5 for */ - /* each m */ - double **pmat, **dpmat, **ddpmat; /* local pointers to global*/ - /* matrices */ - double prod4, prod5, prod6; - contribarr thelike, nulike, nuslope, nucurve, - theslope, thecurve, clai, cslai, cclai; - node *q; - psitelike x1, x2; - - q = p->back; - sum = 0.0; - for (j = 0; j < rcategs; j++) { - for (k = 0; k < categs; k++) { - make_pmatrix(pmatrices[0][j][k], dpmatrix[j][k], ddpmatrix[j][k], - 2, y, tbl[j][k], eigmat, probmat); - } - } - for (i = 0; i < endsite; i++) { - k = category[alias[i]-1] - 1; - for (j = 0; j < rcategs; j++) { - memcpy(x1, p->protx[i][j], sizeof(psitelike)); - memcpy(x2, q->protx[i][j], sizeof(psitelike)); - pmat = pmatrices[0][j][k]; - dpmat = dpmatrix[j][k]; - ddpmat = ddpmatrix[j][k]; - prod4 = 0.0; - prod5 = 0.0; - prod6 = 0.0; - for (m = 0; m <= 19; m++) { - prod4m = 0.0; - prod5m = 0.0; - prod6m = 0.0; - frexm = x1[m] * freqaa[m]; - for (l = 0; l <= 19; l++) { - prod4m += x2[l] * pmat[m][l]; - prod5m += x2[l] * dpmat[m][l]; - prod6m += x2[l] * ddpmat[m][l]; - } - prod4 += frexm * prod4m; - prod5 += frexm * prod5m; - prod6 += frexm * prod6m; - } - term[i][j] = prod4; - slopeterm[i][j] = prod5; - curveterm[i][j] = prod6; - } - sumterm = 0.0; - for (j = 0; j < rcategs; j++) - sumterm += probcat[j] * term[i][j]; - if (sumterm <= 0.0) - sumterm = 0.000000001; /* ? shouldn't get here ?? 
*/ - lterm = log(sumterm) + p->underflows[i] + q->underflows[i]; - for (j = 0; j < rcategs; j++) { - term[i][j] = term[i][j] / sumterm; - slopeterm[i][j] = slopeterm[i][j] / sumterm; - curveterm[i][j] = curveterm[i][j] / sumterm; - } - sum += (aliasweight[i] * lterm); - } - for (i = 0; i < rcategs; i++) { - thelike[i] = 1.0; - theslope[i] = 0.0; - thecurve[i] = 0.0; - } - for (i = 0; i < sites; i++) { - sumc = 0.0; - sumcs = 0.0; - sumcc = 0.0; - for (k = 0; k < rcategs; k++) { - sumc += probcat[k] * thelike[k]; - sumcs += probcat[k] * theslope[k]; - sumcc += probcat[k] * thecurve[k]; - } - sumc *= lambda; - sumcs *= lambda; - sumcc *= lambda; - if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { - lai = location[ally[i] - 1]; - memcpy(clai, term[lai - 1], rcategs*sizeof(double)); - memcpy(cslai, slopeterm[lai - 1], rcategs*sizeof(double)); - memcpy(cclai, curveterm[lai - 1], rcategs*sizeof(double)); - if (weight[i] > 1) { - for (j = 0; j < rcategs; j++) { - if (clai[j] > 0.0) - clai[j] = exp(weight[i]*log(clai[j])); - else clai[j] = 0.0; - if (cslai[j] > 0.0) - cslai[j] = exp(weight[i]*log(cslai[j])); - else cslai[j] = 0.0; - if (cclai[j] > 0.0) - cclai[j] = exp(weight[i]*log(cclai[j])); - else cclai[j] = 0.0; - } - } - for (j = 0; j < rcategs; j++) { - nulike[j] = ((1.0 - lambda) * thelike[j] + sumc) * clai[j]; - nuslope[j] = ((1.0 - lambda) * theslope[j] + sumcs) * clai[j] - + ((1.0 - lambda) * thelike[j] + sumc) * cslai[j]; - nucurve[j] = ((1.0 - lambda) * thecurve[j] + sumcc) * clai[j] - + 2.0 * ((1.0 - lambda) * theslope[j] + sumcs) * cslai[j] - + ((1.0 - lambda) * thelike[j] + sumc) * cclai[j]; - } - } else { - for (j = 0; j < rcategs; j++) { - nulike[j] = ((1.0 - lambda) * thelike[j] + sumc); - nuslope[j] = ((1.0 - lambda) * theslope[j] + sumcs); - nucurve[j] = ((1.0 - lambda) * thecurve[j] + sumcc); - } - } - memcpy(thelike, nulike, rcategs*sizeof(double)); - memcpy(theslope, nuslope, rcategs*sizeof(double)); - memcpy(thecurve, nucurve, 
rcategs*sizeof(double)); - } - sum2 = 0.0; - slope2 = 0.0; - curve2 = 0.0; - for (i = 0; i < rcategs; i++) { - sum2 += probcat[i] * thelike[i]; - slope2 += probcat[i] * theslope[i]; - curve2 += probcat[i] * thecurve[i]; - } - sum += log(sum2); - (*like) = sum; - (*slope) = slope2 / sum2; - (*curve) = (curve2 - slope2 * slope2 / sum2) / sum2; -} /* prot_slopecurv */ - - -void makenewv(node *p) -{ - /* Newton-Raphson algorithm improvement of a branch length */ - long it, ite; - double y, yold=0, yorig, like, slope, curve, oldlike=0; - boolean done, firsttime, better; - node *q; - - q = p->back; - y = p->v; - yorig = y; - done = false; - firsttime = true; - it = 1; - ite = 0; - while ((it < iterations) && (ite < 20) && (!done)) { - prot_slopecurv(p, y, &like, &slope, &curve); - better = false; - if (firsttime) { - yold = y; - oldlike = like; - firsttime = false; - better = true; - } else { - if (like > oldlike) { - yold = y; - oldlike = like; - better = true; - it++; - } - } - if (better) { - y = y + slope/fabs(curve); - if (y < epsilon) - y = epsilon; - } else { - if (fabs(y - yold) < epsilon) - ite = 20; - y = (y + (7 * yold)) / 8; - } - ite++; - done = fabs(y-yold) < epsilon; - } - smoothed = (fabs(yold-yorig) < epsilon) && (yorig > 1000.0*epsilon); - p->v = yold; - q->v = yold; - curtree.likelihood = oldlike; -} /* makenewv */ - - -void update(node *p) -{ - if (!p->tip && !p->initialized) - prot_nuview(p); - if (!p->back->tip && !p->back->initialized) - prot_nuview(p->back); - if ((!usertree) || (usertree && !lngths) || p->iter) { - makenewv(p); - if ( smoothit ) { - inittrav(p); - inittrav(p->back); - } - else if ( inserting && !p->tip ) { - p->next->initialized = false; - p->next->next->initialized = false; - } - } -} /* update */ - - -void smooth(node *p) -{ - long i, num_sibs; - node *sib_ptr; - - smoothed = false; - update(p); - if (p->tip) - return; - - num_sibs = count_sibs(p); - sib_ptr = p; - - for (i=0; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - - 
if (polishing || (smoothit && !smoothed)) { - smooth(sib_ptr->back); - p->initialized = false; - sib_ptr->initialized = false; - } - } -} /* smooth */ - - -void make_pmatrix(double **matrix, double **dmat, double **ddmat, - long derivative, double lz, double rat, - double *eigmat, double **probmat) -{ - /* Computes the R matrix such that matrix[m][l] is the joint probability */ - /* of m and l. */ - /* Computes a P matrix such that matrix[m][l] is the conditional */ - /* probability of m given l. This is accomplished by dividing all terms */ - /* in the R matrix by freqaa[m], the frequency of l. */ - - long k, l, m; /* (l) original character state */ - /* (m) final character state */ - /* (k) lambda counter */ - double p0, p1, p2, q; - double elambdat[20], delambdat[20], ddelambdat[20]; - /* exponential term for matrix */ - /* and both derivative matrices */ - for (k = 0; k <= 19; k++) { - elambdat[k] = exp(lz * rat * eigmat[k]); - if(derivative != 0) { - delambdat[k] = (elambdat[k] * rat * eigmat[k]); - ddelambdat[k] = (delambdat[k] * rat * eigmat[k]); - } - } - for (m = 0; m <= 19; m++) { - for (l = 0; l <= 19; l++) { - p0 = 0.0; - p1 = 0.0; - p2 = 0.0; - for (k = 0; k <= 19; k++) { - q = probmat[k][m] * probmat[k][l]; - p0 += (q * elambdat[k]); - if(derivative !=0) { - p1 += (q * delambdat[k]); - p2 += (q * ddelambdat[k]); - } - } - matrix[m][l] = p0 / freqaa[m]; - if(derivative != 0) { - dmat[m][l] = p1 / freqaa[m]; - ddmat[m][l] = p2 / freqaa[m]; - } - } - } -} /* make_pmatrix */ - - -double prot_evaluate(node *p, boolean saveit) -{ - contribarr tterm; - double sum, sum2, sumc, y, prod4, prodl, frexm, sumterm, lterm; - double **pmat; - long i, j, k, l, m, lai; - node *q; - psitelike x1, x2; - - sum = 0.0; - q = p->back; - y = p->v; - for (j = 0; j < rcategs; j++) - for (k = 0; k < categs; k++) - make_pmatrix(pmatrices[0][j][k],NULL,NULL,0,y,tbl[j][k],eigmat,probmat); - for (i = 0; i < endsite; i++) { - k = category[alias[i]-1] - 1; - for (j = 0; j < rcategs; 
j++) { - memcpy(x1, p->protx[i][j], sizeof(psitelike)); - memcpy(x2, q->protx[i][j], sizeof(psitelike)); - prod4 = 0.0; - pmat = pmatrices[0][j][k]; - for (m = 0; m <= 19; m++) { - prodl = 0.0; - for (l = 0; l <= 19; l++) - prodl += (pmat[m][l] * x2[l]); - frexm = x1[m] * freqaa[m]; - prod4 += (prodl * frexm); - } - tterm[j] = prod4; - } - sumterm = 0.0; - for (j = 0; j < rcategs; j++) - sumterm += probcat[j] * tterm[j]; - if (sumterm < 0.0) - sumterm = 0.00000001; /* ??? */ - lterm = log(sumterm) + p->underflows[i] + q->underflows[i]; - for (j = 0; j < rcategs; j++) - clai[j] = tterm[j] / sumterm; - memcpy(contribution[i], clai, rcategs*sizeof(double)); - if (saveit && !auto_ && usertree && (which <= shimotrees)) - l0gf[which - 1][i] = lterm; - sum += aliasweight[i] * lterm; - } - for (j = 0; j < rcategs; j++) - like[j] = 1.0; - for (i = 0; i < sites; i++) { - sumc = 0.0; - for (k = 0; k < rcategs; k++) - sumc += probcat[k] * like[k]; - sumc *= lambda; - if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { - lai = location[ally[i] - 1]; - memcpy(clai, contribution[lai - 1], rcategs*sizeof(double)); - for (j = 0; j < rcategs; j++) - nulike[j] = ((1.0 - lambda) * like[j] + sumc) * clai[j]; - } else { - for (j = 0; j < rcategs; j++) - nulike[j] = ((1.0 - lambda) * like[j] + sumc); - } - memcpy(like, nulike, rcategs*sizeof(double)); - } - sum2 = 0.0; - for (i = 0; i < rcategs; i++) - sum2 += probcat[i] * like[i]; - sum += log(sum2); - curtree.likelihood = sum; - if (!saveit || auto_ || !usertree) - return sum; - if(which <= shimotrees) - l0gl[which - 1] = sum; - if (which == 1) { - maxwhich = 1; - maxlogl = sum; - return sum; - } - if (sum > maxlogl) { - maxwhich = which; - maxlogl = sum; - } - return sum; -} /* prot_evaluate */ - - -void treevaluate() -{ - /* evaluate a user tree */ - long i; - - inittravtree(curtree.start); - polishing = true; - smoothit = true; - for (i = 1; i <= smoothings * 4; i++) - smooth (curtree.start); - dummy = prot_evaluate(curtree.start, 
true); -} /* treevaluate */ - - -void promlcopy(tree *a, tree *b, long nonodes, long categs) -{ - /* copy tree a to tree b */ - long i, j=0; - node *p, *q; - - for (i = 0; i < spp; i++) { - prot_copynode(a->nodep[i], b->nodep[i], categs); - if (a->nodep[i]->back) { - if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) - b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; - else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next -) - b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; - else - b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; - } - else b->nodep[i]->back = NULL; - } - for (i = spp; i < nonodes; i++) { - p = a->nodep[i]; - q = b->nodep[i]; - for (j = 1; j <= 3; j++) { - prot_copynode(p, q, categs); - if (p->back) { - if (p->back == a->nodep[p->back->index - 1]) - q->back = b->nodep[p->back->index - 1]; - else if (p->back == a->nodep[p->back->index - 1]->next) - q->back = b->nodep[p->back->index - 1]->next; - else - q->back = b->nodep[p->back->index - 1]->next->next; - } - else - q->back = NULL; - p = p->next; - q = q->next; - } - } - b->likelihood = a->likelihood; - b->start = a->start; /* start used in dnaml only */ - b->root = a->root; /* root used in dnamlk only */ -} /* promlcopy */ - - -void proml_re_move(node **p, node **q) -{ - /* remove p and record in q where it was */ - long i; - - /** assumes bifurcations */ - *q = (*p)->next->back; - hookup(*q, (*p)->next->next->back); - (*p)->next->back = NULL; - (*p)->next->next->back = NULL; - (*q)->v += (*q)->back->v; - (*q)->back->v = (*q)->v; - if ( smoothit ) { - inittrav((*q)); - inittrav((*q)->back); - inittrav((*p)->back); - } - if ( smoothit ) { - for ( i = 0 ; i < smoothings ; i++ ) { - smooth(*q); - smooth((*q)->back); - } - } - else - smooth(*q); -} /* proml_re_move */ - - -void insert_(node *p, node *q, boolean dooinit) -{ - /* Insert q near p */ - long i, j, num_sibs; - node *r, *sib_ptr; - - r = p->next->next; - 
hookup(r, q->back); - hookup(p->next, q); - q->v = 0.5 * q->v; - q->back->v = q->v; - r->v = q->v; - r->back->v = r->v; - p->initialized = false; - if (dooinit) { - inittrav(p); - inittrav(q); - inittrav(q->back); - } - i = 1; - inserting = true; - while (i <= smoothings) { - smooth(p); - if (!p->tip) { - num_sibs = count_sibs(p); - sib_ptr = p; - for (j=0; j < num_sibs; j++) { - smooth(sib_ptr->next->back); - sib_ptr = sib_ptr->next; - } - } - i++; - } - inserting = false; -} /* insert_ */ - - -void addtraverse(node *p, node *q, boolean contin) -{ - /* try adding p at q, proceed recursively through tree */ - long i, num_sibs; - double like, vsave = 0; - node *qback = NULL, *sib_ptr; - - if (!smoothit) { - vsave = q->v; - qback = q->back; - } - insert_(p, q, false); - like = prot_evaluate(p, false); - if (like > bestyet || bestyet == UNDEFINED) { - bestyet = like; - if (smoothit) { - addwhere = q; - promlcopy(&curtree, &bestree, nonodes2, rcategs); - } - else - qwhere = q; - succeeded = true; - } - if (smoothit) - promlcopy(&priortree, &curtree, nonodes2, rcategs); - else { - hookup (q, qback); - q->v = vsave; - q->back->v = vsave; - curtree.likelihood = bestyet; - } - if (!q->tip && contin) { - num_sibs = count_sibs(q); - if (q == curtree.start) - num_sibs++; - sib_ptr = q; - for (i=0; i < num_sibs; i++) { - addtraverse(p, sib_ptr->next->back, contin); - sib_ptr = sib_ptr->next; - } - } -} /* addtraverse */ - - -void globrearrange() -{ - /* does global rearrangements */ - tree globtree; - tree oldtree; - int i,j,k,l,num_sibs,num_sibs2; - node *where,*sib_ptr,*sib_ptr2; - double oldbestyet = curtree.likelihood; - int success = false; - - alloctree(&globtree.nodep,nonodes2,0); - alloctree(&oldtree.nodep,nonodes2,0); - setuptree2(globtree); - setuptree2(oldtree); - prot_allocx(nonodes2, rcategs, globtree.nodep, 0); - prot_allocx(nonodes2, rcategs, oldtree.nodep, 0); - promlcopy(&curtree,&globtree,nonodes2,rcategs); - promlcopy(&curtree,&oldtree,nonodes2,rcategs); - 
bestyet = curtree.likelihood; - for ( i = spp ; i < nonodes2 ; i++ ) { - num_sibs = count_sibs(curtree.nodep[i]); - sib_ptr = curtree.nodep[i]; - if ( (i - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) - putchar('.'); - fflush(stdout); - for ( j = 0 ; j <= num_sibs ; j++ ) { - proml_re_move(&sib_ptr,&where); - promlcopy(&curtree,&priortree,nonodes2,rcategs); - qwhere = where; - - if (where->tip) { - promlcopy(&oldtree,&curtree,nonodes2,rcategs); - promlcopy(&oldtree,&bestree,nonodes2,rcategs); - sib_ptr=sib_ptr->next; - continue; - } - else num_sibs2 = count_sibs(where); - sib_ptr2 = where; - for ( k = 0 ; k < num_sibs2 ; k++ ) { - addwhere = NULL; - addtraverse(sib_ptr,sib_ptr2->back,true); - if ( !smoothit ) { - if (succeeded && qwhere != where && qwhere != where->back) { - insert_(sib_ptr,qwhere,true); - smoothit = true; - for (l = 1; l<=smoothings; l++) { - smooth (where); - smooth (where->back); - } - smoothit = false; - success = true; - promlcopy(&curtree,&globtree,nonodes2,rcategs); - promlcopy(&priortree,&curtree,nonodes2,rcategs); - } - } - else if ( addwhere && where != addwhere && where->back != addwhere - && bestyet > globtree.likelihood) { - promlcopy(&bestree,&globtree,nonodes2,rcategs); - success = true; - } - sib_ptr2 = sib_ptr2->next; - } - promlcopy(&oldtree,&curtree,nonodes2,rcategs); - promlcopy(&oldtree,&bestree,nonodes2,rcategs); - sib_ptr = sib_ptr->next; - } - } - promlcopy(&globtree,&curtree,nonodes2,rcategs); - promlcopy(&globtree,&bestree,nonodes2,rcategs); - if (success && globtree.likelihood > oldbestyet) { - succeeded = true; - } - else { - succeeded = false; - } - bestyet = globtree.likelihood; - prot_freex(nonodes2,oldtree.nodep); - prot_freex(nonodes2,globtree.nodep); - freetree2(globtree.nodep,nonodes2); - freetree2(oldtree.nodep,nonodes2); -} /* globrearrange */ - - -void freelrsaves() -{ - long i,j; - for ( i = 0 ; i < NLRSAVES ; i++ ) { - for (j = 0; j < oldendsite; j++) - free(lrsaves[i]->protx[j]); - free(lrsaves[i]->protx); - 
free(lrsaves[i]->underflows); - free(lrsaves[i]); - } - free(lrsaves); -} - - -void alloclrsaves() -{ - long i,j; - lrsaves = Malloc(NLRSAVES * sizeof(node*)); - oldendsite = endsite; - for ( i = 0 ; i < NLRSAVES ; i++ ) { - lrsaves[i] = Malloc(sizeof(node)); - lrsaves[i]->protx = Malloc(endsite*sizeof(ratelike)); - lrsaves[i]->underflows = Malloc(endsite * sizeof (double)); - for (j = 0; j < endsite; j++) - lrsaves[i]->protx[j] = (pratelike)Malloc(rcategs*sizeof(psitelike)); - } -} /* alloclrsaves */ - - -void rearrange(node *p, node *pp) -{ - /* rearranges the tree locally moving pp around near p */ - long i, num_sibs; - node *q, *r, *sib_ptr; - node *rnb, *rnnb; - - if (!p->tip && !p->back->tip) { - curtree.likelihood = bestyet; - if (p->back->next != pp) - r = p->back->next; - else - r = p->back->next->next; - /* assumes bifurcations? */ - if (!smoothit) { - rnb = r->next->back; - rnnb = r->next->next->back; - prot_copynode(r,lrsaves[0],categs); - prot_copynode(r->next,lrsaves[1],categs); - prot_copynode(r->next->next,lrsaves[2],categs); - prot_copynode(p->next,lrsaves[3],categs); - prot_copynode(p->next->next,lrsaves[4],categs); - } - else - promlcopy(&curtree, &bestree, nonodes2, rcategs); - proml_re_move(&r, &q); - if (smoothit) - promlcopy(&curtree, &priortree, nonodes2, rcategs); - else - qwhere = q; - num_sibs = count_sibs (p); - sib_ptr = p; - for (i=0; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - addtraverse(r, sib_ptr->back, false); - } - if (smoothit) - promlcopy(&bestree, &curtree, nonodes2, rcategs); - else { - if (qwhere == q) { - hookup(rnb,r->next); - hookup(rnnb,r->next->next); - prot_copynode(lrsaves[0],r,categs); - prot_copynode(lrsaves[1],r->next,categs); - prot_copynode(lrsaves[2],r->next->next,categs); - prot_copynode(lrsaves[3],p->next,categs); - prot_copynode(lrsaves[4],p->next->next,categs); - rnb->v = r->next->v; - rnnb->v = r->next->next->v; - r->back->v = r->v; - curtree.likelihood = bestyet; - } - else { - insert_(r, qwhere, 
true); - smoothit = true; - for (i = 1; i<=smoothings; i++) { - smooth(r); - smooth(r->back); - } - smoothit = false; - promlcopy(&curtree, &bestree, nonodes2, rcategs); - } - } - } - if (!p->tip) { - num_sibs = count_sibs(p); - if (p == curtree.start) - num_sibs++; - sib_ptr = p; - for (i=0; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - rearrange(sib_ptr->back, p); - } - } -} /* rearrange */ - - -void proml_coordinates(node *p, double lengthsum, long *tipy, - double *tipmax) -{ - /* establishes coordinates of nodes */ - node *q, *first, *last; - double xx; - - if (p->tip) { - p->xcoord = (long)(over * lengthsum + 0.5); - p->ycoord = (*tipy); - p->ymin = (*tipy); - p->ymax = (*tipy); - (*tipy) += down; - if (lengthsum > (*tipmax)) - (*tipmax) = lengthsum; - return; - } - q = p->next; - do { - xx = q->v; - if (xx > 100.0) - xx = 100.0; - proml_coordinates(q->back, lengthsum + xx, tipy,tipmax); - q = q->next; - } while ((p == curtree.start || p != q) && - (p != curtree.start || p->next != q)); - first = p->next->back; - q = p; - while (q->next != p) - q = q->next; - last = q->back; - p->xcoord = (long)(over * lengthsum + 0.5); - if (p == curtree.start) - p->ycoord = p->next->next->back->ycoord; - else - p->ycoord = (first->ycoord + last->ycoord) / 2; - p->ymin = first->ymin; - p->ymax = last->ymax; -} /* proml_coordinates */ - - -void proml_printree() -{ - /* prints out diagram of the tree2 */ - long tipy; - double scale, tipmax; - long i; - - if (!treeprint) - return; - putc('\n', outfile); - tipy = 1; - tipmax = 0.0; - proml_coordinates(curtree.start, 0.0, &tipy, &tipmax); - scale = 1.0 / (long)(tipmax + 1.000); - for (i = 1; i <= (tipy - down); i++) - drawline2(i, scale, curtree); - putc('\n', outfile); -} /* proml_printree */ - - -void sigma(node *p, double *sumlr, double *s1, double *s2) -{ - /* compute standard deviation */ - double tt, aa, like, slope, curv; - - prot_slopecurv(p, p->v, &like, &slope, &curv); - tt = p->v; - p->v = epsilon; - p->back->v = 
epsilon; - aa = prot_evaluate(p, false); - p->v = tt; - p->back->v = tt; - (*sumlr) = prot_evaluate(p, false) - aa; - if (curv < -epsilon) { - (*s1) = p->v + (-slope - sqrt(slope * slope - 3.841 * curv)) / curv; - (*s2) = p->v + (-slope + sqrt(slope * slope - 3.841 * curv)) / curv; - } - else { - (*s1) = -1.0; - (*s2) = -1.0; - } -} /* sigma */ - - -void describe(node *p) -{ - /* print out information for one branch */ - long i, num_sibs; - node *q, *sib_ptr; - double sumlr, sigma1, sigma2; - - if (!p->tip && !p->initialized) - prot_nuview(p); - if (!p->back->tip && !p->back->initialized) - prot_nuview(p->back); - q = p->back; - if (q->tip) { - fprintf(outfile, " "); - for (i = 0; i < nmlngth; i++) - putc(nayme[q->index-1][i], outfile); - fprintf(outfile, " "); - } else - fprintf(outfile, " %4ld ", q->index - spp); - if (p->tip) { - for (i = 0; i < nmlngth; i++) - putc(nayme[p->index-1][i], outfile); - } else - fprintf(outfile, "%4ld ", p->index - spp); - fprintf(outfile, "%15.5f", q->v); - if (!usertree || (usertree && !lngths) || p->iter) { - sigma(q, &sumlr, &sigma1, &sigma2); - if (sigma1 <= sigma2) - fprintf(outfile, " ( zero, infinity)"); - else { - fprintf(outfile, " ("); - if (sigma2 <= 0.0) - fprintf(outfile, " zero"); - else - fprintf(outfile, "%9.5f", sigma2); - fprintf(outfile, ",%12.5f", sigma1); - putc(')', outfile); - } - if (sumlr > 1.9205) - fprintf(outfile, " *"); - if (sumlr > 2.995) - putc('*', outfile); - } - putc('\n', outfile); - if (!p->tip) { - num_sibs = count_sibs(p); - sib_ptr = p; - for (i=0; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - describe(sib_ptr->back); - } - } -} /* describe */ - - -void prot_reconstr(node *p, long n) -{ - /* reconstruct and print out acid at site n+1 at node p */ - long i, j, k, first, num_sibs = 0; - double f, sum, xx[20]; - node *q = NULL; - - if (p->tip) - putc(y[p->index-1][n], outfile); - else { - num_sibs = count_sibs(p); - if ((ally[n] == 0) || (location[ally[n]-1] == 0)) - putc('.', outfile); - 
else { - j = location[ally[n]-1] - 1; - sum = 0; - for (i = 0; i <= 19; i++) { - f = p->protx[j][mx-1][i]; - if (!p->tip) { - q = p; - for (k = 0; k < num_sibs; k++) { - q = q->next; - f *= q->protx[j][mx-1][i]; - } - } - f = sqrt(f); - xx[i] = f * freqaa[i]; - sum += xx[i]; - } - for (i = 0; i <= 19; i++) - xx[i] /= sum; - first = 0; - for (i = 0; i <= 19; i++) - if (xx[i] > xx[first]) - first = i; - if (xx[first] > 0.95) - putc(aachar[first], outfile); - else - putc(tolower(aachar[first]), outfile); - if (rctgry && rcategs > 1) - mx = mp[n][mx - 1]; - else - mx = 1; - } - } -} /* prot_reconstr */ - - -void rectrav(node *p, long m, long n) -{ - /* print out segment of reconstructed sequence for one branch */ - long i; - - putc(' ', outfile); - if (p->tip) { - for (i = 0; i < nmlngth; i++) - putc(nayme[p->index-1][i], outfile); - } else - fprintf(outfile, "%4ld ", p->index - spp); - fprintf(outfile, " "); - mx = mx0; - for (i = m; i <= n; i++) { - if ((i % 10 == 0) && (i != m)) - putc(' ', outfile); - prot_reconstr(p, i); - } - putc('\n', outfile); - if (!p->tip) { - rectrav(p->next->back, m, n); - rectrav(p->next->next->back, m, n); - } - mx1 = mx; -} /* rectrav */ - - -void summarize() -{ - /* print out branch length information and node numbers */ - long i, j, mm, num_sibs; - double mode, sum; - double like[maxcategs],nulike[maxcategs]; - double **marginal; - node *sib_ptr; - - if (!treeprint) - return; - fprintf(outfile, "\nremember: "); - if (outgropt) - fprintf(outfile, "(although rooted by outgroup) "); - fprintf(outfile, "this is an unrooted tree!\n\n"); - fprintf(outfile, "Ln Likelihood = %11.5f\n", curtree.likelihood); - fprintf(outfile, "\n Between And Length"); - if (!(usertree && lngths && haslengths)) - fprintf(outfile, " Approx. 
Confidence Limits"); - fprintf(outfile, "\n"); - fprintf(outfile, " ------- --- ------"); - if (!(usertree && lngths && haslengths)) - fprintf(outfile, " ------- ---------- ------"); - fprintf(outfile, "\n\n"); - for (i = spp; i < nonodes2; i++) { - /* So this works with arbitrary multifurcations */ - if (curtree.nodep[i]) { - num_sibs = count_sibs (curtree.nodep[i]); - sib_ptr = curtree.nodep[i]; - for (j = 0; j < num_sibs; j++) { - sib_ptr->initialized = false; - sib_ptr = sib_ptr->next; - } - } - } - - describe(curtree.start->back); - - /* So this works with arbitrary multifurcations */ - num_sibs = count_sibs(curtree.start); - sib_ptr = curtree.start; - for (i=0; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - describe(sib_ptr->back); - } - - fprintf(outfile, "\n"); - if (!(usertree && lngths && haslengths)) { - fprintf(outfile, " * = significantly positive, P < 0.05\n"); - fprintf(outfile, " ** = significantly positive, P < 0.01\n\n"); - } - dummy = prot_evaluate(curtree.start, false); - if (rctgry && rcategs > 1) { - for (i = 0; i < rcategs; i++) - like[i] = 1.0; - for (i = sites - 1; i >= 0; i--) { - sum = 0.0; - for (j = 0; j < rcategs; j++) { - nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; - mp[i][j] = j + 1; - for (k = 1; k <= rcategs; k++) { - if (k != j + 1) { - if (lambda * probcat[k - 1] * like[k - 1] > nulike[j]) { - nulike[j] = lambda * probcat[k - 1] * like[k - 1]; - mp[i][j] = k; - } - } - } - if ((ally[i] > 0) && (location[ally[i]-1] > 0)) - nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; - sum += nulike[j]; - } - for (j = 0; j < rcategs; j++) - nulike[j] /= sum; - memcpy(like, nulike, rcategs * sizeof(double)); - } - mode = 0.0; - mx = 1; - for (i = 1; i <= rcategs; i++) { - if (probcat[i - 1] * like[i - 1] > mode) { - mx = i; - mode = probcat[i - 1] * like[i - 1]; - } - } - mx0 = mx; - fprintf(outfile, - "Combination of categories that contributes the most to the likelihood:\n\n"); - for (i = 1; i <= nmlngth + 3; i++) - 
putc(' ', outfile); - for (i = 1; i <= sites; i++) { - fprintf(outfile, "%ld", mx); - if (i % 10 == 0) - putc(' ', outfile); - if (i % 60 == 0 && i != sites) { - putc('\n', outfile); - for (j = 1; j <= nmlngth + 3; j++) - putc(' ', outfile); - } - mx = mp[i - 1][mx - 1]; - } - fprintf(outfile, "\n\n"); - marginal = (double **) Malloc(sites*sizeof(double *)); - for (i = 0; i < sites; i++) - marginal[i] = (double *) Malloc(rcategs*sizeof(double)); - for (i = 0; i < rcategs; i++) - like[i] = 1.0; - for (i = sites - 1; i >= 0; i--) { - sum = 0.0; - for (j = 0; j < rcategs; j++) { - nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; - for (k = 1; k <= rcategs; k++) { - if (k != j + 1) - nulike[j] += lambda * probcat[k - 1] * like[k - 1]; - } - if ((ally[i] > 0) && (location[ally[i]-1] > 0)) - nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; - sum += nulike[j]; - } - for (j = 0; j < rcategs; j++) { - nulike[j] /= sum; - marginal[i][j] = nulike[j]; - } - memcpy(like, nulike, rcategs * sizeof(double)); - } - for (i = 0; i < rcategs; i++) - like[i] = 1.0; - for (i = 0; i < sites; i++) { - sum = 0.0; - for (j = 0; j < rcategs; j++) { - nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; - for (k = 1; k <= rcategs; k++) { - if (k != j + 1) - nulike[j] += lambda * probcat[k - 1] * like[k - 1]; - } - marginal[i][j] *= like[j] * probcat[j]; - sum += nulike[j]; - } - for (j = 0; j < rcategs; j++) - nulike[j] /= sum; - memcpy(like, nulike, rcategs * sizeof(double)); - sum = 0.0; - for (j = 0; j < rcategs; j++) - sum += marginal[i][j]; - for (j = 0; j < rcategs; j++) - marginal[i][j] /= sum; - } - fprintf(outfile, "Most probable category at each site if > 0.95"); - fprintf(outfile, " probability (\".\" otherwise)\n\n"); - for (i = 1; i <= nmlngth + 3; i++) - putc(' ', outfile); - for (i = 0; i < sites; i++) { - sum = 0.0; - for (j = 0; j < rcategs; j++) - if (marginal[i][j] > sum) { - sum = marginal[i][j]; - mm = j; - } - if (sum >= 0.95) - fprintf(outfile, 
"%ld", mm+1); - else - putc('.', outfile); - if ((i+1) % 60 == 0) { - if (i != 0) { - putc('\n', outfile); - for (j = 1; j <= nmlngth + 3; j++) - putc(' ', outfile); - } - } - else if ((i+1) % 10 == 0) - putc(' ', outfile); - } - putc('\n', outfile); - for (i = 0; i < sites; i++) - free(marginal[i]); - free(marginal); - } - putc('\n', outfile); - if (hypstate) { - fprintf(outfile, "Probable sequences at interior nodes:\n\n"); - fprintf(outfile, " node "); - for (i = 0; (i < 13) && (i < ((sites + (sites-1)/10 - 39) / 2)); i++) - putc(' ', outfile); - fprintf(outfile, "Reconstructed sequence (caps if > 0.95)\n\n"); - if (!rctgry || (rcategs == 1)) - mx0 = 1; - for (i = 0; i < sites; i += 60) { - k = i + 59; - if (k >= sites) - k = sites - 1; - rectrav(curtree.start, i, k); - rectrav(curtree.start->back, i, k); - putc('\n', outfile); - mx0 = mx1; - } - } -} /* summarize */ - - -void initpromlnode(node **p, node **grbg, node *q, long len, long nodei, - long *ntips, long *parens, initops whichinit, - pointarray treenode, pointarray nodep, Char *str, - Char *ch, FILE *intree) -{ - /* initializes a node */ - boolean minusread; - double valyew, divisor; - - switch (whichinit) { - case bottom: - gnu(grbg, p); - (*p)->index = nodei; - (*p)->tip = false; - malloc_ppheno((*p), endsite, rcategs); - nodep[(*p)->index - 1] = (*p); - break; - case nonbottom: - gnu(grbg, p); - malloc_ppheno(*p, endsite, rcategs); - (*p)->index = nodei; - break; - case tip: - match_names_to_data(str, nodep, p, spp); - break; - case iter: - (*p)->initialized = false; - (*p)->v = initialv; - (*p)->iter = true; - if ((*p)->back != NULL){ - (*p)->back->iter = true; - (*p)->back->v = initialv; - (*p)->back->initialized = false; - } - break; - case length: - processlength(&valyew, &divisor, ch, &minusread, intree, parens); - (*p)->v = valyew / divisor; - (*p)->iter = false; - if ((*p)->back != NULL) { - (*p)->back->v = (*p)->v; - (*p)->back->iter = false; - } - break; - case hsnolength: - haslengths = 
false; - break; - default: /* cases hslength, treewt, unittrwt */ - break; /* should never occur */ - } -} /* initpromlnode */ - - -void dnaml_treeout(node *p) -{ - /* write out file with representation of final tree2 */ - /* Only works for bifurcations! */ - long i, n, w; - Char c; - double x; - node *q; - boolean inloop; - - if (p->tip) { - n = 0; - for (i = 1; i <= nmlngth; i++) { - if (nayme[p->index-1][i - 1] != ' ') - n = i; - } - for (i = 0; i < n; i++) { - c = nayme[p->index-1][i]; - if (c == ' ') - c = '_'; - putc(c, outtree); - } - col += n; - } else { - putc('(', outtree); - col++; - - inloop = false; - q = p->next; - do { - if (inloop) { - putc(',', outtree); - col++; - if (col > 45) { - putc('\n', outtree); - col = 0; - } - } - inloop = true; - dnaml_treeout(q->back); - q = q->next; - } while ((p == curtree.start || p != q) && - (p != curtree.start || p->next != q)); - - putc(')', outtree); - col++; - } - x = p->v; - if (x > 0.0) - w = (long)(0.43429448222 * log(x)); - else if (x == 0.0) - w = 0; - else - w = (long)(0.43429448222 * log(-x)) + 1; - if (w < 0) - w = 0; - if (p == curtree.start) - fprintf(outtree, ";\n"); - else { - fprintf(outtree, ":%*.5f", (int)(w + 7), x); - col += w + 8; - } -} /* dnaml_treeout */ - - -void buildnewtip(long m, tree *tr) -{ - node *p; - - p = tr->nodep[nextsp + spp - 3]; - hookup(tr->nodep[m - 1], p); - p->v = initialv; - p->back->v = initialv; -} /* buildnewtip */ - - -void buildsimpletree(tree *tr) -{ - hookup(tr->nodep[enterorder[0] - 1], tr->nodep[enterorder[1] - 1]); - tr->nodep[enterorder[0] - 1]->v = 1.0; - tr->nodep[enterorder[0] - 1]->back->v = 1.0; - tr->nodep[enterorder[1] - 1]->v = 1.0; - tr->nodep[enterorder[1] - 1]->back->v = 1.0; - buildnewtip(enterorder[2], tr); - insert_(tr->nodep[enterorder[2] - 1]->back, - tr->nodep[enterorder[0] - 1], false); -} /* buildsimpletree */ - - -void free_all_protx (long nonodes, pointarray treenode) -{ - /* used in proml */ - long i, j, k; - node *p; - - /* Zero thru spp 
are tips, */ - for (i = 0; i < spp; i++) { - for (j = 0; j < endsite; j++) - free(treenode[i]->protx[j]); - free(treenode[i]->protx); - } - - /* The rest are rings (i.e. triads) */ - for (i = spp; i < nonodes; i++) { - if (treenode[i] != NULL) { - p = treenode[i]; - do { - for (k = 0; k < endsite; k++) - free(p->protx[k]); - free(p->protx); - p = p->next; - } while (p != treenode[i]); - } - } -} /* free_all_protx */ - -void proml_unroot(node* root, node** nodep, long nonodes) -{ - node *r,*q,*tmpnode; - double newl; - long i; - long numsibs; - - numsibs = count_sibs(root); - - if ( numsibs > 2 ) { - q = root; - r = root; - while (!(q->next == root)) - q = q->next; - q->next = root->next; - root = q; - for(i=0 ; i < endsite ; i++){ - free(r->protx[i]); - r->protx[i] = NULL; - } - free(r->protx); - r->protx = NULL; - chucktreenode(&grbg, r); - curtree.nodep[spp] = q; - } else if ( root->next->next->next == root) { - newl = root->next->oldlen + root->next->next->oldlen; - root->next->back->oldlen = newl; - root->next->next->back->oldlen = newl; - - newl = root->next->v + root->next->next->v; - root->next->back->v = newl; - root->next->next->back->v = newl; - - root->next->back->back=root->next->next->back; - root->next->next->back->back = root->next->back; - while ( root->index != nonodes ) { - tmpnode = nodep[ root->index ]; - nodep[root->index] = root; - root->index++; - root->next->index++; - root->next->next->index++; - nodep[root->index - 2] = tmpnode; - tmpnode->index--; - tmpnode->next->index--; - tmpnode->next->next->index--; - } - nodep[nonodes -1] = NULL; - for(i=0 ; i < endsite ; i++){ - free(root->protx[i]); - free(root->next->protx[i]); - free(root->next->next->protx[i]); - root->protx[i] = NULL; - root->next->protx[i] = NULL; - root->next->next->protx[i] = NULL; - } - free(root->protx); - free(root->next->protx); - free(root->next->next->protx); - - chucktreenode(&grbg,root->next->next); - chucktreenode(&grbg,root->next); - chucktreenode(&grbg,root); - - 
} -} - - - -void maketree() -{ - long i, j; - boolean dummy_first, goteof; - pointarray dummy_treenode=NULL; - long nextnode; - node *root; - - prot_inittable(); - - if (usertree) { - openfile(&intree,INTREE,"input tree file", "r",progname,intreename); - numtrees = countsemic(&intree); - if(numtrees > MAXSHIMOTREES) - shimotrees = MAXSHIMOTREES; - else - shimotrees = numtrees; - if (numtrees > 2) - initseed(&inseed, &inseed0, seed); - l0gl = (double *) Malloc(shimotrees * sizeof(double)); - l0gf = (double **) Malloc(shimotrees * sizeof(double *)); - for (i=0; i < shimotrees; ++i) - l0gf[i] = (double *) Malloc(endsite * sizeof(double)); - if (treeprint) { - fprintf(outfile, "User-defined tree"); - if (numtrees > 1) - putc('s', outfile); - fprintf(outfile, ":\n\n"); - } - which = 1; - - /* This taken out of tree read, used to be [spp-1], but referring - to [0] produces output identical to what the pre-modified dnaml - produced. */ - - while (which <= numtrees) { - - /* These initializations required each time through the loop - since multiple trees require re-initialization */ - haslengths = true; - nextnode = 0; - dummy_first = true; - goteof = false; - - treeread(intree, &root, dummy_treenode, &goteof, &dummy_first, - curtree.nodep, &nextnode, &haslengths, &grbg, - initpromlnode,false,nonodes2); - proml_unroot(root,curtree.nodep,nonodes2); - if (goteof && (which <= numtrees)) { - /* if we hit the end of the file prematurely */ - printf ("\n"); - printf ("ERROR: trees missing at end of file.\n"); - printf ("\tExpected number of trees:\t\t%ld\n", numtrees); - printf ("\tNumber of trees actually in file:\t%ld.\n\n", which - 1); - exxit (-1); - } - - curtree.start = curtree.nodep[0]->back; - if ( outgropt ) - curtree.start = curtree.nodep[outgrno - 1]->back; - - treevaluate(); - proml_printree(); - summarize(); - if (trout) { - col = 0; - dnaml_treeout(curtree.start); - } - if(which < numtrees){ - prot_freex_notip(nextnode, curtree.nodep); - gdispose(curtree.start, 
&grbg, curtree.nodep); - } else nonodes2 = nextnode; - which++; - } - FClose(intree); - putc('\n', outfile); - if (!auto_ && numtrees > 1 && weightsum > 1 ) - standev2(numtrees, maxwhich, 0, endsite-1, maxlogl, - l0gl, l0gf, aliasweight, seed); - } else { - /* If there's no input user tree, */ - for (i = 1; i <= spp; i++) - enterorder[i - 1] = i; - if (jumble) - randumize(seed, enterorder); - if (progress) { - printf("\nAdding species:\n"); - writename(0, 3, enterorder); -#ifdef WIN32 - phyFillScreenColor(); -#endif - } - nextsp = 3; - polishing = false; - buildsimpletree(&curtree); - curtree.start = curtree.nodep[enterorder[0] - 1]->back; - smoothit = improve; - nextsp = 4; - while (nextsp <= spp) { - buildnewtip(enterorder[nextsp - 1], &curtree); - bestyet = UNDEFINED; - if (smoothit) - promlcopy(&curtree, &priortree, nonodes2, rcategs); - addtraverse(curtree.nodep[enterorder[nextsp - 1] - 1]->back, - curtree.start, true); - if (smoothit) - promlcopy(&bestree, &curtree, nonodes2, rcategs); - else { - insert_(curtree.nodep[enterorder[nextsp - 1] - 1]->back, qwhere, true); - smoothit = true; - for (i = 1; i<=smoothings; i++) { - smooth(curtree.start); - smooth(curtree.start->back); - } - smoothit = false; - promlcopy(&curtree, &bestree, nonodes2, rcategs); - bestyet = curtree.likelihood; - } - if (progress) { - writename(nextsp - 1, 1, enterorder); -#ifdef WIN32 - phyFillScreenColor(); -#endif - } - if (global && nextsp == spp && progress) { - printf("Doing global rearrangements\n"); - printf(" !"); - for (j = spp ; j < nonodes2 ; j++) - if ( (j - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) - putchar('-'); - printf("!\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - } - succeeded = true; - while (succeeded) { - succeeded = false; - if (global && nextsp == spp && progress) { - printf(" "); - fflush(stdout); - } - if (global && nextsp == spp) - globrearrange(); - else - rearrange(curtree.start, curtree.start->back); - if (global && nextsp == spp && progress) - 
putchar('\n'); - } - nextsp++; - } - if (global && progress) { - putchar('\n'); - fflush(stdout); -#ifdef WIN32 - phyFillScreenColor(); -#endif - } - promlcopy(&curtree, &bestree, nonodes2, rcategs); - if (njumble > 1) { - if (jumb == 1) - promlcopy(&bestree, &bestree2, nonodes2, rcategs); - else - if (bestree2.likelihood < bestree.likelihood) - promlcopy(&bestree, &bestree2, nonodes2, rcategs); - } - if (jumb == njumble) { - if (njumble > 1) - promlcopy(&bestree2, &curtree, nonodes2, rcategs); - curtree.start = curtree.nodep[outgrno - 1]->back; - for (i = 0; i < nonodes2; i++) { - if (i < spp) - curtree.nodep[i]->initialized = false; - else { - curtree.nodep[i]->initialized = false; - curtree.nodep[i]->next->initialized = false; - curtree.nodep[i]->next->next->initialized = false; - } - } - treevaluate(); - proml_printree(); - summarize(); - if (trout) { - col = 0; - dnaml_treeout(curtree.start); - } - } - } - if (usertree) { - free(l0gl); - for (i=0; i < shimotrees; i++) - free(l0gf[i]); - free(l0gf); - } - prot_freetable(); - if (jumb < njumble) - return; - free(contribution); - free(mp); - for (i=0; i < endsite; i++) - free(term[i]); - free(term); - for (i=0; i < endsite; i++) - free(slopeterm[i]); - free(slopeterm); - for (i=0; i < endsite; i++) - free(curveterm[i]); - free(curveterm); - free_all_protx(nonodes2, curtree.nodep); - if (!usertree) { - free_all_protx(nonodes2, bestree.nodep); - free_all_protx(nonodes2, priortree.nodep); - if (njumble > 1) - free_all_protx(nonodes2, bestree2.nodep); - } - if (progress) { - printf("\n\nOutput written to file \"%s\"\n\n", outfilename); - if (trout) - printf("Tree also written onto file \"%s\"\n", outtreename); - putchar('\n'); - } -} /* maketree */ - - -void clean_up() -{ - /* Free and/or close stuff */ - long i; - - free (rrate); - free (probcat); - free (rate); - /* Seems to require freeing every time... 
*/ - for (i = 0; i < spp; i++) { - free (y[i]); - } - free (y); - free (nayme); - free (enterorder); - free (category); - free (weight); - free (alias); - free (ally); - free (location); - free (aliasweight); - free (probmat); - free (eigmat); - - FClose(infile); - FClose(outfile); - FClose(outtree); -#ifdef MAC - fixmacfile(outfilename); - fixmacfile(outtreename); -#endif -} /* clean_up */ - - -int main(int argc, Char *argv[]) -{ /* Protein Maximum Likelihood */ -#ifdef MAC - argc = 1; /* macsetup("ProML",""); */ - argv[0] = "ProML"; -#endif - init(argc,argv); - progname = argv[0]; - openfile(&infile,INFILE,"input file","r",argv[0],infilename); - openfile(&outfile,OUTFILE,"output file","w",argv[0],outfilename); - mulsets = false; - datasets = 1; - firstset = true; - ibmpc = IBMCRT; - ansi = ANSICRT; - grbg = NULL; - doinit(); - if (ctgry) - openfile(&catfile,CATFILE,"categories file","r",argv[0],catfilename); - if (weights || justwts) - openfile(&weightfile,WEIGHTFILE,"weights file","r",argv[0],weightfilename); - if (trout) - openfile(&outtree,OUTTREE,"output tree file","w",argv[0],outtreename); - for (ith = 1; ith <= datasets; ith++) { - if (datasets > 1) { - fprintf(outfile, "Data set # %ld:\n", ith); - printf("\nData set # %ld:\n", ith); - } - getinput(); - if (ith == 1) - firstset = false; - for (jumb = 1; jumb <= njumble; jumb++) { - max_num_sibs = 0; - maketree(); - } - } - - clean_up(); - printf("Done.\n\n"); -#ifdef WIN32 - phyRestoreConsoleAttributes(); -#endif - return 0; -} /* Protein Maximum Likelihood */ - diff --git a/forester/archive/RIO/others/phylip_mod/src/promlk.c b/forester/archive/RIO/others/phylip_mod/src/promlk.c deleted file mode 100644 index 7717f07..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/promlk.c +++ /dev/null @@ -1,3176 +0,0 @@ - -#include "phylip.h" -#include "seq.h" - -/* version 3.6. (c) Copyright 1986-2004 by the University of Washington - and by Joseph Felsenstein. Written by Joseph Felsenstein and Lucas Mix. 
- Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. */ - -#define epsilon 0.0001 /* used in makenewv, getthree, update */ -#define over 60 - -typedef long vall[maxcategs]; -typedef double contribarr[maxcategs]; - -#ifndef OLDC -/* function prototypes */ -void init_protmats(void); -void getoptions(void); -void makeprotfreqs(void); -void allocrest(void); -void doinit(void); -void inputoptions(void); -void input_protdata(long); -void makeweights(void); -void prot_makevalues(long, pointarray, long, long, sequence, steptr); -void getinput(void); - -void prot_inittable(void); -void alloc_pmatrix(long); -void make_pmatrix(double **, double **, double **, long, double, double, - double *, double **); -void prot_nuview(node *); -void getthree(node *p, double thigh, double tlow); -void makenewv(node *); -void update(node *); -void smooth(node *); -void promlk_add(node *, node *, node *, boolean); -void promlk_re_move(node **, node **, boolean); - -double prot_evaluate(node *); -void tryadd(node *, node **, node **); -void addpreorder(node *, node *, node *, boolean, boolean); -void restoradd(node *, node *, node *, double); -void tryrearr(node *, boolean *); -void repreorder(node *, boolean *); -void rearrange(node **); -void nodeinit(node *); -void initrav(node *); -void travinit(node *); - -void travsp(node *); -void treevaluate(void); -void promlk_coordinates(node *, long *); -void promlk_drawline(long, double); -void promlk_printree(void); -void describe(node *); -void prot_reconstr(node *, long); -void rectrav(node *, long, long); -void summarize(void); -void promlk_treeout(node *); -void initpromlnode(node **, node **, node *, long, long, long *, long *, - initops, pointarray, pointarray, Char *, Char *, FILE *); -void tymetrav(node *, double *); - -void free_all_protx(long, pointarray); -void maketree(void); -void clean_up(void); -void reallocsites(void); -void 
prot_freetable(void); -void free_pmatrix(long sib); -/* function prototypes */ -#endif - - -double **tbl; - -Char infilename[100], outfilename[100], intreename[100], outtreename[100], - catfilename[100], weightfilename[100]; -double *rrate; -long sites, weightsum, categs, datasets, ith, njumble, jumb, numtrees, shimotrees; -/* sites = number of sites in actual sequences - numtrees = number of user-defined trees */ -long inseed, inseed0, mx, mx0, mx1; -boolean global, jumble, lngths, trout, usertree, weights, rctgry, ctgry, - auto_, progress, mulsets, firstset, hypstate, smoothit, - polishing, justwts, gama, invar, usejtt, usepmb, usepam; -tree curtree, bestree, bestree2; -node *qwhere, *grbg; -double sumrates, cv, alpha, lambda, lambda1, invarfrac; -long *enterorder; -steptr aliasweight; -double *rate; -longer seed; -double *probcat; -contribarr *contribution; -char aachar[26]="ARNDCQEGHILKMFPSTWYVBZX?*-"; -char *progname; -long rcategs, nonodes2; - - -/* Local variables for maketree, propagated globally for C version: */ -long k, maxwhich, col; -double like, bestyet, tdelta, lnlike, slope, curv, maxlogl; -boolean lastsp, smoothed, succeeded; -double *l0gl; -double x[3], lnl[3]; -double expon1i[maxcategs], expon1v[maxcategs], - expon2i[maxcategs], expon2v[maxcategs]; -node *there; -double **l0gf; -Char ch, ch2; -long **mp; - - -/* Variables introduced to allow for protein probability calculations */ -long max_num_sibs; /* maximum number of siblings used in a */ - /* nuview calculation. determines size */ - /* final size of pmatrices */ -double *eigmat; /* eig matrix variable */ -double **probmat; /* prob matrix variable */ -double ****dpmatrix; /* derivative of pmatrix */ -double ****ddpmatrix; /* derivative of xpmatrix */ -double *****pmatrices; /* matrix of probabilities of protien */ - /* conversion. The 5 subscripts refer */ - /* to sibs, rcategs, categs, final and */ - /* initial states, respectively. 
*/ -double freqaa[20]; /* amino acid frequencies */ - -/* this jtt matrix decomposition due to Elisabeth Tillier */ -static double jtteigmat[] = -{0.0, -0.007031123, -0.006484345, -0.006086499, -0.005514432, --0.00772664, -0.008643413, -0.010620756, -0.009965552, -0.011671808, --0.012222418,-0.004589201, -0.013103714, -0.014048038, -0.003170582, --0.00347935, -0.015311677, -0.016021194, -0.017991454, -0.018911888}; - -static double jttprobmat[20][20] = -{{0.076999996, 0.051000003, 0.043000004, 0.051999998, 0.019999996, 0.041, - 0.061999994, 0.073999997, 0.022999999, 0.052000004, 0.090999997, 0.058999988, - 0.024000007, 0.04, 0.050999992, 0.069, 0.059000006, 0.014000008, 0.032000004, - 0.066000005}, - {0.015604455, -0.068062363, 0.020106264, 0.070723273, 0.011702977, 0.009674053, - 0.074000798, -0.169750458, 0.005560808, -0.008208636, -0.012305869, - -0.063730179, -0.005674643, -0.02116828, 0.104586169, 0.016480839, 0.016765139, - 0.005936994, 0.006046367, -0.0082877}, - {-0.049778281, -0.007118197, 0.003801272, 0.070749616, 0.047506147, - 0.006447017, 0.090522425, -0.053620432, -0.008508175, 0.037170603, - 0.051805545, 0.015413608, 0.019939916, -0.008431976, -0.143511376, - -0.052486072, -0.032116542, -0.000860626, -0.02535993, 0.03843545}, - {-0.028906423, 0.092952047, -0.009615343, -0.067870117, 0.031970392, - 0.048338335, -0.054396304, -0.135916654, 0.017780083, 0.000129242, - 0.031267424, 0.116333586, 0.007499746, -0.032153596, 0.033517051, - -0.013719269, -0.00347293, -0.003291821, -0.02158326, -0.008862168}, - {0.037181176, -0.023106564, -0.004482225, -0.029899635, 0.118139633, - -0.032298569, -0.04683198, 0.05566988, -0.012622847, 0.002023096, - -0.043921088, -0.04792557, -0.003452711, -0.037744513, 0.020822974, - 0.036580187, 0.02331425, -0.004807711, -0.017504496, 0.01086673}, - {0.044754061, -0.002503471, 0.019452517, -0.015611487, -0.02152807, - -0.013131425, -0.03465365, -0.047928912, 0.020608851, 0.067843095, - -0.122130014, 0.002521499, 0.013021646, 
-0.082891087, -0.061590119, - 0.016270856, 0.051468938, 0.002079063, 0.081019713, 0.082927944}, - {0.058917882, 0.007320741, 0.025278141, 0.000357541, -0.002831285, - -0.032453034, -0.010177288, -0.069447924, -0.034467324, 0.011422358, - -0.128478324, 0.04309667, -0.015319944, 0.113302422, -0.035052393, - 0.046885372, 0.06185183, 0.00175743, -0.06224497, 0.020282093}, - {-0.014562092, 0.022522921, -0.007094389, 0.03480089, -0.000326144, - -0.124039037, 0.020577906, -0.005056454, -0.081841576, -0.004381786, - 0.030826152, 0.091261631, 0.008878828, -0.02829487, 0.042718836, - -0.011180886, -0.012719227, -0.000753926, 0.048062375, -0.009399129}, - {0.033789571, -0.013512235, 0.088010984, 0.017580292, -0.006608005, - -0.037836971, -0.061344686, -0.034268357, 0.018190209, -0.068484614, - 0.120024744, -0.00319321, -0.001349477, -0.03000546, -0.073063759, - 0.081912399, 0.0635245, 0.000197, -0.002481798, -0.09108114}, - {-0.113947615, 0.019230545, 0.088819683, 0.064832765, 0.001801467, - -0.063829682, -0.072001633, 0.018429333, 0.057465965, 0.043901014, - -0.048050874, -0.001705918, 0.022637173, 0.017404665, 0.043877902, - -0.017089594, -0.058489485, 0.000127498, -0.029357194, 0.025943972}, - {0.01512923, 0.023603725, 0.006681954, 0.012360216, -0.000181447, - -0.023011838, -0.008960024, -0.008533239, 0.012569835, 0.03216118, - 0.061986403, -0.001919083, -0.1400832, -0.010669741, -0.003919454, - -0.003707024, -0.026806029, -0.000611603, -0.001402648, 0.065312824}, - {-0.036405351, 0.020816769, 0.011408213, 0.019787053, 0.038897829, - 0.017641789, 0.020858533, -0.006067252, 0.028617353, -0.064259496, - -0.081676567, 0.024421823, -0.028751676, 0.07095096, -0.024199434, - -0.007513119, -0.028108766, -0.01198095, 0.111761119, -0.076198809}, - {0.060831772, 0.144097327, -0.069151377, 0.023754576, -0.003322955, - -0.071618574, 0.03353154, -0.02795295, 0.039519769, -0.023453968, - -0.000630308, -0.098024591, 0.017672997, 0.003813378, -0.009266499, - -0.011192111, 0.016013873, 
-0.002072968, -0.010022044, -0.012526904}, - {-0.050776604, 0.092833081, 0.044069596, 0.050523021, -0.002628417, - 0.076542572, -0.06388631, -0.00854892, -0.084725311, 0.017401063, - -0.006262541, -0.094457679, -0.002818678, -0.0044122, -0.002883973, - 0.028729685, -0.004961596, -0.001498627, 0.017994575, -0.000232779}, - {-0.01894566, -0.007760205, -0.015160993, -0.027254587, 0.009800903, - -0.013443561, -0.032896517, -0.022734138, -0.001983861, 0.00256111, - 0.024823166, -0.021256768, 0.001980052, 0.028136263, -0.012364384, - -0.013782446, -0.013061091, 0.111173981, 0.021702122, 0.00046654}, - {-0.009444193, -0.042106824, -0.02535015, -0.055125574, 0.006369612, - -0.02945416, -0.069922064, -0.067221068, -0.003004999, 0.053624311, - 0.128862984, -0.057245803, 0.025550508, 0.087741073, -0.001119043, - -0.012036202, -0.000913488, -0.034864475, 0.050124813, 0.055534723}, - {0.145782464, -0.024348311, -0.031216873, 0.106174443, 0.00202862, - 0.02653866, -0.113657267, -0.00755018, 0.000307232, -0.051241158, - 0.001310685, 0.035275877, 0.013308898, 0.002957626, -0.002925034, - -0.065362319, -0.071844582, 0.000475894, -0.000112419, 0.034097762}, - {0.079840455, 0.018769331, 0.078685899, -0.084329807, -0.00277264, - -0.010099754, 0.059700608, -0.019209715, -0.010442992, -0.042100476, - -0.006020556, -0.023061786, 0.017246106, -0.001572858, -0.006703785, - 0.056301316, -0.156787357, -0.000303638, 0.001498195, 0.051363455}, - {0.049628261, 0.016475144, 0.094141653, -0.04444633, 0.005206131, - -0.001827555, 0.02195624, 0.013066683, -0.010415582, -0.022338403, - 0.007837197, -0.023397671, -0.002507095, 0.005177694, 0.017109561, - -0.202340113, 0.069681441, 0.000120736, 0.002201146, 0.004670849}, - {0.089153689, 0.000233354, 0.010826822, -0.004273519, 0.001440618, - 0.000436077, 0.001182351, -0.002255508, -0.000700465, 0.150589876, - -0.003911914, -0.00050154, -0.004564983, 0.00012701, -0.001486973, - -0.018902754, -0.054748555, 0.000217377, -0.000319302, -0.162541651}}; - - 
-static double pameigmat[] = {0.0, -0.002350753691875762, -0.002701991863800379, - -0.002931612442853115, -0.004262492032364507, -0.005395980482561625, - -0.007141172690079523, -0.007392844756151318, -0.007781761342200766, - -0.00810032066366362, -0.00875299712761124, -0.01048227332164386, - -0.01109594097332267, -0.01298616073142234, -0.01342036228188581, - -0.01552599145527578, -0.01658762802054814, -0.0174893445623765, - -0.01933280832903272, -0.02206353522613025}; - -static double pamprobmat[20][20] = - {{0.087683339901135, 0.04051291829598762, 0.04087846315185977, - 0.04771603459744777, 0.03247095396561266, 0.03784612688594957, - 0.0504933695604875, 0.0898249006830755, 0.03285885059543713, - 0.0357514442352119, 0.0852464099207521, 0.07910313444070642, - 0.01488243946396588, 0.04100101908956829, 0.05158026947089499, - 0.06975497205982451, 0.05832757042475474, 0.00931264523877807, - 0.03171540880870517, 0.06303972920984541}, - {0.01943453646811026, -0.004492574160484092, 0.007694891061220776, - 0.01278399096887701, 0.0106157418450234, 0.007542140341575122, - 0.01326994069032819, 0.02615565199894889, 0.003123125764490066, - 0.002204507682495444, -0.004782898215768979, 0.01204241965177619, - 0.0007847400096924341, -0.03043626073172116, 0.01221202591902536, - 0.01100527004684405, 0.01116495631339549, -0.0925364931988571, - -0.02622065387931562, 0.00843494142432107}, - {0.01855357100209072, 0.01493642835763868, 0.0127983090766285, - 0.0200533250704364, -0.1681898360107787, 0.01551657969909255, - 0.02128060163107209, 0.03100633591848964, 0.00845480845269879, - 0.000927149370785571, 0.00937207565817036, 0.03490557769673472, - 0.00300443019551563, -0.02590837220264415, 0.01329376859943192, - 0.006854110889741407, 0.01102593860528263, 0.003360844186685888, - -0.03459712356647764, 0.003351477369404443}, - {0.02690642688200102, 0.02131745801890152, 0.0143626616005213, - 0.02405101425725929, 0.05041008641436849, 0.01430925051050233, - 0.02362114036816964, 
0.04688381789373886, 0.005250115453626377, - -0.02040112168595516, -0.0942720776915669, 0.03773004996758644, - -0.00822831940782616, -0.1164872809439224, 0.02286281877257392, - 0.02849551240669926, 0.01468856796295663, 0.02377110964207936, - -0.094380545436577, -0.02089068498518036}, - {0.00930172577225213, 0.01493463068441099, 0.020186920775608, - 0.02892154953912524, -0.01224593358361567, 0.01404228329986624, - 0.02671186617119041, 0.04537535161795231, 0.02229995804098249, - -0.04635704133961575, -0.1966910360247138, 0.02796648065439046, - -0.02263484732621436, 0.0440490503242072, 0.01148782948302166, - 0.01989170531824069, 0.001306805142981245, -0.005676690969116321, - 0.07680476281625202, -0.07967537039721849}, - {0.06602274245435476, -0.0966661981471856, -0.005241648783844579, - 0.00859135188171146, -0.007762129660943368, -0.02888965572526196, - 0.003592291525888222, 0.1668410669287673, -0.04082039290551406, - 0.005233775047553415, -0.01758244726137135, -0.1493955762326898, - -0.00855819137835548, 0.004211419253492328, 0.01929306335052688, - 0.03008056746359405, 0.0190444422412472, 0.005577189741419315, - 0.0000874156155112068, 0.02634091459108298}, - {0.01933897472880726, 0.05874583569377844, -0.02293534606228405, - -0.07206314017962175, -0.004580681581546643, -0.0628814337610561, - -0.0850783812795136, 0.07988417636610614, -0.0852798990133397, - 0.01649047166155952, -0.05416647263757423, 0.1089834536254064, - 0.005093403979413865, 0.02520300254161142, 0.0005951431406455604, - 0.02441251821224675, 0.02796099482240553, -0.002574933994926502, - -0.007172237553012804, 0.03002455129086954}, - {0.04041118479094272, -0.002476225672095412, -0.01494505811263243, - -0.03759443758599911, -0.00892246902492875, -0.003634714029239211, - -0.03085671837973749, -0.126176309029931, 0.005814031139083794, - 0.01313561962646063, -0.04760487162503322, -0.0490563712725484, - -0.005082243450421558, -0.01213634309383557, 0.1806666927079249, - 0.02111663336185495, 
0.02963486860587087, -0.0000175020101657785, - 0.01197155383597686, 0.0357526792184636}, - {-0.01184769557720525, 0.01582776076338872, -0.006570708266564639, - -0.01471915653734024, 0.00894343616503608, 0.00562664968033149, - -0.01465878888356943, 0.05365282692645818, 0.00893509735776116, - -0.05879312944436473, 0.0806048683392995, -0.007722897986905326, - -0.001819943882718859, 0.0942535573077267, 0.07483883782251654, - 0.004354639673913651, -0.02828804845740341, -0.001318222184691827, - -0.07613149604246563, -0.1251675867732172}, - {0.00834167031558193, -0.01509357596974962, 0.007098172811092488, - 0.03127677418040319, 0.001992448468465455, 0.00915441566808454, - 0.03430175973499201, -0.0730648147535803, -0.001402707145575659, - 0.04780949194330815, -0.1115035603461273, -0.01292297197609604, - -0.005056270550868528, 0.1112053349612027, -0.03801929822379964, - -0.001191241001736563, 0.01872874622910247, 0.0005314214903865993, - -0.0882576318311789, 0.07607183599610171}, - {-0.01539460099727769, 0.04988596184297883, -0.01187240760647617, - -0.06987843637091853, -0.002490472846497859, 0.01009857892494956, - -0.07473588067847209, 0.0906009925879084, 0.1243612446505172, - 0.02152806401345371, -0.03504879644860233, -0.06680752427613573, - -0.005574485153629651, 0.001518282948127752, -0.01999168507510701, - -0.01478606199529457, -0.02203749419458996, -0.00132680708294333, - -0.01137505997867614, 0.05332658773667142}, - {-0.06104378736432388, 0.0869446603393548, -0.03298331234537257, - 0.03128515657456024, 0.003906358569208259, 0.03578694104193928, - 0.06241936133189683, 0.06182827284921748, -0.05566564263245907, - 0.02640868588189002, -0.01349751243059039, -0.05507866642582638, - -0.006671347738489326, -0.001470096466016046, 0.05185743641479938, - -0.07494697511168257, -0.1175185439057584, -0.001188074094105709, - 0.00937934805737347, 0.05024773745437657}, - {-0.07252555582124737, -0.116554459356382, 0.003605361887406413, - -0.00836518656029184, 0.004615715410745561, 
0.005105376617651312, - -0.00944938657024391, 0.05602449420950007, 0.02722719610561933, - 0.01959357494748446, -0.0258655103753962, 0.1440733975689835, - 0.01446782819722976, 0.003718896062070054, 0.05825843045655135, - -0.06230154142733073, -0.07833704962300169, 0.003160836143568724, - -0.001169873777936648, 0.03471745590503304}, - {-0.03204352258752698, 0.01019272923862322, 0.04509668708733181, - 0.05756522429120813, -0.0004601149081726732, -0.0984718150777423, - -0.01107826100664925, -0.005680277810520585, 0.01962359392320817, - 0.01550006899131986, 0.05143956925922197, 0.02462476682588468, - -0.0888843861002653, -0.00171553583659411, 0.01606331750661664, - 0.001176847743518958, -0.02070972978912828, -0.000341523293579971, - -0.002654732745607882, 0.02075709428885848}, - {0.03595199666430258, -0.02800219615234468, -0.04341570015493925, - -0.0748275906176658, 0.0001051403676377422, 0.1137431321746627, - 0.005852087565974318, 0.003443037513847801, -0.02481931657706633, - -0.003651181839831423, 0.03195794176786321, 0.04135411406392523, - -0.07562030263210619, 0.001769332364699, -0.01984381173403915, - -0.005029750745010152, 0.02649253902476472, 0.000518085571702734, - 0.001062936684474851, 0.01295950668914449}, - {-0.16164552322896, -0.0006050035060464324, 0.0258380054414968, - 0.003188424740960557, -0.0002058911341821877, 0.03157555987384681, - -0.01678913462596107, 0.03096216145389774, -0.0133791110666919, - 0.1125249625204277, -0.00769017706442472, -0.02653938062180483, - -0.002555329863523985, -0.00861833362947954, 0.01775148884754278, - 0.02529310679774722, 0.0826243417011238, -0.0001036728183032624, - 0.001963562313294209, -0.0935900561309786}, - {0.1652394174588469, -0.002814245280784351, -0.0328982001821263, - -0.02000104712964131, 0.0002208121995725443, -0.02733462178511839, - 0.02648078162927627, -0.01788316626401427, 0.01630747623755998, - 0.1053849023838147, -0.005447706553811218, 0.01810876922536839, - -0.001808914710282444, -0.007687912115607397, 
-0.01332593672114388, - -0.02110750894891371, -0.07456116592983384, 0.000219072589592394, - 0.001270886972191055, -0.1083616930749109}, - {0.02453279389716254, -0.005820072356487439, 0.100260287284095, - 0.01277522280305745, -0.003184943445296999, 0.05814689527984152, - -0.0934012278200201, -0.03017986487349484, -0.03136625380994165, - 0.00988668352785117, -0.00358900410973142, -0.02017443675004764, - 0.000915384582922184, -0.001460963415183106, -0.01370112443251124, - 0.1130040979284457, -0.1196161771323699, -0.0005800211204222045, - -0.0006153403201024954, 0.00416806428223025}, - {-0.0778089244252535, -0.007055161182430869, -0.0349307504860869, - -0.0811915584276571, -0.004689825871599125, -0.03726108871471753, - 0.1072225647141469, -0.00917015113070944, 0.01381628985996913, - -0.00123227881492089, 0.001815954515275675, 0.005708744099349901, - -0.0001448985044877925, -0.001306578795561384, -0.006992743514185243, - 0.1744720240732789, -0.05353628497814023, -0.0007613684227234787, - -0.0003550282315997644, 0.01340106423804634}, - {-0.0159527329868513, -0.007622151568160798, -0.1389875105184963, - 0.1165051999914764, -0.002217810389087748, 0.01550003226513692, - -0.07427664222230566, -0.003371438498619264, 0.01385754771325365, - 0.004759020167383304, 0.001624078805220564, 0.02011638303109029, - -0.001717827082842178, -0.0007424036708598594, -0.003978884451898934, - 0.0866418927301209, -0.01280817739158123, -0.00023039242454603, - 0.002309205802479111, 0.0005926106991001195}}; - -/* this pmb matrix decomposition due to Elisabeth Tillier */ -static double pmbeigmat[20] = -{0.0000001586972220,-1.8416770496147100, -1.6025046986139100,-1.5801012515121300, --1.4987794099715900,-1.3520794233801900,-1.3003469390479700,-1.2439503327631300, --1.1962574080244200,-1.1383730501367500,-1.1153278910708000,-0.4934843510654760, --0.5419014550215590,-0.9657997830826700,-0.6276075673757390,-0.6675927795018510, 
--0.6932641383465870,-0.8897872681859630,-0.8382698977371710,-0.8074694642446040}; - -static double pmbprobmat[20][20] = -{{0.0771762457248147,0.0531913844998640,0.0393445076407294,0.0466756566755510, -0.0286348361997465,0.0312327748383639,0.0505410248721427,0.0767106611472993, -0.0258916271688597,0.0673140562194124,0.0965705469252199,0.0515979465932174, -0.0250628079438675,0.0503492018628350,0.0399908189418273,0.0641898881894471, -0.0517539616710987,0.0143507440546115,0.0357994592438322,0.0736218495862984}, -{0.0368263046116572,-0.0006728917107827,0.0008590805287740,-0.0002764255356960, -0.0020152937187455,0.0055743720652960,0.0003213317669367,0.0000449190281568, --0.0004226254397134,0.1805040629634510,-0.0272246813586204,0.0005904606533477, --0.0183743200073889,-0.0009194625608688,0.0008173657533167,-0.0262629806302238, -0.0265738757209787,0.0002176606241904,0.0021315644838566,-0.1823229927207580}, -{-0.0194800075560895,0.0012068088610652,-0.0008803318319596,-0.0016044273960017, --0.0002938633803197,-0.0535796754602196,0.0155163896648621,-0.0015006360762140, -0.0021601372013703,0.0268513218744797,-0.1085292493742730,0.0149753083138452, -0.1346457366717310,-0.0009371698759829,0.0013501708044116,0.0346352293103622, --0.0276963770242276,0.0003643142783940,0.0002074817333067,-0.0174108903914110}, -{0.0557839400850153,0.0023271577185437,0.0183481103396687,0.0023339480096311, -0.0002013267015151,-0.0227406863569852,0.0098644845475047,0.0064721276774396, -0.0001389408104210,-0.0473713878768274,-0.0086984445005797,0.0026913674934634, -0.0283724052562196,0.0001063665179457,0.0027442574779383,-0.1875312134708470, -0.1279864877057640,0.0005103347834563,0.0003155113168637,0.0081451082759554}, -{0.0037510125027265,0.0107095920636885,0.0147305410328404,-0.0112351252180332, --0.0001500408626446,-0.1523450933729730,0.0611532413339872,-0.0005496748939503, -0.0048714378736644,-0.0003826320053999,0.0552010244407311,0.0482555671001955, 
--0.0461664995115847,-0.0021165008617978,-0.0004574454232187,0.0233755883688949, --0.0035484915422384,0.0009090698422851,0.0013840637687758,-0.0073895139302231}, -{-0.0111512564930024,0.1025460064723080,0.0396772456883791,-0.0298408501361294, --0.0001656742634733,-0.0079876311843289,0.0712644184507945,-0.0010780604625230, --0.0035880882043592,0.0021070399334252,0.0016716329894279,-0.1810123023850110, -0.0015141703608724,-0.0032700852781804,0.0035503782441679,0.0118634302028026, -0.0044561606458028,-0.0001576678495964,0.0023470722225751,-0.0027457045397157}, -{0.1474525743949170,-0.0054432538500293,0.0853848892349828,-0.0137787746207348, --0.0008274830358513,0.0042248844582553,0.0019556229305563,-0.0164191435175148, --0.0024501858854849,0.0120908948084233,-0.0381456105972653,0.0101271614855119, --0.0061945941321859,0.0178841099895867,-0.0014577779202600,-0.0752120602555032, --0.1426985695849920,0.0002862275078983,-0.0081191734261838,0.0313401149422531}, -{0.0542034611735289,-0.0078763926211829,0.0060433542506096,0.0033396210615510, -0.0013965072374079,0.0067798903832256,-0.0135291136622509,-0.0089982442731848, --0.0056744537593887,-0.0766524225176246,0.1881210263933930,-0.0065875518675173, -0.0416627569300375,-0.0953804133524747,-0.0012559228448735,0.0101622644292547, --0.0304742453119050,0.0011702318499737,0.0454733434783982,-0.1119239362388150}, -{0.1069409037912470,0.0805064400880297,-0.1127352030714600,0.1001181253523260, --0.0021480427488769,-0.0332884841459003,-0.0679837575848452,-0.0043812841356657, -0.0153418716846395,-0.0079441315103188,-0.0121766182046363,-0.0381127991037620, --0.0036338726532673,0.0195324059593791,-0.0020165963699984,-0.0061222685010268, --0.0253761448771437,-0.0005246410999057,-0.0112205170502433,0.0052248485517237}, -{-0.0325247648326262,0.0238753651653669,0.0203684886605797,0.0295666232678825, --0.0003946714764213,-0.0157242718469554,-0.0511737848084862,0.0084725632040180, 
--0.0167068828528921,0.0686962159427527,-0.0659702890616198,-0.0014289912494271, --0.0167000964093416,-0.1276689083678200,0.0036575057830967,-0.0205958145531018, -0.0000368919612829,0.0014413626622426,0.1064360941926030,0.0863372661517408}, -{-0.0463777468104402,0.0394712148670596,0.1118686750747160,0.0440711686389031, --0.0026076286506751,-0.0268454015202516,-0.1464943067133240,-0.0137514051835380, --0.0094395514284145,-0.0144124844774228,0.0249103379323744,-0.0071832157138676, -0.0035592787728526,0.0415627419826693,0.0027040097365669,0.0337523666612066, -0.0316121324137152,-0.0011350177559026,-0.0349998884574440,-0.0302651879823361}, -{0.0142360925194728,0.0413145623127025,0.0324976427846929,0.0580930922002398, --0.0586974207121084,0.0202001168873069,0.0492204086749069,0.1126593173463060, -0.0116620013776662,-0.0780333711712066,-0.1109786767320410,0.0407775100936731, --0.0205013161312652,-0.0653458585025237,0.0347351829703865,0.0304448983224773, -0.0068813748197884,-0.0189002309261882,-0.0334507528405279,-0.0668143558699485}, -{-0.0131548829657936,0.0044244322828034,-0.0050639951827271,-0.0038668197633889, --0.1536822386530220,0.0026336969165336,0.0021585651200470,-0.0459233839062969, -0.0046854727140565,0.0393815434593599,0.0619554007991097,0.0027456299925622, -0.0117574347936383,0.0373018612990383,0.0024818527553328,-0.0133956606027299, --0.0020457128424105,0.0154178819990401,0.0246524142683911,0.0275363065682921}, -{-0.1542307272455030,0.0364861558267547,-0.0090880407008181,0.0531673937889863, -0.0157585615170580,0.0029986538457297,0.0180194047699875,0.0652152443589317, -0.0266842840376180,0.0388457366405908,0.0856237634510719,0.0126955778952183, -0.0099593861698250,-0.0013941794862563,0.0294065511237513,-0.1151906949298290, --0.0852991447389655,0.0028699120202636,-0.0332087026659522,0.0006811857297899}, -{0.0281300736924501,-0.0584072081898638,-0.0178386569847853,-0.0536470338171487, 
--0.0186881656029960,-0.0240008730656106,-0.0541064820498883,0.2217137098936020, --0.0260500001542033,0.0234505236798375,0.0311127151218573,-0.0494139126682672, -0.0057093465049849,0.0124937286655911,-0.0298322975915689,0.0006520211333102, --0.0061018680727128,-0.0007081999479528,-0.0060523759094034,0.0215845995364623}, -{0.0295321046399105,-0.0088296411830544,-0.0065057049917325,-0.0053478115612781, --0.0100646496794634,-0.0015473619084872,0.0008539960632865,-0.0376381933046211, --0.0328135588935604,0.0672161874239480,0.0667626853916552,-0.0026511651464901, -0.0140451514222062,-0.0544836996133137,0.0427485157912094,0.0097455780205802, -0.0177309072915667,-0.0828759701187452,-0.0729504795471370,0.0670731961252313}, -{0.0082646581043963,-0.0319918630534466,-0.0188454445200422,-0.0374976353856606, -0.0037131290686848,-0.0132507796987883,-0.0306958830735725,-0.0044119395527308, --0.0140786756619672,-0.0180512599925078,-0.0208243802903953,-0.0232202769398931, --0.0063135878270273,0.0110442171178168,0.1824538048228460,-0.0006644614422758, --0.0069909097436659,0.0255407650654681,0.0099119399501151,-0.0140911517070698}, -{0.0261344441524861,-0.0714454044548650,0.0159436926233439,0.0028462736216688, --0.0044572637889080,-0.0089474834434532,-0.0177570282144517,-0.0153693244094452, -0.1160919467206400,0.0304911481385036,0.0047047513411774,-0.0456535116423972, -0.0004491494948617,-0.0767108879444462,-0.0012688533741441,0.0192445965934123, -0.0202321954782039,0.0281039933233607,-0.0590403018490048,0.0364080426546883}, -{0.0115826306265004,0.1340228176509380,-0.0236200652949049,-0.1284484655137340, --0.0004742338006503,0.0127617346949511,-0.0428560878860394,0.0060030732454125, -0.0089182609926781,0.0085353834972860,0.0048464809638033,0.0709740071429510, -0.0029940462557054,-0.0483434904493132,-0.0071713680727884,-0.0036840391887209, -0.0031454003250096,0.0246243550241551,-0.0449551277644180,0.0111449232769393}, 
-{0.0140356721886765,-0.0196518236826680,0.0030517022326582,0.0582672093364850, --0.0000973895685457,0.0021704767224292,0.0341806268602705,-0.0152035987563018, --0.0903198657739177,0.0259623214586925,0.0155832497882743,-0.0040543568451651, -0.0036477631918247,-0.0532892744763217,-0.0142569373662724,0.0104500681408622, -0.0103483945857315,0.0679534422398752,-0.0768068882938636,0.0280289727046158}} -; - - -void init_protmats() -{ - long l, m; - - eigmat = (double *) Malloc (20 * sizeof(double)); - for (l = 0; l <= 19; l++) - if (usejtt) - eigmat[l] = jtteigmat[l]*100.0; - else { - if (usepmb) - eigmat[l] = pmbeigmat[l]; - else - eigmat[l] = pameigmat[l]*100.0; - } - probmat = (double **) Malloc (20 * sizeof(double *)); - for (l = 0; l < 20; l++) - probmat[l] = (double *) Malloc (20 * sizeof(double)); - for (l = 0; l <= 19; l++) - for (m= 0; m <= 19; m++) - if (usejtt) - probmat[l][m] = jttprobmat[l][m]; - else { - if (usepmb) - probmat[l][m] = pmbprobmat[l][m]; - else - probmat[l][m] = pamprobmat[l][m]; - } -} /* init_protmats */ - - -void getoptions() -{ - /* interactively set options */ - long i, loopcount, loopcount2; - Char ch; - boolean done; - boolean didchangecat, didchangercat; - double probsum; - - fprintf(outfile, "\nAmino acid sequence\n"); - fprintf(outfile, " Maximum Likelihood method with molecular "); - fprintf(outfile, "clock, version %s\n\n", VERSION); - - putchar('\n'); - auto_ = false; - ctgry = false; - didchangecat = false; - rctgry = false; - didchangercat = false; - categs = 1; - rcategs = 1; - gama = false; - global = false; - hypstate = false; - invar = false; - jumble = false; - njumble = 1; - lambda = 1.0; - lambda1 = 0.0; - lngths = false; - trout = true; - usepam = false; - usepmb = false; - usejtt = true; - usertree = false; - weights = false; - printdata = false; - progress = true; - treeprint = true; - interleaved = true; - loopcount = 0; - do { - cleerhome(); - printf("\nAmino acid sequence\n"); - printf(" Maximum Likelihood method 
with molecular clock, version %s\n\n", - VERSION); - printf("Settings for this run:\n"); - printf(" U Search for best tree?"); - if (usertree) - printf(" No, use user trees in input file\n"); - else - printf(" Yes\n"); - printf(" P JTT, PMB or PAM probability model? %s\n", - usejtt ? "Jones-Taylor-Thornton" : - usepmb ? "Henikoff/Tillier PMB" : "Dayhoff PAM"); - if (usertree) { - printf(" L Use lengths from user tree?"); - if (lngths) - printf(" Yes\n"); - else - printf(" No\n"); - } - printf(" C One category of substitution rates?"); - if (!ctgry) - printf(" Yes\n"); - else - printf(" %ld categories\n", categs); - printf(" R Rate variation among sites?"); - if (!rctgry) - printf(" constant rate of change\n"); - else { - if (gama) - printf(" Gamma distributed rates\n"); - else { - if (invar) - printf(" Gamma+Invariant sites\n"); - else - printf(" user-defined HMM of rates\n"); - } - printf(" A Rates at adjacent sites correlated?"); - if (!auto_) - printf(" No, they are independent\n"); - else - printf(" Yes, mean block length =%6.1f\n", 1.0 / lambda); - } - if (!usertree) { - printf(" G Global rearrangements?"); - if (global) - printf(" Yes\n"); - else - printf(" No\n"); - } - printf(" W Sites weighted? %s\n", - (weights ? "Yes" : "No")); - if (!usertree) { - printf(" J Randomize input order of sequences?"); - if (jumble) - printf(" Yes (seed = %8ld, %3ld times)\n", inseed0, njumble); - else - printf(" No. Use input order\n"); - } - printf(" M Analyze multiple data sets?"); - if (mulsets) - printf(" Yes, %2ld %s\n", datasets, - (justwts ? 
"sets of weights" : "data sets")); - else - printf(" No\n"); - printf(" I Input sequences interleaved?"); - if (interleaved) - printf(" Yes\n"); - else - printf(" No, sequential\n"); - printf(" 0 Terminal type (IBM PC, ANSI, none)?"); - if (ibmpc) - printf(" IBM PC\n"); - if (ansi) - printf(" ANSI\n"); - if (!(ibmpc || ansi)) - printf(" (none)\n"); - printf(" 1 Print out the data at start of run"); - if (printdata) - printf(" Yes\n"); - else - printf(" No\n"); - printf(" 2 Print indications of progress of run"); - if (progress) - printf(" Yes\n"); - else - printf(" No\n"); - printf(" 3 Print out tree"); - if (treeprint) - printf(" Yes\n"); - else - printf(" No\n"); - printf(" 4 Write out trees onto tree file?"); - if (trout) - printf(" Yes\n"); - else - printf(" No\n"); - printf(" 5 Reconstruct hypothetical sequences? %s\n", - (hypstate ? "Yes" : "No")); - printf("\nAre these settings correct? "); - printf("(type Y or the letter for one to change)\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%c%*[^\n]", &ch); - getchar(); - if (ch == '\n') - ch = ' '; - uppercase(&ch); - done = (ch == 'Y'); - if (!done) { - uppercase(&ch); - if (strchr("UPCRJAFWGLTMI012345", ch) != NULL){ - switch (ch) { - - case 'C': - ctgry = !ctgry; - if (ctgry) { - printf("\nSitewise user-assigned categories:\n\n"); - initcatn(&categs); - if (rate){ - free(rate); - } - rate = (double *) Malloc( categs * sizeof(double)); - didchangecat = true; - initcategs(categs, rate); - } - break; - - case 'P': - if (usejtt) { - usejtt = false; - usepmb = true; - } else { - if (usepmb) { - usepmb = false; - usepam = true; - } else { - usepam = false; - usejtt = true; - } - } - break; - - case 'R': - if (!rctgry) { - rctgry = true; - gama = true; - } else { - if (gama) { - gama = false; - invar = true; - } else { - if (invar) - invar = false; - else - rctgry = false; - } - } - break; - - case 'A': - auto_ = !auto_; - if (auto_) { - initlambda(&lambda); - lambda1 = 1.0 - lambda; - } - break; - - 
case 'G': - global = !global; - break; - - case 'W': - weights = !weights; - break; - - case 'J': - jumble = !jumble; - if (jumble) - initjumble(&inseed, &inseed0, seed, &njumble); - else njumble = 1; - break; - - case 'L': - lngths = !lngths; - break; - - case 'U': - usertree = !usertree; - break; - - case 'M': - mulsets = !mulsets; - if (mulsets) { - printf("Multiple data sets or multiple weights?"); - loopcount2 = 0; - do { - printf(" (type D or W)\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%c%*[^\n]", &ch2); - getchar(); - if (ch2 == '\n') - ch2 = ' '; - uppercase(&ch2); - countup(&loopcount2, 10); - } while ((ch2 != 'W') && (ch2 != 'D')); - justwts = (ch2 == 'W'); - if (justwts) - justweights(&datasets); - else - initdatasets(&datasets); - if (!jumble) { - jumble = true; - initjumble(&inseed, &inseed0, seed, &njumble); - } - } - break; - - case 'I': - interleaved = !interleaved; - break; - - case '0': - initterminal(&ibmpc, &ansi); - break; - - case '1': - printdata = !printdata; - break; - - case '2': - progress = !progress; - break; - - case '3': - treeprint = !treeprint; - break; - - case '4': - trout = !trout; - break; - - case '5': - hypstate = !hypstate; - break; - } - } else - printf("Not a possible option!\n"); - } - countup(&loopcount, 100); - } while (!done); - if (gama || invar) { - loopcount = 0; - do { - printf( -"\nCoefficient of variation of substitution rate among sites (must be positive)\n"); - printf( - " In gamma distribution parameters, this is 1/(square root of alpha)\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%lf%*[^\n]", &cv); - getchar(); - countup(&loopcount, 10); - } while (cv <= 0.0); - alpha = 1.0 / (cv * cv); - } - if (!rctgry) - auto_ = false; - if (rctgry) { - printf("\nRates in HMM"); - if (invar) - printf(" (including one for invariant sites)"); - printf(":\n"); - initcatn(&rcategs); - if (probcat){ - free(probcat); - free(rrate); - } - probcat = (double *) Malloc(rcategs * sizeof(double)); - 
rrate = (double *) Malloc(rcategs * sizeof(double)); - didchangercat = true; - if (gama) - initgammacat(rcategs, alpha, rrate, probcat); - else { - if (invar) { - loopcount = 0; - do { - printf("Fraction of invariant sites?\n"); - scanf("%lf%*[^\n]", &invarfrac); - getchar(); - countup (&loopcount, 10); - } while ((invarfrac <= 0.0) || (invarfrac >= 1.0)); - initgammacat(rcategs-1, alpha, rrate, probcat); - for (i = 0; i < rcategs-1; i++) - probcat[i] = probcat[i]*(1.0-invarfrac); - probcat[rcategs-1] = invarfrac; - rrate[rcategs-1] = 0.0; - } else { - initcategs(rcategs, rrate); - initprobcat(rcategs, &probsum, probcat); - } - } - } - if (!didchangercat){ - rrate = Malloc( rcategs*sizeof(double)); - probcat = Malloc( rcategs*sizeof(double)); - rrate[0] = 1.0; - probcat[0] = 1.0; - } - if (!didchangecat){ - rate = Malloc( categs*sizeof(double)); - rate[0] = 1.0; - } - init_protmats(); -} /* getoptions */ - - -void makeprotfreqs() -{ - /* calculate amino acid frequencies based on eigmat */ - long i, mineig; - - mineig = 0; - for (i = 0; i <= 19; i++) - if (fabs(eigmat[i]) < fabs(eigmat[mineig])) - mineig = i; - memcpy(freqaa, probmat[mineig], 20 * sizeof(double)); - for (i = 0; i <= 19; i++) - freqaa[i] = fabs(freqaa[i]); -} /* makeprotfreqs */ - -void reallocsites() -{ - long i; - for (i = 0; i < spp; i++) - y[i] = (char *)Malloc(sites * sizeof(char)); - enterorder = (long *)Malloc(spp*sizeof(long)); - weight = (long *)Malloc(sites*sizeof(long)); - category = (long *)Malloc(sites*sizeof(long)); - alias = (long *)Malloc(sites*sizeof(long)); - aliasweight = (long *)Malloc(sites*sizeof(long)); - ally = (long *)Malloc(sites*sizeof(long)); - location = (long *)Malloc(sites*sizeof(long)); - for (i = 0; i < sites; i++) - category[i] = 1; - for (i = 0; i < sites; i++) - weight[i] = 1; - makeweights(); -} - -void allocrest() -{ - long i; - - y = (Char **)Malloc(spp*sizeof(Char *)); - nayme = (naym *)Malloc(spp*sizeof(naym)); - for (i = 0; i < spp; i++) - y[i] = (char 
*)Malloc(sites * sizeof(char)); - enterorder = (long *)Malloc(spp*sizeof(long)); - weight = (long *)Malloc(sites*sizeof(long)); - category = (long *)Malloc(sites*sizeof(long)); - alias = (long *)Malloc(sites*sizeof(long)); - aliasweight = (long *)Malloc(sites*sizeof(long)); - ally = (long *)Malloc(sites*sizeof(long)); - location = (long *)Malloc(sites*sizeof(long)); -} /* allocrest */ - - -void doinit() -{ - /* initializes variables */ - - inputnumbers(&spp, &sites, &nonodes, 1); - getoptions(); - makeprotfreqs(); - if (printdata) - fprintf(outfile, "%2ld species, %3ld sites\n", spp, sites); - alloctree(&curtree.nodep, nonodes, usertree); - allocrest(); - if (usertree) - return; - alloctree(&bestree.nodep, nonodes, 0); - if (njumble <= 1) - return; - alloctree(&bestree2.nodep, nonodes, 0); -} /* doinit */ - - -void inputoptions() -{ - long i; - - if (!firstset) { - samenumsp(&sites, ith); - reallocsites(); - } - if (firstset) { - for (i = 0; i < sites; i++) - category[i] = 1; - for (i = 0; i < sites; i++) - weight[i] = 1; - } - if (justwts || weights) - inputweights(sites, weight, &weights); - weightsum = 0; - for (i = 0; i < sites; i++) - weightsum += weight[i]; - if ((ctgry && categs > 1) && (firstset || !justwts)) { - inputcategs(0, sites, category, categs, "ProMLK"); - if (printdata) - printcategs(outfile, sites, category, "Site categories"); - } - if (weights && printdata) - printweights(outfile, 0, sites, weight, "Sites"); - fprintf(outfile, "%s model of amino acid change\n\n", - (usejtt ? "Jones-Taylor-Thornton" : - usepmb ? 
"Henikoff/Tillier PMB" : "Dayhoff PAM")); -} /* inputoptions */ - - -void input_protdata(long chars) -{ - /* input the names and sequences for each species */ - /* used by proml */ - long i, j, k, l, basesread, basesnew; - Char charstate; - boolean allread, done; - - if (printdata) - headings(chars, "Sequences", "---------"); - basesread = 0; - basesnew = 0; - allread = false; - while (!(allread)) { - /* eat white space -- if the separator line has spaces on it*/ - do { - charstate = gettc(infile); - } while (charstate == ' ' || charstate == '\t'); - ungetc(charstate, infile); - if (eoln(infile)) - scan_eoln(infile); - i = 1; - while (i <= spp) { - if ((interleaved && basesread == 0) || !interleaved) - initname(i - 1); - j = (interleaved) ? basesread : 0; - done = false; - while (!done && !eoff(infile)) { - if (interleaved) - done = true; - while (j < chars && !(eoln(infile) || eoff(infile))) { - charstate = gettc(infile); - if (charstate == '\n' || charstate == '\t') - charstate = ' '; - if (charstate == ' ' || (charstate >= '0' && charstate <= '9')) - continue; - uppercase(&charstate); - if ((strchr("ABCDEFGHIKLMNPQRSTVWXYZ*?-", charstate)) == NULL){ - printf("ERROR: bad amino acid: %c at position %ld of species %ld\n", - charstate, j, i); - if (charstate == '.') { - printf(" Periods (.) 
may not be used as gap characters.\n"); - printf(" The correct gap character is (-)\n"); - } - exxit(-1); - } - j++; - y[i - 1][j - 1] = charstate; - } - if (interleaved) - continue; - if (j < chars) - scan_eoln(infile); - else if (j == chars) - done = true; - } - if (interleaved && i == 1) - basesnew = j; - - scan_eoln(infile); - - if ((interleaved && j != basesnew) || - (!interleaved && j != chars)) { - printf("ERROR: SEQUENCES OUT OF ALIGNMENT AT POSITION %ld.\n", j); - exxit(-1); - } - i++; - } - - if (interleaved) { - basesread = basesnew; - allread = (basesread == chars); - } else - allread = (i > spp); - } - if (!printdata) - return; - for (i = 1; i <= ((chars - 1) / 60 + 1); i++) { - for (j = 1; j <= spp; j++) { - for (k = 0; k < nmlngth; k++) - putc(nayme[j - 1][k], outfile); - fprintf(outfile, " "); - l = i * 60; - if (l > chars) - l = chars; - for (k = (i - 1) * 60 + 1; k <= l; k++) { - if (j > 1 && y[j - 1][k - 1] == y[0][k - 1]) - charstate = '.'; - else - charstate = y[j - 1][k - 1]; - putc(charstate, outfile); - if (k % 10 == 0 && k % 60 != 0) - putc(' ', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); -} /* input_protdata */ - - -void makeweights() -{ - /* make up weights vector to avoid duplicate computations */ - long i; - - for (i = 1; i <= sites; i++) { - alias[i - 1] = i; - ally[i - 1] = 0; - aliasweight[i - 1] = weight[i - 1]; - location[i - 1] = 0; - } - sitesort2(sites, aliasweight); - sitecombine2(sites, aliasweight); - sitescrunch2(sites, 1, 2, aliasweight); - for (i = 1; i <= sites; i++) { - if (aliasweight[i - 1] > 0) - endsite = i; - } - for (i = 1; i <= endsite; i++) { - ally[alias[i - 1] - 1] = alias[i - 1]; - location[alias[i - 1] - 1] = i; - } - contribution = (contribarr *) Malloc( endsite*sizeof(contribarr)); -} /* makeweights */ - - -void prot_makevalues(long categs, pointarray treenode, long endsite, - long spp, sequence y, steptr alias) -{ - /* set up fractional likelihoods at tips */ 
- /* a version of makevalues2 found in seq.c */ - /* used by proml */ - long i, j, k, l; - long b; - - for (k = 0; k < endsite; k++) { - j = alias[k]; - for (i = 0; i < spp; i++) { - for (l = 0; l < categs; l++) { - memset(treenode[i]->protx[k][l], 0, sizeof(double)*20); - switch (y[i][j - 1]) { - - case 'A': - treenode[i]->protx[k][l][0] = 1.0; - break; - - case 'R': - treenode[i]->protx[k][l][(long)arginine - (long)alanine] = 1.0; - break; - - case 'N': - treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0; - break; - - case 'D': - treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0; - break; - - case 'C': - treenode[i]->protx[k][l][(long)cysteine - (long)alanine] = 1.0; - break; - - case 'Q': - treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0; - break; - - case 'E': - treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0; - break; - - case 'G': - treenode[i]->protx[k][l][(long)glycine - (long)alanine] = 1.0; - break; - - case 'H': - treenode[i]->protx[k][l][(long)histidine - (long)alanine] = 1.0; - break; - - case 'I': - treenode[i]->protx[k][l][(long)isoleucine - (long)alanine] = 1.0; - break; - - case 'L': - treenode[i]->protx[k][l][(long)leucine - (long)alanine] = 1.0; - break; - - case 'K': - treenode[i]->protx[k][l][(long)lysine - (long)alanine] = 1.0; - break; - - case 'M': - treenode[i]->protx[k][l][(long)methionine - (long)alanine] = 1.0; - break; - - case 'F': - treenode[i]->protx[k][l][(long)phenylalanine - (long)alanine] = 1.0; - break; - - case 'P': - treenode[i]->protx[k][l][(long)proline - (long)alanine] = 1.0; - break; - - case 'S': - treenode[i]->protx[k][l][(long)serine - (long)alanine] = 1.0; - break; - - case 'T': - treenode[i]->protx[k][l][(long)threonine - (long)alanine] = 1.0; - break; - - case 'W': - treenode[i]->protx[k][l][(long)tryptophan - (long)alanine] = 1.0; - break; - - case 'Y': - treenode[i]->protx[k][l][(long)tyrosine - (long)alanine] = 1.0; - break; - - case 'V': - 
treenode[i]->protx[k][l][(long)valine - (long)alanine] = 1.0; - break; - - case 'B': - treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0; - treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0; - break; - - case 'Z': - treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0; - treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0; - break; - - case 'X': /* unknown aa */ - for (b = 0; b <= 19; b++) - treenode[i]->protx[k][l][b] = 1.0; - break; - - case '?': /* unknown aa */ - for (b = 0; b <= 19; b++) - treenode[i]->protx[k][l][b] = 1.0; - break; - - case '*': /* stop codon symbol */ - for (b = 0; b <= 19; b++) - treenode[i]->protx[k][l][b] = 1.0; - break; - - case '-': /* deletion event-absent data or aa */ - for (b = 0; b <= 19; b++) - treenode[i]->protx[k][l][b] = 1.0; - break; - } - } - } - } -} /* prot_makevalues */ - - -void getinput() -{ - long grcategs; - - /* reads the input data */ - if (!justwts || firstset) - inputoptions(); - if (!justwts || firstset) - input_protdata(sites); - makeweights(); - setuptree2(curtree); - if (!usertree) { - setuptree2(bestree); - if (njumble > 1) - setuptree2(bestree2); - } - grcategs = (categs > rcategs) ? 
categs : rcategs; - prot_allocx(nonodes, grcategs, curtree.nodep, usertree); - if (!usertree) { - prot_allocx(nonodes, grcategs, bestree.nodep, 0); - if (njumble > 1) - prot_allocx(nonodes, grcategs, bestree2.nodep, 0); - } - prot_makevalues(rcategs, curtree.nodep, endsite, spp, y, alias); -} /* getinput */ - -void prot_freetable(void) -{ - long i,j,k,l; - for (j = 0; j < rcategs; j++) { - for (k = 0; k < categs; k++) { - for (l = 0; l < 20; l++) - free(ddpmatrix[j][k][l]); - free(ddpmatrix[j][k]); - } - free(ddpmatrix[j]); - } - free(ddpmatrix); - - for (j = 0; j < rcategs; j++) { - for (k = 0; k < categs; k++) { - for (l = 0; l < 20; l++) - free(dpmatrix[j][k][l]); - free(dpmatrix[j][k]); - } - free(dpmatrix[j]); - } - free(dpmatrix); - - - for (j = 0; j < rcategs; j++) - free(tbl[j]); - free(tbl); - - for ( i = 0 ; i < max_num_sibs ; i++ ) - free_pmatrix(i); - free(pmatrices); -} - -void prot_inittable() -{ - /* Define a lookup table. Precompute values and print them out in tables */ - /* Allocate memory for the pmatrices, dpmatices and ddpmatrices */ - long i, j, k, l; - double sumrates; - - /* Allocate memory for pmatrices, the array of pointers to pmatrices */ - - pmatrices = (double *****) Malloc (spp * sizeof(double ****)); - - /* Allocate memory for the first 2 pmatrices, the matrix of conversion */ - /* probabilities, but only once per run (aka not on the second jumble. 
*/ - - alloc_pmatrix(0); - alloc_pmatrix(1); - - /* Allocate memory for one dpmatrix, the first derivative matrix */ - - dpmatrix = (double ****) Malloc( rcategs * sizeof(double ***)); - for (j = 0; j < rcategs; j++) { - dpmatrix[j] = (double ***) Malloc( categs * sizeof(double **)); - for (k = 0; k < categs; k++) { - dpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *)); - for (l = 0; l < 20; l++) - dpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double)); - } - } - - /* Allocate memory for one ddpmatrix, the second derivative matrix */ - ddpmatrix = (double ****) Malloc( rcategs * sizeof(double ***)); - for (j = 0; j < rcategs; j++) { - ddpmatrix[j] = (double ***) Malloc( categs * sizeof(double **)); - for (k = 0; k < categs; k++) { - ddpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *)); - for (l = 0; l < 20; l++) - ddpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double)); - } - } - - /* Allocate memory and assign values to tbl, the matrix of possible rates*/ - - tbl = (double **) Malloc( rcategs * sizeof(double *)); - for (j = 0; j < rcategs; j++) - tbl[j] = (double *) Malloc( categs * sizeof(double)); - - for (j = 0; j < rcategs; j++) - for (k = 0; k < categs; k++) - tbl[j][k] = rrate[j]*rate[k]; - - sumrates = 0.0; - for (i = 0; i < endsite; i++) { - for (j = 0; j < rcategs; j++) - sumrates += aliasweight[i] * probcat[j] - * tbl[j][category[alias[i] - 1] - 1]; - } - sumrates /= (double)sites; - for (j = 0; j < rcategs; j++) - for (k = 0; k < categs; k++) { - tbl[j][k] /= sumrates; - } - - if(jumb > 1) - return; - - if (gama || invar) { - fprintf(outfile, "\nDiscrete approximation to gamma distributed rates\n"); - fprintf(outfile, - " Coefficient of variation of rates = %f (alpha = %f)\n", cv, alpha); - } - if (rcategs > 1) { - fprintf(outfile, "\nState in HMM Rate of change Probability\n\n"); - for (i = 0; i < rcategs; i++) - if (probcat[i] < 0.0001) - fprintf(outfile, "%9ld%16.3f%20.6f\n", i+1, rrate[i], probcat[i]); - else if (probcat[i] 
< 0.001) - fprintf(outfile, "%9ld%16.3f%19.5f\n", i+1, rrate[i], probcat[i]); - else if (probcat[i] < 0.01) - fprintf(outfile, "%9ld%16.3f%18.4f\n", i+1, rrate[i], probcat[i]); - else - fprintf(outfile, "%9ld%16.3f%17.3f\n", i+1, rrate[i], probcat[i]); - putc('\n', outfile); - if (auto_) { - fprintf(outfile, - "Expected length of a patch of sites having the same rate = %8.3f\n", - 1/lambda); - putc('\n', outfile); - } - } - if (categs > 1) { - fprintf(outfile, "\nSite category Rate of change\n\n"); - for (k = 0; k < categs; k++) - fprintf(outfile, "%9ld%16.3f\n", k+1, rate[k]); - fprintf(outfile, "\n\n"); - } -} /* prot_inittable */ - -void free_pmatrix(long sib) -{ - long j,k,l; - - for (j = 0; j < rcategs; j++) { - for (k = 0; k < categs; k++) { - for (l = 0; l < 20; l++) - free(pmatrices[sib][j][k][l]); - free(pmatrices[sib][j][k]); - } - free(pmatrices[sib][j]); - } - free(pmatrices[sib]); -} - -void alloc_pmatrix(long sib) -{ - /* Allocate memory for a new pmatrix. Called iff num_sibs>max_num_sibs */ - long j, k, l; - double ****temp_matrix; - - temp_matrix = (double ****) Malloc (rcategs * sizeof(double ***)); - for (j = 0; j < rcategs; j++) { - temp_matrix[j] = (double ***) Malloc(categs * sizeof(double **)); - for (k = 0; k < categs; k++) { - temp_matrix[j][k] = (double **) Malloc(20 * sizeof (double *)); - for (l = 0; l < 20; l++) - temp_matrix[j][k][l] = (double *) Malloc(20 * sizeof(double)); - } - } - pmatrices[sib] = temp_matrix; - max_num_sibs++; -} /* alloc_pmatrix */ - - -void make_pmatrix(double **matrix, double **dmat, double **ddmat, - long derivative, double lz, double rat, - double *eigmat, double **probmat) -{ - /* Computes the R matrix such that matrix[m][l] is the joint probability */ - /* of m and l. */ - /* Computes a P matrix such that matrix[m][l] is the conditional */ - /* probability of m given l. This is accomplished by dividing all terms */ - /* in the R matrix by freqaa[m], the frequency of l. 
*/ - - long k, l, m; /* (l) original character state */ - /* (m) final character state */ - /* (k) lambda counter */ - double p0, p1, p2, q; - double elambdat[20], delambdat[20], ddelambdat[20]; - /* exponential term for matrix */ - /* and both derivative matrices */ - - for (k = 0; k <= 19; k++) { - elambdat[k] = exp(lz * rat * eigmat[k]); - if(derivative != 0) { - delambdat[k] = (elambdat[k] * rat * eigmat[k]); - ddelambdat[k] = (delambdat[k] * rat * eigmat[k]); - } - } - for (m = 0; m <= 19; m++) { - for (l = 0; l <= 19; l++) { - p0 = 0.0; - p1 = 0.0; - p2 = 0.0; - for (k = 0; k <= 19; k++) { - q = probmat[k][m] * probmat[k][l]; - p0 += (q * elambdat[k]); - if(derivative !=0) { - p1 += (q * delambdat[k]); - p2 += (q * ddelambdat[k]); - } - } - matrix[m][l] = p0 / freqaa[m]; - if(derivative != 0) { - dmat[m][l] = p1 / freqaa[m]; - ddmat[m][l] = p2 / freqaa[m]; - } - } - } -} /* make_pmatrix */ - - -void prot_nuview(node *p) -{ - long b, i, j, k, l, m, num_sibs, sib_index; - node *sib_ptr, *sib_back_ptr; - psitelike prot_xx, x2; - double lw, prod7; - double **pmat; - double maxx,correction; - - /* Figure out how many siblings the current node has */ - /* and be sure that pmatrices is large enough */ - num_sibs = count_sibs(p); - for (i = 0; i < num_sibs; i++) - if (pmatrices[i] == NULL) - alloc_pmatrix(i); - - /* Recursive calls, should be called for all children */ - sib_ptr = p; - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - - if (!(sib_back_ptr == NULL)) - if (!sib_back_ptr->tip && !sib_back_ptr->initialized) - prot_nuview(sib_back_ptr); - } - - /* Make pmatrices for all possible combinations of category, rcateg */ - /* and sib */ - sib_ptr = p; /* return to p */ - for (sib_index=0; sib_index < num_sibs; sib_index++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - - if (sib_back_ptr != NULL) - lw = fabs(p->tyme - sib_back_ptr->tyme); - else - lw = 0.0; - - for (j = 0; j < rcategs; j++) - for (k = 0; 
k < categs; k++) - make_pmatrix(pmatrices[sib_index][j][k], NULL, NULL, 0, lw, - tbl[j][k], eigmat, probmat); - } - - for (i = 0; i < endsite; i++) { - correction = 0; - maxx = 0; - k = category[alias[i]-1] - 1; - for (j = 0; j < rcategs; j++) { - - /* initialize to 1 all values of prot_xx */ - for (m = 0; m <= 19; m++) - prot_xx[m] = 1; - - sib_ptr = p; /* return to p */ - /* loop through all sibs and calculate likelihoods for all possible*/ - /* amino acid combinations */ - for (sib_index=0; sib_index < num_sibs; sib_index++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - - - if (sib_back_ptr != NULL) { - memcpy(x2, sib_back_ptr->protx[i][j], sizeof(psitelike)); - if ( j == 0 ) - correction += sib_back_ptr->underflows[i]; - } - - else - for (b = 0; b <= 19; b++) - x2[b] = 1.0; - pmat = pmatrices[sib_index][j][k]; - for (m = 0; m <= 19; m++) { - prod7 = 0; - for (l = 0; l <= 19; l++) - prod7 += (pmat[m][l] * x2[l]); - prot_xx[m] *= prod7; - if ( prot_xx[m] > maxx && sib_index == (num_sibs - 1 )) - maxx = prot_xx[m]; - } - } - /* And the final point of this whole function: */ - memcpy(p->protx[i][j], prot_xx, sizeof(psitelike)); - } - p->underflows[i] = 0; - if ( maxx < MIN_DOUBLE ) - fix_protx(p,i,maxx,rcategs); - p->underflows[i] += correction; - } - - p->initialized = true; -} /* prot_nuview */ - - -void getthree(node *p, double thigh, double tlow) -{ - /* compute likelihood at a new triple of points */ - int i; - double tt = p->tyme; - double td = fabs(tdelta); - - x[0] = tt - td; - x[1] = tt; - x[2] = tt + td; - - if ( x[0] < tlow + epsilon ) { - x[0] = tlow + epsilon; - x[1] = ( x[0] + x[2] ) / 2; - } - - if ( x[2] > thigh - epsilon ) { - x[2] = thigh - epsilon; - x[1] = ( x[0] + x[2] ) / 2; - } - - for ( i = 0 ; i < 3 ; i++ ) { - p->tyme = x[i]; - prot_nuview(p); - lnl[i] = prot_evaluate(p); - } -} /* getthree */ - -void makenewv(node *p) -{ - /* improve a node time */ - long it, imin, imax, i, num_sibs; - double tt, tfactor, tlow, thigh, 
oldlike, ymin, ymax, s32, s21, yold; - boolean done, already; - node *s, *sdown, *sib_ptr, *sib_back_ptr; - - s = curtree.nodep[p->index - 1]; - sdown = s->back; - if (s == curtree.root) - tlow = -10.0; - else - tlow = sdown->tyme; - - sib_ptr = s; - num_sibs = count_sibs(p); - - thigh = s->next->back->tyme; - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - if (sib_back_ptr->tyme < thigh) - thigh = sib_back_ptr->tyme; - } - done = (thigh - tlow < 4.0*epsilon); - it = 1; - if (s != curtree.root) - tdelta = (thigh - tlow) / 10.0; - else - tdelta = (thigh - s->tyme) / 5.0; - tfactor = 1.0; - if (!done) - getthree(s, thigh, tlow); - while (it < iterations && !done) { - ymax = lnl[0]; - imax = 1; - for (i = 2; i <= 3; i++) { - if (lnl[i - 1] > ymax) { - ymax = lnl[i - 1]; - imax = i; - } - } - if (imax != 2) { - ymax = x[1]; - x[1] = x[imax - 1]; - x[imax - 1] = ymax; - ymax = lnl[1]; - lnl[1] = lnl[imax - 1]; - lnl[imax - 1] = ymax; - } - tt = x[1]; - oldlike = lnl[1]; - yold = tt; - s32 = (lnl[2] - lnl[1]) / (x[2] - x[1]); - s21 = (lnl[1] - lnl[0]) / (x[1] - x[0]); - if (fabs(x[2] - x[0]) > epsilon) - curv = (s32 - s21) / ((x[2] - x[0]) / 2); - else - curv = 0.0; - slope = (s32 + s21) / 2 - curv * (x[2] - 2 * x[1] + x[0]) / 4; - if (curv >= 0.0) { - if (slope < 0) - tdelta = -fabs(tdelta); - else - tdelta = fabs(tdelta); - } else - tdelta = -(tfactor * slope / curv); - if (tt + tdelta <= tlow + epsilon) - tdelta = tlow + epsilon - tt; - if (tt + tdelta >= thigh - epsilon) - tdelta = thigh - epsilon - tt; - tt += tdelta; - done = (fabs(yold - tt) < epsilon || fabs(tdelta) < epsilon); - s->tyme = tt; - prot_nuview(s); - lnlike = prot_evaluate(s); - ymin = lnl[0]; - imin = 1; - for (i = 2; i <= 3; i++) { - if (lnl[i - 1] < ymin) { - ymin = lnl[i - 1]; - imin = i; - } - } - already = (tt == x[0]) || (tt == x[1]) || (tt == x[2]); - if (!already && ymin < lnlike) { - x[imin - 1] = tt; - lnl[imin - 1] = lnlike; - } - if (already 
|| lnlike < oldlike) { - tt = x[1]; - tfactor /= 2; - tdelta /= 2; - curtree.likelihood = oldlike; - lnlike = oldlike; - } else - tfactor = 1.0; - - if (!done) { - sib_ptr = p; - num_sibs = count_sibs(p); - p->tyme = tt; - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_ptr->tyme = tt; - } - - sib_ptr = p; - prot_nuview(p); - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - prot_nuview(sib_ptr); - } - } - - it++; - } - sib_ptr = p; - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - inittrav (sib_ptr); - } - smoothed = smoothed && done; -} /* makenewv */ - - -void update(node *p) -{ - node *sib_ptr, *sib_back_ptr; - long i, num_sibs; - - /* improve time and recompute views at a node */ - if (p == NULL) - return; - if (p->back != NULL) { - if (!p->back->tip && !p->back->initialized) - prot_nuview(p->back); - } - - sib_ptr = p; - num_sibs = count_sibs(p); - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - if (sib_back_ptr != NULL) { - if (!sib_back_ptr->tip && !sib_back_ptr->initialized) - prot_nuview(sib_back_ptr); - } - } - - if ((!usertree) || (usertree && !lngths) || p->iter) { - makenewv(p); - return; - } - prot_nuview(p); - - sib_ptr = p; - num_sibs = count_sibs(p); - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - prot_nuview(sib_ptr); - } -} /* update */ - - -void smooth(node *p) -{ - node *sib_ptr; - long i, num_sibs; - - if (p == NULL) - return; - if (p->tip) - return; - - update(p); - - smoothed = false; - sib_ptr = p; - num_sibs = count_sibs(p); - for (i=0; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - if (polishing || (smoothit && !smoothed)) { - smooth(sib_ptr->back); - p->initialized = false; - sib_ptr->initialized = false; - } - update(p); - } -} /* smooth */ - - -void promlk_add(node *below, node *newtip, node *newfork, boolean tempadd) -{ - /* inserts the nodes newfork and its descendant, newtip, into the tree. 
*/ - long i; - boolean done; - node *p; - - below = curtree.nodep[below->index - 1]; - newfork = curtree.nodep[newfork->index-1]; - newtip = curtree.nodep[newtip->index-1]; - if (below->back != NULL) - below->back->back = newfork; - newfork->back = below->back; - below->back = newfork->next->next; - newfork->next->next->back = below; - newfork->next->back = newtip; - newtip->back = newfork->next; - if (newtip->tyme < below->tyme) - p = newtip; - else p = below; - newfork->tyme = p->tyme; - if (curtree.root == below) - curtree.root = newfork; - if (newfork->back != NULL) { - if (p->tyme > newfork->back->tyme) - newfork->tyme = (p->tyme + newfork->back->tyme) / 2.0; - else newfork->tyme = p->tyme - epsilon; - newfork->next->tyme = newfork->tyme; - newfork->next->next->tyme = newfork->tyme; - do { - p = curtree.nodep[p->back->index - 1]; - done = (p == curtree.root); - if (!done) - done = (curtree.nodep[p->back->index - 1]->tyme < p->tyme - epsilon); - if (!done) { - curtree.nodep[p->back->index - 1]->tyme = p->tyme - epsilon; - curtree.nodep[p->back->index - 1]->next->tyme = p->tyme - epsilon; - curtree.nodep[p->back->index - 1]->next->next->tyme = p->tyme - epsilon; - } - } while (!done); - } else { - newfork->tyme = newfork->tyme - 2*epsilon; - newfork->next->tyme = newfork->tyme; - newfork->next->next->tyme = newfork->tyme; - } - inittrav(newtip); - inittrav(newtip->back); - smoothed = false; - i = 1; - while (i < smoothings && !smoothed) { - smoothed = true; - smooth(newfork); - smooth(newfork->back); - i++; - } -} /* promlk_add */ - - -void promlk_re_move(node **item, node **fork, boolean tempadd) -{ - /* removes nodes item and its ancestor, fork, from the tree. - the new descendant of fork's ancestor is made to be - fork's second descendant (other than item). 
Also - returns pointers to the deleted nodes, item and fork */ - node *p, *q; - long i; - - if ((*item)->back == NULL) { - *fork = NULL; - return; - } - *item = curtree.nodep[(*item)->index-1]; - *fork = curtree.nodep[(*item)->back->index - 1]; - if (curtree.root == *fork) { - if (*item == (*fork)->next->back) - curtree.root = (*fork)->next->next->back; - else - curtree.root = (*fork)->next->back; - } - p = (*item)->back->next->back; - q = (*item)->back->next->next->back; - if (p != NULL) - p->back = q; - if (q != NULL) - q->back = p; - (*fork)->back = NULL; - p = (*fork)->next; - while (p != *fork) { - p->back = NULL; - p = p->next; - } - (*item)->back = NULL; - inittrav(p); - inittrav(q); - if (tempadd) - return; - i = 1; - while (i <= smoothings) { - smooth(q); - if (smoothit) - smooth(q->back); - i++; - } -} /* promlk_re_move */ - - -double prot_evaluate(node *p) -{ - contribarr tterm; - static contribarr like, nulike, clai; - double sum, sum2, sumc=0, y, prod4, prodl, frexm, sumterm, lterm; - double **pmat; - long i, j, k, l, m, lai; - node *q, *r; - psitelike x1, x2; - - sum = 0.0; - - if (p == curtree.root && (count_sibs(p) == 2)) { - r = p->next->back; - q = p->next->next->back; - y = r->tyme + q->tyme - 2 * p->tyme; - if (!r->tip && !r->initialized) prot_nuview (r); - if (!q->tip && !q->initialized) prot_nuview (q); - } else if (p == curtree.root) { - /* the next two lines copy tyme and x to p->next. Normally they are - not initialized for an internal node. 
*/ - /* assumes bifurcation */ - p->next->tyme = p->tyme; - prot_nuview(p->next); - r = p->next; - q = p->next->back; - y = fabs(p->next->tyme - q->tyme); - } else { - r = p; - q = p->back; - if (!r->tip && !r->initialized) prot_nuview (r); - if (!q->tip && !q->initialized) prot_nuview (q); - y = fabs(r->tyme - q->tyme); - } - - for (j = 0; j < rcategs; j++) - for (k = 0; k < categs; k++) - make_pmatrix(pmatrices[0][j][k],NULL,NULL,0,y,tbl[j][k],eigmat,probmat); - for (i = 0; i < endsite; i++) { - k = category[alias[i]-1] - 1; - for (j = 0; j < rcategs; j++) { - memcpy(x1, r->protx[i][j], sizeof(psitelike)); - memcpy(x2, q->protx[i][j], sizeof(psitelike)); - prod4 = 0.0; - pmat = pmatrices[0][j][k]; - for (m = 0; m <= 19; m++) { - prodl = 0.0; - for (l = 0; l <= 19; l++) - prodl += (pmat[m][l] * x2[l]); - frexm = x1[m] * freqaa[m]; - prod4 += (prodl * frexm); - } - tterm[j] = prod4; - } - sumterm = 0.0; - for (j = 0; j < rcategs; j++) - sumterm += probcat[j] * tterm[j]; - if (sumterm < 0.0) - sumterm = 0.00000001; /* ??? 
*/ - lterm = log(sumterm) + p->underflows[i] + q->underflows[i]; - for (j = 0; j < rcategs; j++) - clai[j] = tterm[j] / sumterm; - memcpy(contribution[i], clai, rcategs * sizeof(double)); - if (!auto_ && usertree && (which <= shimotrees)) - l0gf[which - 1][i] = lterm; - sum += aliasweight[i] * lterm; - } - if (auto_) { - for (j = 0; j < rcategs; j++) - like[j] = 1.0; - for (i = 0; i < sites; i++) { - sumc = 0.0; - for (k = 0; k < rcategs; k++) - sumc += probcat[k] * like[k]; - sumc *= lambda; - if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { - lai = location[ally[i] - 1]; - memcpy(clai, contribution[lai - 1], rcategs*sizeof(double)); - for (j = 0; j < rcategs; j++) - nulike[j] = ((1.0 - lambda) * like[j] + sumc) * clai[j]; - } else { - for (j = 0; j < rcategs; j++) - nulike[j] = ((1.0 - lambda) * like[j] + sumc); - } - memcpy(like, nulike, rcategs * sizeof(double)); - } - sum2 = 0.0; - for (i = 0; i < rcategs; i++) - sum2 += probcat[i] * like[i]; - sum += log(sum2); - } - curtree.likelihood = sum; - if (auto_ || !usertree) - return sum; - if(which <= shimotrees) - l0gl[which - 1] = sum; - if (which == 1) { - maxwhich = 1; - maxlogl = sum; - return sum; - } - if (sum > maxlogl) { - maxwhich = which; - maxlogl = sum; - } - return sum; -} /* prot_evaluate */ - - -void tryadd(node *p, node **item, node **nufork) -{ /* temporarily adds one fork and one tip to the tree. - if the location where they are added yields greater - likelihood than other locations tested up to that - time, then keeps that location as there */ - - long grcategs; - grcategs = (categs > rcategs) ? 
categs : rcategs; - - promlk_add(p, *item, *nufork, true); - like = prot_evaluate(p); - if (lastsp) { - if (like >= bestyet || bestyet == UNDEFINED) - prot_copy_(&curtree, &bestree, nonodes, grcategs); - } - if (like > bestyet || bestyet == UNDEFINED) { - bestyet = like; - there = p; - } - promlk_re_move(item, nufork, true); -} /* tryadd */ - - -void addpreorder(node *p, node *item_, node *nufork_, boolean contin, - boolean continagain) -{ - /* traverses a binary tree, calling function tryadd - at a node before calling tryadd at its descendants */ - node *item, *nufork; - - item = item_; - nufork = nufork_; - if (p == NULL) - return; - tryadd(p, &item, &nufork); - contin = continagain; - if ((!p->tip) && contin) { - addpreorder(p->next->back, item, nufork, contin, continagain); - addpreorder(p->next->next->back, item, nufork, contin, continagain); - } -} /* addpreorder */ - - -void restoradd(node *below, node *newtip, node *newfork, double prevtyme) -{ -/* restore "new" tip and fork to place "below". restore tymes */ -/* assumes bifurcation */ - hookup(newfork, below->back); - hookup(newfork->next, below); - hookup(newtip, newfork->next->next); - curtree.nodep[newfork->index-1] = newfork; - newfork->tyme = prevtyme; -/* assumes bifurcations */ - newfork->next->tyme = prevtyme; - newfork->next->next->tyme = prevtyme; -} /* restoradd */ - - -void tryrearr(node *p, boolean *success) -{ - /* evaluates one rearrangement of the tree. - if the new tree has greater likelihood than the old - one sets success = TRUE and keeps the new tree. 
- otherwise, restores the old tree */ - node *frombelow, *whereto, *forknode; - double oldlike, prevtyme; - boolean wasonleft; - - if (p == curtree.root) - return; - forknode = curtree.nodep[p->back->index - 1]; - if (forknode == curtree.root) - return; - oldlike = bestyet; - prevtyme = forknode->tyme; -/* the following statement presumes bifurcating tree */ - if (forknode->next->back == p) { - frombelow = forknode->next->next->back; - wasonleft = true; - } - else { - frombelow = forknode->next->back; - wasonleft = false; - } - whereto = curtree.nodep[forknode->back->index - 1]; - promlk_re_move(&p, &forknode, true); - promlk_add(whereto, p, forknode, true); - like = prot_evaluate(p); - if (like <= oldlike && oldlike != UNDEFINED) { - promlk_re_move(&p, &forknode, true); - restoradd(frombelow, p, forknode, prevtyme); - if (wasonleft && (forknode->next->next->back == p)) { - hookup (forknode->next->back, forknode->next->next); - hookup (forknode->next, p); - } - curtree.likelihood = oldlike; - inittrav(forknode); - inittrav(forknode->next); - inittrav(forknode->next->next); - } else { - (*success) = true; - bestyet = like; - } -} /* tryrearr */ - - -void repreorder(node *p, boolean *success) -{ - /* traverses a binary tree, calling function tryrearr - at a node before calling tryrearr at its descendants */ - if (p == NULL) - return; - tryrearr(p, success); - if (p->tip) - return; - if (!(*success)) - repreorder(p->next->back, success); - if (!(*success)) - repreorder(p->next->next->back, success); -} /* repreorder */ - - -void rearrange(node **r) -{ - /* traverses the tree (preorder), finding any local - rearrangement which increases the likelihood. 
- if traversal succeeds in increasing the tree's - likelihood, function rearrange runs traversal again */ - boolean success; - success = true; - while (success) { - success = false; - repreorder(*r, &success); - } -} /* rearrange */ - - -void nodeinit(node *p) -{ - /* set up times at one node */ - node *sib_ptr, *sib_back_ptr; - long i, num_sibs; - double lowertyme; - - sib_ptr = p; - num_sibs = count_sibs(p); - - /* lowertyme = lowest of children's times */ - lowertyme = p->next->back->tyme; - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - if (sib_back_ptr->tyme < lowertyme) - lowertyme = sib_back_ptr->tyme; - } - - p->tyme = lowertyme - 0.1; - - sib_ptr = p; - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - - sib_ptr->tyme = p->tyme; - sib_back_ptr->v = sib_back_ptr->tyme - p->tyme; - sib_ptr->v = sib_back_ptr->v; - } -} /* nodeinit */ - - -void initrav(node *p) -{ - - long i, num_sibs; - node *sib_ptr, *sib_back_ptr; - - /* traverse to set up times throughout tree */ - if (p->tip) - return; - - sib_ptr = p; - num_sibs = count_sibs(p); - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - initrav(sib_back_ptr); - } - - nodeinit(p); -} /* initrav */ - - -void travinit(node *p) -{ - long i, num_sibs; - node *sib_ptr, *sib_back_ptr; - - /* traverse to set up initial values */ - if (p == NULL) - return; - if (p->tip) - return; - if (p->initialized) - return; - - - sib_ptr = p; - num_sibs = count_sibs(p); - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - travinit(sib_back_ptr); - } - - prot_nuview(p); - p->initialized = true; -} /* travinit */ - - -void travsp(node *p) -{ - long i, num_sibs; - node *sib_ptr, *sib_back_ptr; - - /* traverse to find tips */ - if (p == curtree.root) - travinit(p); - if (p->tip) - travinit(p->back); - else { - sib_ptr = p; - num_sibs = count_sibs(p); - for (i=0 ; i 
< num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - travsp(sib_back_ptr); - } - } -} /* travsp */ - - -void treevaluate() -{ - /* evaluate likelihood of tree, after iterating branch lengths */ - long i, j, num_sibs; - node *sib_ptr; - - polishing = true; - smoothit = true; - for (i = 0; i < spp; i++) - curtree.nodep[i]->initialized = false; - for (i = spp; i < nonodes; i++) { - sib_ptr = curtree.nodep[i]; - sib_ptr->initialized = false; - num_sibs = count_sibs(sib_ptr); - for (j=0 ; j < num_sibs; j++) { - sib_ptr = sib_ptr->next; - sib_ptr->initialized = false; - } - - } - if (!lngths) - initrav(curtree.root); - travsp(curtree.root); - for (i = 1; i <= smoothings * 4; i++) - smooth(curtree.root); - prot_evaluate(curtree.root); -} /* treevaluate */ - - -void promlk_coordinates(node *p, long *tipy) -{ - /* establishes coordinates of nodes */ - node *q, *first, *last, *pp1 =NULL, *pp2 =NULL; - long num_sibs, p1, p2, i; - - if (p->tip) { - p->xcoord = 0; - p->ycoord = (*tipy); - p->ymin = (*tipy); - p->ymax = (*tipy); - (*tipy) += down; - return; - } - q = p->next; - do { - promlk_coordinates(q->back, tipy); - q = q->next; - } while (p != q); - num_sibs = count_sibs(p); - p1 = (long)((num_sibs+1)/2.0); - p2 = (long)((num_sibs+2)/2.0); - i = 1; - q = p->next; - first = q->back; - do { - if (i == p1) pp1 = q->back; - if (i == p2) pp2 = q->back; - last = q->back; - q = q->next; - i++; - } while (q != p); - p->xcoord = (long)(0.5 - over * p->tyme); - p->ycoord = (pp1->ycoord + pp2->ycoord) / 2; - p->ymin = first->ymin; - p->ymax = last->ymax; -} /* promlk_coordinates */ - - -void promlk_drawline(long i, double scale) -{ - /* draws one row of the tree diagram by moving up tree */ - node *p, *q, *r, *first =NULL, *last =NULL; - long n, j; - boolean extra, done; - - p = curtree.root; - q = curtree.root; - extra = false; - if ((long)(p->ycoord) == i) { - if (p->index - spp >= 10) - fprintf(outfile, "-%2ld", p->index - spp); - else - fprintf(outfile, 
"--%ld", p->index - spp); - extra = true; - } else - fprintf(outfile, " "); - do { - if (!p->tip) { - r = p->next; - done = false; - do { - if (i >= r->back->ymin && i <= r->back->ymax) { - q = r->back; - done = true; - } - r = r->next; - } while (!(done || r == p)); - first = p->next->back; - r = p->next; - while (r->next != p) - r = r->next; - last = r->back; - } - done = (p == q); - n = (long)(scale * ((long)(p->xcoord) - (long)(q->xcoord)) + 0.5); - if (n < 3 && !q->tip) - n = 3; - if (extra) { - n--; - extra = false; - } - if ((long)(q->ycoord) == i && !done) { - if (p->ycoord != q->ycoord) - putc('+', outfile); - else - putc('-', outfile); - if (!q->tip) { - for (j = 1; j <= n - 2; j++) - putc('-', outfile); - if (q->index - spp >= 10) - fprintf(outfile, "%2ld", q->index - spp); - else - fprintf(outfile, "-%ld", q->index - spp); - extra = true; - } else { - for (j = 1; j < n; j++) - putc('-', outfile); - } - } else if (!p->tip) { - if ((long)(last->ycoord) > i && (long)(first->ycoord) < i && - i != (long)(p->ycoord)) { - putc('!', outfile); - for (j = 1; j < n; j++) - putc(' ', outfile); - } else { - for (j = 1; j <= n; j++) - putc(' ', outfile); - } - } else { - for (j = 1; j <= n; j++) - putc(' ', outfile); - } - if (p != q) - p = q; - } while (!done); - if ((long)(p->ycoord) == i && p->tip) { - for (j = 0; j < nmlngth; j++) - putc(nayme[p->index - 1][j], outfile); - } - putc('\n', outfile); -} /* promlk_drawline */ - - -void promlk_printree() -{ - /* prints out diagram of the tree */ - long tipy; - double scale; - long i; - node *p; - - if (!treeprint) - return; - putc('\n', outfile); - tipy = 1; - promlk_coordinates(curtree.root, &tipy); - p = curtree.root; - while (!p->tip) - p = p->next->back; - scale = 1.0 / (long)(p->tyme - curtree.root->tyme + 1.000); - putc('\n', outfile); - for (i = 1; i <= tipy - down; i++) - promlk_drawline(i, scale); - putc('\n', outfile); -} /* promlk_printree */ - - -void describe(node *p) -{ - long i, num_sibs; - node 
*sib_ptr, *sib_back_ptr; - double v; - - if (p == curtree.root) - fprintf(outfile, " root "); - else - fprintf(outfile, "%4ld ", p->back->index - spp); - if (p->tip) { - for (i = 0; i < nmlngth; i++) - putc(nayme[p->index - 1][i], outfile); - } else - fprintf(outfile, "%4ld ", p->index - spp); - if (p != curtree.root) { - fprintf(outfile, "%11.5f", (p->tyme - curtree.root->tyme)); - v = (p->tyme - curtree.nodep[p->back->index - 1]->tyme); - fprintf(outfile, "%13.5f", v); - } - putc('\n', outfile); - if (!p->tip) { - - sib_ptr = p; - num_sibs = count_sibs(p); - for (i=0 ; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - sib_back_ptr = sib_ptr->back; - describe(sib_back_ptr); - } - } -} /* describe */ - - -void prot_reconstr(node *p, long n) -{ - /* reconstruct and print out acid at site n+1 at node p */ - long i, j, k, first, num_sibs = 0; - double f, sum, xx[20]; - node *q = NULL; - - if (p->tip) - putc(y[p->index-1][n], outfile); - else { - num_sibs = count_sibs(p); - if ((ally[n] == 0) || (location[ally[n]-1] == 0)) - putc('.', outfile); - else { - j = location[ally[n]-1] - 1; - sum = 0; - for (i = 0; i <= 19; i++) { - f = p->protx[j][mx-1][i]; - if (!p->tip) { - q = p; - for (k = 0; k < num_sibs; k++) { - q = q->next; - f *= q->protx[j][mx-1][i]; - } - } - f = sqrt(f); - xx[i] = f * freqaa[i]; - sum += xx[i]; - } - for (i = 0; i <= 19; i++) - xx[i] /= sum; - first = 0; - for (i = 0; i <= 19; i++) - if (xx[i] > xx[first]) - first = i; - if (xx[first] > 0.95) - putc(aachar[first], outfile); - else - putc(tolower(aachar[first]), outfile); - if (rctgry && rcategs > 1) - mx = mp[n][mx - 1]; - else - mx = 1; - } - } -} /* prot_reconstr */ - - -void rectrav(node *p, long m, long n) -{ - /* print out segment of reconstructed sequence for one branch */ - long num_sibs, i; - node *sib_ptr; - - putc(' ', outfile); - if (p->tip) { - for (i = 0; i < nmlngth; i++) - putc(nayme[p->index-1][i], outfile); - } else - fprintf(outfile, "%4ld ", p->index - spp); - fprintf(outfile, 
" "); - mx = mx0; - for (i = m; i <= n; i++) { - if ((i % 10 == 0) && (i != m)) - putc(' ', outfile); - prot_reconstr(p, i); - } - putc('\n', outfile); - if (!p->tip) { - num_sibs = count_sibs(p); - sib_ptr = p; - for (i = 0; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - rectrav(sib_ptr->back, m, n); - } - } - mx1 = mx; -} /* rectrav */ - - -void summarize() -{ - long i, j, mm; - double mode, sum; - double like[maxcategs], nulike[maxcategs]; - double **marginal; - - mp = (long **)Malloc(sites * sizeof(long *)); - for (i = 0; i <= sites-1; ++i) - mp[i] = (long *)Malloc(sizeof(long)*rcategs); - fprintf(outfile, "\nLn Likelihood = %11.5f\n\n", curtree.likelihood); - fprintf(outfile, " Ancestor Node Node Height Length\n"); - fprintf(outfile, " -------- ---- ---- ------ ------\n"); - describe(curtree.root); - putc('\n', outfile); - if (rctgry && rcategs > 1) { - for (i = 0; i < rcategs; i++) - like[i] = 1.0; - for (i = sites - 1; i >= 0; i--) { - sum = 0.0; - for (j = 0; j < rcategs; j++) { - nulike[j] = (lambda1 + lambda * probcat[j]) * like[j]; - mp[i][j] = j + 1; - for (k = 1; k <= rcategs; k++) { - if (k != j + 1) { - if (lambda * probcat[k - 1] * like[k - 1] > nulike[j]) { - nulike[j] = lambda * probcat[k - 1] * like[k - 1]; - mp[i][j] = k; - } - } - } - if ((ally[i] > 0) && (location[ally[i]-1] > 0)) - nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; - sum += nulike[j]; - } - for (j = 0; j < rcategs; j++) - nulike[j] /= sum; - memcpy(like, nulike, rcategs * sizeof(double)); - } - mode = 0.0; - mx = 1; - for (i = 1; i <= rcategs; i++) { - if (probcat[i - 1] * like[i - 1] > mode) { - mx = i; - mode = probcat[i - 1] * like[i - 1]; - } - } - mx0 = mx; - fprintf(outfile, - "Combination of categories that contributes the most to the likelihood:\n\n"); - for (i = 1; i <= nmlngth + 3; i++) - putc(' ', outfile); - for (i = 1; i <= sites; i++) { - fprintf(outfile, "%ld", mx); - if (i % 10 == 0) - putc(' ', outfile); - if (i % 60 == 0 && i != sites) { - 
putc('\n', outfile); - for (j = 1; j <= nmlngth + 3; j++) - putc(' ', outfile); - } - mx = mp[i - 1][mx - 1]; - } - fprintf(outfile, "\n\n"); - marginal = (double **) Malloc( sites*sizeof(double *)); - for (i = 0; i < sites; i++) - marginal[i] = (double *) Malloc( rcategs*sizeof(double)); - for (i = 0; i < rcategs; i++) - like[i] = 1.0; - for (i = sites - 1; i >= 0; i--) { - sum = 0.0; - for (j = 0; j < rcategs; j++) { - nulike[j] = (lambda1 + lambda * probcat[j]) * like[j]; - for (k = 1; k <= rcategs; k++) { - if (k != j + 1) - nulike[j] += lambda * probcat[k - 1] * like[k - 1]; - } - if ((ally[i] > 0) && (location[ally[i]-1] > 0)) - nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; - sum += nulike[j]; - } - for (j = 0; j < rcategs; j++) { - nulike[j] /= sum; - marginal[i][j] = nulike[j]; - } - memcpy(like, nulike, rcategs * sizeof(double)); - } - for (i = 0; i < rcategs; i++) - like[i] = 1.0; - for (i = 0; i < sites; i++) { - sum = 0.0; - for (j = 0; j < rcategs; j++) { - nulike[j] = (lambda1 + lambda * probcat[j]) * like[j]; - for (k = 1; k <= rcategs; k++) { - if (k != j + 1) - nulike[j] += lambda * probcat[k - 1] * like[k - 1]; - } - marginal[i][j] *= like[j] * probcat[j]; - sum += nulike[j]; - } - for (j = 0; j < rcategs; j++) - nulike[j] /= sum; - memcpy(like, nulike, rcategs * sizeof(double)); - sum = 0.0; - for (j = 0; j < rcategs; j++) - sum += marginal[i][j]; - for (j = 0; j < rcategs; j++) - marginal[i][j] /= sum; - } - fprintf(outfile, "Most probable category at each site if > 0.95"); - fprintf(outfile, " probability (\".\" otherwise)\n\n"); - for (i = 1; i <= nmlngth + 3; i++) - putc(' ', outfile); - for (i = 0; i < sites; i++) { - sum = 0.0; - for (j = 0; j < rcategs; j++) - if (marginal[i][j] > sum) { - sum = marginal[i][j]; - mm = j; - } - if (sum >= 0.95) - fprintf(outfile, "%ld", mm+1); - else - putc('.', outfile); - if ((i+1) % 60 == 0) { - if (i != 0) { - putc('\n', outfile); - for (j = 1; j <= nmlngth + 3; j++) - putc(' ', outfile); - } 
- } - else if ((i+1) % 10 == 0) - putc(' ', outfile); - } - putc('\n', outfile); - for (i = 0; i < sites; i++) - free(marginal[i]); - free(marginal); - } - putc('\n', outfile); - putc('\n', outfile); - putc('\n', outfile); - if (hypstate) { - fprintf(outfile, "Probable sequences at interior nodes:\n\n"); - fprintf(outfile, " node "); - for (i = 0; (i < 13) && (i < ((sites + (sites-1)/10 - 39) / 2)); i++) - putc(' ', outfile); - fprintf(outfile, "Reconstructed sequence (caps if > 0.95)\n\n"); - if (!rctgry || (rcategs == 1)) - mx0 = 1; - for (i = 0; i < sites; i += 60) { - k = i + 59; - if (k >= sites) - k = sites - 1; - rectrav(curtree.root, i, k); - putc('\n', outfile); - mx0 = mx1; - } - } - for (i = 0; i <= sites-1; ++i) - free(mp[i]); - free(mp); -} /* summarize */ - - -void promlk_treeout(node *p) -{ - /* write out file with representation of final tree */ - node *sib_ptr; - long i, n, w, num_sibs; - Char c; - double x; - - if (p->tip) { - n = 0; - for (i = 1; i <= nmlngth; i++) { - if (nayme[p->index - 1][i - 1] != ' ') - n = i; - } - for (i = 0; i < n; i++) { - c = nayme[p->index - 1][i]; - if (c == ' ') - c = '_'; - putc(c, outtree); - } - col += n; - } else { - sib_ptr = p; - num_sibs = count_sibs(p); - putc('(', outtree); - col++; - - for (i=0; i < (num_sibs - 1); i++) { - sib_ptr = sib_ptr->next; - promlk_treeout(sib_ptr->back); - putc(',', outtree); - col++; - if (col > 55) { - putc('\n', outtree); - col = 0; - } - } - sib_ptr = sib_ptr->next; - promlk_treeout(sib_ptr->back); - putc(')', outtree); - col++; - } - if (p == curtree.root) { - fprintf(outtree, ";\n"); - return; - } - x = (p->tyme - curtree.nodep[p->back->index - 1]->tyme); - if (x > 0.0) - w = (long)(0.4342944822 * log(x)); - else if (x == 0.0) - w = 0; - else - w = (long)(0.4342944822 * log(-x)) + 1; - if (w < 0) - w = 0; - fprintf(outtree, ":%*.5f", (int)(w + 7), x); - col += w + 8; -} /* promlk_treeout */ - - -void initpromlnode(node **p, node **grbg, node *q, long len, long nodei, - long 
*ntips, long *parens, initops whichinit, - pointarray treenode, pointarray nodep, Char *str, - Char *ch, FILE *intree) -{ - /* initializes a node */ - boolean minusread; - double valyew, divisor; - - switch (whichinit) { - case bottom: - gnu(grbg, p); - (*p)->index = nodei; - (*p)->tip = false; - malloc_ppheno((*p), endsite, rcategs); - nodep[(*p)->index - 1] = (*p); - break; - case nonbottom: - gnu(grbg, p); - malloc_ppheno(*p, endsite, rcategs); - (*p)->index = nodei; - break; - case tip: - match_names_to_data(str, nodep, p, spp); - break; - case iter: - (*p)->initialized = false; - (*p)->v = initialv; - (*p)->iter = true; - if ((*p)->back != NULL) - (*p)->back->iter = true; - break; - case length: - processlength(&valyew, &divisor, ch, &minusread, intree, parens); - (*p)->v = valyew / divisor; - (*p)->iter = false; - if ((*p)->back != NULL) { - (*p)->back->v = (*p)->v; - (*p)->back->iter = false; - } - break; - case unittrwt: - curtree.nodep[spp]->iter = false; - break; - default: /* cases hslength, hsnolength, treewt */ - break; /* should never occur */ - } -} /* initpromlnode */ - - -void tymetrav(node *p, double *x) -{ - /* set up times of nodes */ - node *sib_ptr, *q; - long i, num_sibs; - double xmax; - - xmax = 0.0; - if (!p->tip) { - sib_ptr = p; - num_sibs = count_sibs(p); - for (i=0; i < num_sibs; i++) { - sib_ptr = sib_ptr->next; - tymetrav(sib_ptr->back, x); - if (xmax > (*x)) - xmax = (*x); - } - } else - (*x) = 0.0; - p->tyme = xmax; - if (!p->tip) { - q = p; - while (q->next != p) { - q = q->next; - q->tyme = p->tyme; - } - } - (*x) = p->tyme - p->v; -} /* tymetrav */ - - -void free_all_protx (long nonodes, pointarray treenode) -{ - /* used in proml */ - long i, j, k; - node *p; - - /* Zero thru spp are tips, */ - for (i = 0; i < spp; i++) { - for (j = 0; j < endsite; j++) - free(treenode[i]->protx[j]); - free(treenode[i]->protx); - } - - /* The rest are rings (i.e. 
triads) */ - for (i = spp; i < nonodes; i++) { - if (treenode[i] != NULL) { - p = treenode[i]; - for (j = 1; j <= 3; j++) { - for (k = 0; k < endsite; k++) - free(p->protx[k]); - free(p->protx); - p = p->next; - } - } - } -} /* free_all_protx */ - - -void maketree() -{ - /* constructs a binary tree from the pointers in curtree.nodep, - adds each node at location which yields highest likelihood - then rearranges the tree for greatest likelihood */ - - long i, j; - long numtrees = 0; - double bestlike, gotlike, x; - node *item, *nufork, *dummy, *q, *root=NULL; - boolean dummy_haslengths, dummy_first, goteof; - long nextnode; - long grcategs; - pointarray dummy_treenode=NULL; - - grcategs = (categs > rcategs) ? categs : rcategs; - - prot_inittable(); - - if (!usertree) { - for (i = 1; i <= spp; i++) - enterorder[i - 1] = i; - if (jumble) - randumize(seed, enterorder); - curtree.root = curtree.nodep[spp]; - curtree.root->back = NULL; - for (i = 0; i < spp; i++) - curtree.nodep[i]->back = NULL; - for (i = spp; i < nonodes; i++) { - q = curtree.nodep[i]; - q->back = NULL; - while ((q = q->next) != curtree.nodep[i]) - q->back = NULL; - } - polishing = false; - promlk_add(curtree.nodep[enterorder[0]-1], curtree.nodep[enterorder[1]-1], - curtree.nodep[spp], false); - if (progress) { - printf("\nAdding species:\n"); - writename(0, 2, enterorder); -#ifdef WIN32 - phyFillScreenColor(); -#endif - } - lastsp = false; - smoothit = false; - for (i = 3; i <= spp; i++) { - bestyet = UNDEFINED; - bestree.likelihood = bestyet; - there = curtree.root; - item = curtree.nodep[enterorder[i - 1] - 1]; - nufork = curtree.nodep[spp + i - 2]; - lastsp = (i == spp); - addpreorder(curtree.root, item, nufork, true, true); - promlk_add(there, item, nufork, false); - like = prot_evaluate(curtree.root); - rearrange(&curtree.root); - if (curtree.likelihood > bestree.likelihood) { - prot_copy_(&curtree, &bestree, nonodes, grcategs); - } - if (progress) { - writename(i - 1, 1, enterorder); -#ifdef 
WIN32 - phyFillScreenColor(); -#endif - } - if (lastsp && global) { - if (progress) { - printf("Doing global rearrangements\n"); - printf(" !"); - for (j = 1; j <= nonodes; j++) - if ( j % (( nonodes / 72 ) + 1 ) == 0 ) - putchar('-'); - printf("!\n"); - } - bestlike = bestyet; - do { - if (progress) - printf(" "); - gotlike = bestlike; - for (j = 0; j < nonodes; j++) { - bestyet = UNDEFINED; - item = curtree.nodep[j]; - if (item != curtree.root) { - nufork = curtree.nodep[curtree.nodep[j]->back->index - 1]; - promlk_re_move(&item, &nufork, false); - there = curtree.root; - addpreorder(curtree.root, item, nufork, true, true); - promlk_add(there, item, nufork, false); - } - if (progress) { - if ( j % (( nonodes / 72 ) + 1 ) == 0 ) - putchar('.'); - fflush(stdout); - } - } - if (progress) - putchar('\n'); - } while (bestlike < gotlike); - } - } - if (njumble > 1 && lastsp) { - for (i = 0; i < spp; i++ ) - promlk_re_move(&curtree.nodep[i], &dummy, false); - if (jumb == 1 || bestree2.likelihood < bestree.likelihood) - prot_copy_(&bestree, &bestree2, nonodes, grcategs); - } - if (jumb == njumble) { - if (njumble > 1) - prot_copy_(&bestree2, &curtree, nonodes, grcategs); - else - prot_copy_(&bestree, &curtree, nonodes, grcategs); - fprintf(outfile, "\n\n"); - treevaluate(); - curtree.likelihood = prot_evaluate(curtree.root); - promlk_printree(); - summarize(); - if (trout) { - col = 0; - promlk_treeout(curtree.root); - } - } - } else { - openfile(&intree, INTREE, "input tree file", "r", progname, intreename); - numtrees = countsemic(&intree); - if(numtrees > MAXSHIMOTREES) - shimotrees = MAXSHIMOTREES; - else - shimotrees = numtrees; - if (numtrees > 2) - initseed(&inseed, &inseed0, seed); - l0gl = (double *) Malloc(shimotrees * sizeof(double)); - l0gf = (double **) Malloc(shimotrees * sizeof(double *)); - for (i=0; i < shimotrees; ++i) - l0gf[i] = (double *)Malloc(endsite * sizeof(double)); - if (treeprint) { - fprintf(outfile, "User-defined tree"); - if (numtrees > 1) 
- putc('s', outfile); - fprintf(outfile, ":\n\n"); - } - fprintf(outfile, "\n\n"); - which = 1; - while (which <= numtrees) { - - /* These initializations required each time through the loop - since multiple trees require re-initialization */ - dummy_haslengths = true; - nextnode = 0; - dummy_first = true; - goteof = false; - - treeread(intree, &root, dummy_treenode, &goteof, &dummy_first, - curtree.nodep, &nextnode, &dummy_haslengths, &grbg, - initpromlnode,false,nonodes); - - nonodes = nextnode; - - root = curtree.nodep[root->index - 1]; - curtree.root = root; - - if (lngths) - tymetrav(curtree.root, &x); - - if (goteof && (which <= numtrees)) { - /* if we hit the end of the file prematurely */ - printf ("\n"); - printf ("ERROR: trees missing at end of file.\n"); - printf ("\tExpected number of trees:\t\t%ld\n", numtrees); - printf ("\tNumber of trees actually in file:\t%ld.\n\n", which - 1); - exxit(-1); - } - curtree.start = curtree.nodep[0]->back; - treevaluate(); - promlk_printree(); - summarize(); - if (trout) { - col = 0; - promlk_treeout(curtree.root); - } - if(which < numtrees){ - prot_freex_notip(nonodes, curtree.nodep); - gdispose(curtree.root, &grbg, curtree.nodep); - } - which++; - } - - FClose(intree); - if (!auto_ && numtrees > 1 && weightsum > 1 ) - standev2(numtrees, maxwhich, 0, endsite, maxlogl, l0gl, l0gf, - aliasweight, seed); - } - if (usertree) { - free(l0gl); - for (i=0; i < shimotrees; i++) - free(l0gf[i]); - free(l0gf); - } - prot_freetable(); - if (jumb < njumble) - return; - free(contribution); - free_all_protx(nonodes2, curtree.nodep); - if (!usertree) { - free_all_protx(nonodes2, bestree.nodep); - if (njumble > 1) - free_all_protx(nonodes2, bestree2.nodep); - } - if (progress) { - printf("\n\nOutput written to file \"%s\"\n\n", outfilename); - if (trout) - printf("Tree also written onto file \"%s\"\n", outtreename); - putchar('\n'); - } - - free(root); -} /* maketree */ - - -void clean_up() -{ - /* Free and/or close stuff */ - long i; 
- - free (rrate); - free (probcat); - free (rate); - /* Seems to require freeing every time... */ - for (i = 0; i < spp; i++) { - free (y[i]); - } - free (y); - free (nayme); - free (enterorder); - free (category); - free (weight); - free (alias); - free (ally); - free (location); - free (aliasweight); - free (probmat); - free (eigmat); - if (! (njumble <= 1)) - freetree2(bestree2.nodep, nonodes2); - FClose(infile); - FClose(outfile); - FClose(outtree); -#ifdef MAC - fixmacfile(outfilename); - fixmacfile(outtreename); -#endif -} /* clean_up */ - - -int main(int argc, Char *argv[]) -{ /* Protein Maximum Likelihood with molecular clock */ - -#ifdef MAC - argc = 1; /* macsetup("Promlk", ""); */ - argv[0] = "Promlk"; -#endif - init(argc,argv); - progname = argv[0]; - openfile(&infile, INFILE, "input file", "r", argv[0], infilename); - openfile(&outfile, OUTFILE, "output file", "w", argv[0], outfilename); - - ibmpc = IBMCRT; - ansi = ANSICRT; - datasets = 1; - mulsets = false; - firstset = true; - doinit(); - - if (trout) - openfile(&outtree,OUTTREE,"output tree file","w",argv[0],outtreename); - if (ctgry) - openfile(&catfile,CATFILE,"categories file","r",argv[0],catfilename); - if (weights || justwts) - openfile(&weightfile,WEIGHTFILE,"weights file","r",argv[0],weightfilename); - for (ith = 1; ith <= datasets; ith++) { - if (datasets > 1) { - fprintf(outfile, "Data set # %ld:\n\n", ith); - if (progress) - printf("\nData set # %ld:\n", ith); - } - getinput(); - - if (ith == 1) - firstset = false; - for (jumb = 1; jumb <= njumble; jumb++){ - max_num_sibs = 0; - maketree(); - } - } - - clean_up(); - printf("Done.\n\n"); -#ifdef WIN32 - phyRestoreConsoleAttributes(); -#endif - return 0; -} /* Protein Maximum Likelihood with molecular clock */ - diff --git a/forester/archive/RIO/others/phylip_mod/src/protdist.c b/forester/archive/RIO/others/phylip_mod/src/protdist.c deleted file mode 100644 index b6a9428..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/protdist.c 
+++ /dev/null @@ -1,1973 +0,0 @@ -/*Modified by Christian Zmasek. Use at your own risk.*/ - -#include "phylip.h" -#include "seq.h" - -/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. - Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. - Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. */ - -#define nmlngth 26 /*changed from to 10 to 26 by CZ 2006-07-28 */ /* number of characters in species name */ -#define protepsilon .00001 -typedef long *steparray; -typedef enum { - universal, ciliate, mito, vertmito, flymito, yeastmito -} codetype; -typedef enum { - chemical, hall, george -} cattype; - -typedef double matrix[20][20]; - -#ifndef OLDC -/* function prototypes */ -void protdist_uppercase(Char *); -void protdist_inputnumbers(void); -void getoptions(void); -void transition(void); -void doinit(void); -void printcategories(void); -void inputoptions(void); -void protdist_inputdata(void); -void doinput(void); -void code(void); -void protdist_cats(void); -void maketrans(void); -void givens(matrix, long, long, long, double, double, boolean); -void coeffs(double, double, double *, double *, double); -void tridiag(matrix, long, double); -void shiftqr(matrix, long, double); -void qreigen(matrix, long); -void pmbeigen(void); -void pameigen(void); -void jtteigen(void); -void predict(long, long, long); -void makedists(void); -void reallocchars(void); -/* function prototypes */ -#endif - -long chars, datasets, ith, ctgry, categs; -/* spp = number of species - chars = number of positions in actual sequences */ -double freqa, freqc, freqg, freqt, cvi, invarfrac, ttratio, xi, xv, - ease, fracchange; -boolean weights, justwts, progress, mulsets, gama, invar, basesequal, - usepmb, usejtt, usepam, kimura, similarity, firstset; -codetype whichcode; -cattype whichcat; -steptr oldweight; -double rate[maxcategs]; -aas **gnode; -aas trans[4][4][4]; 
-double pie[20]; -long cat[(long)ser - (long)ala + 1], numaa[(long)ser - (long)ala + 1]; -double eig[20]; -matrix prob, eigvecs; -double **d; -char infilename[100], outfilename[100], catfilename[100], weightfilename[100]; - -/* Local variables for makedists, propagated globally for c version: */ - double tt, p, dp, d2p, q, elambdat; - - -/* this jtt matrix decomposition due to Elisabeth Tillier */ -static double jtteigs[] = -{0.0, -0.007031123, -0.006484345, -0.006086499, -0.005514432, --0.00772664, -0.008643413, -0.010620756, -0.009965552, -0.011671808, --0.012222418,-0.004589201, -0.013103714, -0.014048038, -0.003170582, --0.00347935, -0.015311677, -0.016021194, -0.017991454, -0.018911888}; - -static double jttprobs[20][20] = -{{0.076999996, 0.051000003, 0.043000004, 0.051999998, 0.019999996, 0.041, - 0.061999994, 0.073999997, 0.022999999, 0.052000004, 0.090999997, 0.058999988, - 0.024000007, 0.04, 0.050999992, 0.069, 0.059000006, 0.014000008, 0.032000004, - 0.066000005}, - {0.015604455, -0.068062363, 0.020106264, 0.070723273, 0.011702977, 0.009674053, - 0.074000798, -0.169750458, 0.005560808, -0.008208636, -0.012305869, - -0.063730179, -0.005674643, -0.02116828, 0.104586169, 0.016480839, 0.016765139, - 0.005936994, 0.006046367, -0.0082877}, - {-0.049778281, -0.007118197, 0.003801272, 0.070749616, 0.047506147, - 0.006447017, 0.090522425, -0.053620432, -0.008508175, 0.037170603, - 0.051805545, 0.015413608, 0.019939916, -0.008431976, -0.143511376, - -0.052486072, -0.032116542, -0.000860626, -0.02535993, 0.03843545}, - {-0.028906423, 0.092952047, -0.009615343, -0.067870117, 0.031970392, - 0.048338335, -0.054396304, -0.135916654, 0.017780083, 0.000129242, - 0.031267424, 0.116333586, 0.007499746, -0.032153596, 0.033517051, - -0.013719269, -0.00347293, -0.003291821, -0.02158326, -0.008862168}, - {0.037181176, -0.023106564, -0.004482225, -0.029899635, 0.118139633, - -0.032298569, -0.04683198, 0.05566988, -0.012622847, 0.002023096, - -0.043921088, -0.04792557, 
-0.003452711, -0.037744513, 0.020822974, - 0.036580187, 0.02331425, -0.004807711, -0.017504496, 0.01086673}, - {0.044754061, -0.002503471, 0.019452517, -0.015611487, -0.02152807, - -0.013131425, -0.03465365, -0.047928912, 0.020608851, 0.067843095, - -0.122130014, 0.002521499, 0.013021646, -0.082891087, -0.061590119, - 0.016270856, 0.051468938, 0.002079063, 0.081019713, 0.082927944}, - {0.058917882, 0.007320741, 0.025278141, 0.000357541, -0.002831285, - -0.032453034, -0.010177288, -0.069447924, -0.034467324, 0.011422358, - -0.128478324, 0.04309667, -0.015319944, 0.113302422, -0.035052393, - 0.046885372, 0.06185183, 0.00175743, -0.06224497, 0.020282093}, - {-0.014562092, 0.022522921, -0.007094389, 0.03480089, -0.000326144, - -0.124039037, 0.020577906, -0.005056454, -0.081841576, -0.004381786, - 0.030826152, 0.091261631, 0.008878828, -0.02829487, 0.042718836, - -0.011180886, -0.012719227, -0.000753926, 0.048062375, -0.009399129}, - {0.033789571, -0.013512235, 0.088010984, 0.017580292, -0.006608005, - -0.037836971, -0.061344686, -0.034268357, 0.018190209, -0.068484614, - 0.120024744, -0.00319321, -0.001349477, -0.03000546, -0.073063759, - 0.081912399, 0.0635245, 0.000197, -0.002481798, -0.09108114}, - {-0.113947615, 0.019230545, 0.088819683, 0.064832765, 0.001801467, - -0.063829682, -0.072001633, 0.018429333, 0.057465965, 0.043901014, - -0.048050874, -0.001705918, 0.022637173, 0.017404665, 0.043877902, - -0.017089594, -0.058489485, 0.000127498, -0.029357194, 0.025943972}, - {0.01512923, 0.023603725, 0.006681954, 0.012360216, -0.000181447, - -0.023011838, -0.008960024, -0.008533239, 0.012569835, 0.03216118, - 0.061986403, -0.001919083, -0.1400832, -0.010669741, -0.003919454, - -0.003707024, -0.026806029, -0.000611603, -0.001402648, 0.065312824}, - {-0.036405351, 0.020816769, 0.011408213, 0.019787053, 0.038897829, - 0.017641789, 0.020858533, -0.006067252, 0.028617353, -0.064259496, - -0.081676567, 0.024421823, -0.028751676, 0.07095096, -0.024199434, - -0.007513119, 
-0.028108766, -0.01198095, 0.111761119, -0.076198809}, - {0.060831772, 0.144097327, -0.069151377, 0.023754576, -0.003322955, - -0.071618574, 0.03353154, -0.02795295, 0.039519769, -0.023453968, - -0.000630308, -0.098024591, 0.017672997, 0.003813378, -0.009266499, - -0.011192111, 0.016013873, -0.002072968, -0.010022044, -0.012526904}, - {-0.050776604, 0.092833081, 0.044069596, 0.050523021, -0.002628417, - 0.076542572, -0.06388631, -0.00854892, -0.084725311, 0.017401063, - -0.006262541, -0.094457679, -0.002818678, -0.0044122, -0.002883973, - 0.028729685, -0.004961596, -0.001498627, 0.017994575, -0.000232779}, - {-0.01894566, -0.007760205, -0.015160993, -0.027254587, 0.009800903, - -0.013443561, -0.032896517, -0.022734138, -0.001983861, 0.00256111, - 0.024823166, -0.021256768, 0.001980052, 0.028136263, -0.012364384, - -0.013782446, -0.013061091, 0.111173981, 0.021702122, 0.00046654}, - {-0.009444193, -0.042106824, -0.02535015, -0.055125574, 0.006369612, - -0.02945416, -0.069922064, -0.067221068, -0.003004999, 0.053624311, - 0.128862984, -0.057245803, 0.025550508, 0.087741073, -0.001119043, - -0.012036202, -0.000913488, -0.034864475, 0.050124813, 0.055534723}, - {0.145782464, -0.024348311, -0.031216873, 0.106174443, 0.00202862, - 0.02653866, -0.113657267, -0.00755018, 0.000307232, -0.051241158, - 0.001310685, 0.035275877, 0.013308898, 0.002957626, -0.002925034, - -0.065362319, -0.071844582, 0.000475894, -0.000112419, 0.034097762}, - {0.079840455, 0.018769331, 0.078685899, -0.084329807, -0.00277264, - -0.010099754, 0.059700608, -0.019209715, -0.010442992, -0.042100476, - -0.006020556, -0.023061786, 0.017246106, -0.001572858, -0.006703785, - 0.056301316, -0.156787357, -0.000303638, 0.001498195, 0.051363455}, - {0.049628261, 0.016475144, 0.094141653, -0.04444633, 0.005206131, - -0.001827555, 0.02195624, 0.013066683, -0.010415582, -0.022338403, - 0.007837197, -0.023397671, -0.002507095, 0.005177694, 0.017109561, - -0.202340113, 0.069681441, 0.000120736, 0.002201146, 
0.004670849}, - {0.089153689, 0.000233354, 0.010826822, -0.004273519, 0.001440618, - 0.000436077, 0.001182351, -0.002255508, -0.000700465, 0.150589876, - -0.003911914, -0.00050154, -0.004564983, 0.00012701, -0.001486973, - -0.018902754, -0.054748555, 0.000217377, -0.000319302, -0.162541651}}; - -/* PMB matrix decomposition courtesy of Elisabeth Tillier */ -static double pmbeigs[] = -{0.0000001586972220,-1.8416770496147100, -1.6025046986139100,-1.5801012515121300, --1.4987794099715900,-1.3520794233801900,-1.3003469390479700,-1.2439503327631300, --1.1962574080244200,-1.1383730501367500,-1.1153278910708000,-0.4934843510654760, --0.5419014550215590,-0.9657997830826700,-0.6276075673757390,-0.6675927795018510, --0.6932641383465870,-0.8897872681859630,-0.8382698977371710,-0.8074694642446040}; -static double pmbprobs[20][20] = -{{0.0771762457248147,0.0531913844998640,0.0393445076407294,0.0466756566755510, -0.0286348361997465,0.0312327748383639,0.0505410248721427,0.0767106611472993, -0.0258916271688597,0.0673140562194124,0.0965705469252199,0.0515979465932174, -0.0250628079438675,0.0503492018628350,0.0399908189418273,0.0641898881894471, -0.0517539616710987,0.0143507440546115,0.0357994592438322,0.0736218495862984}, -{0.0368263046116572,-0.0006728917107827,0.0008590805287740,-0.0002764255356960, -0.0020152937187455,0.0055743720652960,0.0003213317669367,0.0000449190281568, --0.0004226254397134,0.1805040629634510,-0.0272246813586204,0.0005904606533477, --0.0183743200073889,-0.0009194625608688,0.0008173657533167,-0.0262629806302238, -0.0265738757209787,0.0002176606241904,0.0021315644838566,-0.1823229927207580}, -{-0.0194800075560895,0.0012068088610652,-0.0008803318319596,-0.0016044273960017, --0.0002938633803197,-0.0535796754602196,0.0155163896648621,-0.0015006360762140, -0.0021601372013703,0.0268513218744797,-0.1085292493742730,0.0149753083138452, -0.1346457366717310,-0.0009371698759829,0.0013501708044116,0.0346352293103622, 
--0.0276963770242276,0.0003643142783940,0.0002074817333067,-0.0174108903914110}, -{0.0557839400850153,0.0023271577185437,0.0183481103396687,0.0023339480096311, -0.0002013267015151,-0.0227406863569852,0.0098644845475047,0.0064721276774396, -0.0001389408104210,-0.0473713878768274,-0.0086984445005797,0.0026913674934634, -0.0283724052562196,0.0001063665179457,0.0027442574779383,-0.1875312134708470, -0.1279864877057640,0.0005103347834563,0.0003155113168637,0.0081451082759554}, -{0.0037510125027265,0.0107095920636885,0.0147305410328404,-0.0112351252180332, --0.0001500408626446,-0.1523450933729730,0.0611532413339872,-0.0005496748939503, -0.0048714378736644,-0.0003826320053999,0.0552010244407311,0.0482555671001955, --0.0461664995115847,-0.0021165008617978,-0.0004574454232187,0.0233755883688949, --0.0035484915422384,0.0009090698422851,0.0013840637687758,-0.0073895139302231}, -{-0.0111512564930024,0.1025460064723080,0.0396772456883791,-0.0298408501361294, --0.0001656742634733,-0.0079876311843289,0.0712644184507945,-0.0010780604625230, --0.0035880882043592,0.0021070399334252,0.0016716329894279,-0.1810123023850110, -0.0015141703608724,-0.0032700852781804,0.0035503782441679,0.0118634302028026, -0.0044561606458028,-0.0001576678495964,0.0023470722225751,-0.0027457045397157}, -{0.1474525743949170,-0.0054432538500293,0.0853848892349828,-0.0137787746207348, --0.0008274830358513,0.0042248844582553,0.0019556229305563,-0.0164191435175148, --0.0024501858854849,0.0120908948084233,-0.0381456105972653,0.0101271614855119, --0.0061945941321859,0.0178841099895867,-0.0014577779202600,-0.0752120602555032, --0.1426985695849920,0.0002862275078983,-0.0081191734261838,0.0313401149422531}, -{0.0542034611735289,-0.0078763926211829,0.0060433542506096,0.0033396210615510, -0.0013965072374079,0.0067798903832256,-0.0135291136622509,-0.0089982442731848, --0.0056744537593887,-0.0766524225176246,0.1881210263933930,-0.0065875518675173, 
-0.0416627569300375,-0.0953804133524747,-0.0012559228448735,0.0101622644292547, --0.0304742453119050,0.0011702318499737,0.0454733434783982,-0.1119239362388150}, -{0.1069409037912470,0.0805064400880297,-0.1127352030714600,0.1001181253523260, --0.0021480427488769,-0.0332884841459003,-0.0679837575848452,-0.0043812841356657, -0.0153418716846395,-0.0079441315103188,-0.0121766182046363,-0.0381127991037620, --0.0036338726532673,0.0195324059593791,-0.0020165963699984,-0.0061222685010268, --0.0253761448771437,-0.0005246410999057,-0.0112205170502433,0.0052248485517237}, -{-0.0325247648326262,0.0238753651653669,0.0203684886605797,0.0295666232678825, --0.0003946714764213,-0.0157242718469554,-0.0511737848084862,0.0084725632040180, --0.0167068828528921,0.0686962159427527,-0.0659702890616198,-0.0014289912494271, --0.0167000964093416,-0.1276689083678200,0.0036575057830967,-0.0205958145531018, -0.0000368919612829,0.0014413626622426,0.1064360941926030,0.0863372661517408}, -{-0.0463777468104402,0.0394712148670596,0.1118686750747160,0.0440711686389031, --0.0026076286506751,-0.0268454015202516,-0.1464943067133240,-0.0137514051835380, --0.0094395514284145,-0.0144124844774228,0.0249103379323744,-0.0071832157138676, -0.0035592787728526,0.0415627419826693,0.0027040097365669,0.0337523666612066, -0.0316121324137152,-0.0011350177559026,-0.0349998884574440,-0.0302651879823361}, -{0.0142360925194728,0.0413145623127025,0.0324976427846929,0.0580930922002398, --0.0586974207121084,0.0202001168873069,0.0492204086749069,0.1126593173463060, -0.0116620013776662,-0.0780333711712066,-0.1109786767320410,0.0407775100936731, --0.0205013161312652,-0.0653458585025237,0.0347351829703865,0.0304448983224773, -0.0068813748197884,-0.0189002309261882,-0.0334507528405279,-0.0668143558699485}, -{-0.0131548829657936,0.0044244322828034,-0.0050639951827271,-0.0038668197633889, --0.1536822386530220,0.0026336969165336,0.0021585651200470,-0.0459233839062969, 
-0.0046854727140565,0.0393815434593599,0.0619554007991097,0.0027456299925622, -0.0117574347936383,0.0373018612990383,0.0024818527553328,-0.0133956606027299, --0.0020457128424105,0.0154178819990401,0.0246524142683911,0.0275363065682921}, -{-0.1542307272455030,0.0364861558267547,-0.0090880407008181,0.0531673937889863, -0.0157585615170580,0.0029986538457297,0.0180194047699875,0.0652152443589317, -0.0266842840376180,0.0388457366405908,0.0856237634510719,0.0126955778952183, -0.0099593861698250,-0.0013941794862563,0.0294065511237513,-0.1151906949298290, --0.0852991447389655,0.0028699120202636,-0.0332087026659522,0.0006811857297899}, -{0.0281300736924501,-0.0584072081898638,-0.0178386569847853,-0.0536470338171487, --0.0186881656029960,-0.0240008730656106,-0.0541064820498883,0.2217137098936020, --0.0260500001542033,0.0234505236798375,0.0311127151218573,-0.0494139126682672, -0.0057093465049849,0.0124937286655911,-0.0298322975915689,0.0006520211333102, --0.0061018680727128,-0.0007081999479528,-0.0060523759094034,0.0215845995364623}, -{0.0295321046399105,-0.0088296411830544,-0.0065057049917325,-0.0053478115612781, --0.0100646496794634,-0.0015473619084872,0.0008539960632865,-0.0376381933046211, --0.0328135588935604,0.0672161874239480,0.0667626853916552,-0.0026511651464901, -0.0140451514222062,-0.0544836996133137,0.0427485157912094,0.0097455780205802, -0.0177309072915667,-0.0828759701187452,-0.0729504795471370,0.0670731961252313}, -{0.0082646581043963,-0.0319918630534466,-0.0188454445200422,-0.0374976353856606, -0.0037131290686848,-0.0132507796987883,-0.0306958830735725,-0.0044119395527308, --0.0140786756619672,-0.0180512599925078,-0.0208243802903953,-0.0232202769398931, --0.0063135878270273,0.0110442171178168,0.1824538048228460,-0.0006644614422758, --0.0069909097436659,0.0255407650654681,0.0099119399501151,-0.0140911517070698}, -{0.0261344441524861,-0.0714454044548650,0.0159436926233439,0.0028462736216688, 
--0.0044572637889080,-0.0089474834434532,-0.0177570282144517,-0.0153693244094452, -0.1160919467206400,0.0304911481385036,0.0047047513411774,-0.0456535116423972, -0.0004491494948617,-0.0767108879444462,-0.0012688533741441,0.0192445965934123, -0.0202321954782039,0.0281039933233607,-0.0590403018490048,0.0364080426546883}, -{0.0115826306265004,0.1340228176509380,-0.0236200652949049,-0.1284484655137340, --0.0004742338006503,0.0127617346949511,-0.0428560878860394,0.0060030732454125, -0.0089182609926781,0.0085353834972860,0.0048464809638033,0.0709740071429510, -0.0029940462557054,-0.0483434904493132,-0.0071713680727884,-0.0036840391887209, -0.0031454003250096,0.0246243550241551,-0.0449551277644180,0.0111449232769393}, -{0.0140356721886765,-0.0196518236826680,0.0030517022326582,0.0582672093364850, --0.0000973895685457,0.0021704767224292,0.0341806268602705,-0.0152035987563018, --0.0903198657739177,0.0259623214586925,0.0155832497882743,-0.0040543568451651, -0.0036477631918247,-0.0532892744763217,-0.0142569373662724,0.0104500681408622, -0.0103483945857315,0.0679534422398752,-0.0768068882938636,0.0280289727046158}} -; - -static double pameigs[] = {0.0, -0.002350753691875762, -0.002701991863800379, - -0.002931612442853115, -0.004262492032364507, -0.005395980482561625, - -0.007141172690079523, -0.007392844756151318, -0.007781761342200766, - -0.00810032066366362, -0.00875299712761124, -0.01048227332164386, - -0.01109594097332267, -0.01298616073142234, -0.01342036228188581, - -0.01552599145527578, -0.01658762802054814, -0.0174893445623765, - -0.01933280832903272, -0.02206353522613025}; - -static double pamprobs[20][20] = - {{0.087683339901135, 0.04051291829598762, 0.04087846315185977, - 0.04771603459744777, 0.03247095396561266, 0.03784612688594957, - 0.0504933695604875, 0.0898249006830755, 0.03285885059543713, - 0.0357514442352119, 0.0852464099207521, 0.07910313444070642, - 0.01488243946396588, 0.04100101908956829, 0.05158026947089499, - 0.06975497205982451, 0.05832757042475474, 
0.00931264523877807, - 0.03171540880870517, 0.06303972920984541}, - {0.01943453646811026, -0.004492574160484092, 0.007694891061220776, - 0.01278399096887701, 0.0106157418450234, 0.007542140341575122, - 0.01326994069032819, 0.02615565199894889, 0.003123125764490066, - 0.002204507682495444, -0.004782898215768979, 0.01204241965177619, - 0.0007847400096924341, -0.03043626073172116, 0.01221202591902536, - 0.01100527004684405, 0.01116495631339549, -0.0925364931988571, - -0.02622065387931562, 0.00843494142432107}, - {0.01855357100209072, 0.01493642835763868, 0.0127983090766285, - 0.0200533250704364, -0.1681898360107787, 0.01551657969909255, - 0.02128060163107209, 0.03100633591848964, 0.00845480845269879, - 0.000927149370785571, 0.00937207565817036, 0.03490557769673472, - 0.00300443019551563, -0.02590837220264415, 0.01329376859943192, - 0.006854110889741407, 0.01102593860528263, 0.003360844186685888, - -0.03459712356647764, 0.003351477369404443}, - {0.02690642688200102, 0.02131745801890152, 0.0143626616005213, - 0.02405101425725929, 0.05041008641436849, 0.01430925051050233, - 0.02362114036816964, 0.04688381789373886, 0.005250115453626377, - -0.02040112168595516, -0.0942720776915669, 0.03773004996758644, - -0.00822831940782616, -0.1164872809439224, 0.02286281877257392, - 0.02849551240669926, 0.01468856796295663, 0.02377110964207936, - -0.094380545436577, -0.02089068498518036}, - {0.00930172577225213, 0.01493463068441099, 0.020186920775608, - 0.02892154953912524, -0.01224593358361567, 0.01404228329986624, - 0.02671186617119041, 0.04537535161795231, 0.02229995804098249, - -0.04635704133961575, -0.1966910360247138, 0.02796648065439046, - -0.02263484732621436, 0.0440490503242072, 0.01148782948302166, - 0.01989170531824069, 0.001306805142981245, -0.005676690969116321, - 0.07680476281625202, -0.07967537039721849}, - {0.06602274245435476, -0.0966661981471856, -0.005241648783844579, - 0.00859135188171146, -0.007762129660943368, -0.02888965572526196, - 0.003592291525888222, 
0.1668410669287673, -0.04082039290551406, - 0.005233775047553415, -0.01758244726137135, -0.1493955762326898, - -0.00855819137835548, 0.004211419253492328, 0.01929306335052688, - 0.03008056746359405, 0.0190444422412472, 0.005577189741419315, - 0.0000874156155112068, 0.02634091459108298}, - {0.01933897472880726, 0.05874583569377844, -0.02293534606228405, - -0.07206314017962175, -0.004580681581546643, -0.0628814337610561, - -0.0850783812795136, 0.07988417636610614, -0.0852798990133397, - 0.01649047166155952, -0.05416647263757423, 0.1089834536254064, - 0.005093403979413865, 0.02520300254161142, 0.0005951431406455604, - 0.02441251821224675, 0.02796099482240553, -0.002574933994926502, - -0.007172237553012804, 0.03002455129086954}, - {0.04041118479094272, -0.002476225672095412, -0.01494505811263243, - -0.03759443758599911, -0.00892246902492875, -0.003634714029239211, - -0.03085671837973749, -0.126176309029931, 0.005814031139083794, - 0.01313561962646063, -0.04760487162503322, -0.0490563712725484, - -0.005082243450421558, -0.01213634309383557, 0.1806666927079249, - 0.02111663336185495, 0.02963486860587087, -0.0000175020101657785, - 0.01197155383597686, 0.0357526792184636}, - {-0.01184769557720525, 0.01582776076338872, -0.006570708266564639, - -0.01471915653734024, 0.00894343616503608, 0.00562664968033149, - -0.01465878888356943, 0.05365282692645818, 0.00893509735776116, - -0.05879312944436473, 0.0806048683392995, -0.007722897986905326, - -0.001819943882718859, 0.0942535573077267, 0.07483883782251654, - 0.004354639673913651, -0.02828804845740341, -0.001318222184691827, - -0.07613149604246563, -0.1251675867732172}, - {0.00834167031558193, -0.01509357596974962, 0.007098172811092488, - 0.03127677418040319, 0.001992448468465455, 0.00915441566808454, - 0.03430175973499201, -0.0730648147535803, -0.001402707145575659, - 0.04780949194330815, -0.1115035603461273, -0.01292297197609604, - -0.005056270550868528, 0.1112053349612027, -0.03801929822379964, - -0.001191241001736563, 
0.01872874622910247, 0.0005314214903865993, - -0.0882576318311789, 0.07607183599610171}, - {-0.01539460099727769, 0.04988596184297883, -0.01187240760647617, - -0.06987843637091853, -0.002490472846497859, 0.01009857892494956, - -0.07473588067847209, 0.0906009925879084, 0.1243612446505172, - 0.02152806401345371, -0.03504879644860233, -0.06680752427613573, - -0.005574485153629651, 0.001518282948127752, -0.01999168507510701, - -0.01478606199529457, -0.02203749419458996, -0.00132680708294333, - -0.01137505997867614, 0.05332658773667142}, - {-0.06104378736432388, 0.0869446603393548, -0.03298331234537257, - 0.03128515657456024, 0.003906358569208259, 0.03578694104193928, - 0.06241936133189683, 0.06182827284921748, -0.05566564263245907, - 0.02640868588189002, -0.01349751243059039, -0.05507866642582638, - -0.006671347738489326, -0.001470096466016046, 0.05185743641479938, - -0.07494697511168257, -0.1175185439057584, -0.001188074094105709, - 0.00937934805737347, 0.05024773745437657}, - {-0.07252555582124737, -0.116554459356382, 0.003605361887406413, - -0.00836518656029184, 0.004615715410745561, 0.005105376617651312, - -0.00944938657024391, 0.05602449420950007, 0.02722719610561933, - 0.01959357494748446, -0.0258655103753962, 0.1440733975689835, - 0.01446782819722976, 0.003718896062070054, 0.05825843045655135, - -0.06230154142733073, -0.07833704962300169, 0.003160836143568724, - -0.001169873777936648, 0.03471745590503304}, - {-0.03204352258752698, 0.01019272923862322, 0.04509668708733181, - 0.05756522429120813, -0.0004601149081726732, -0.0984718150777423, - -0.01107826100664925, -0.005680277810520585, 0.01962359392320817, - 0.01550006899131986, 0.05143956925922197, 0.02462476682588468, - -0.0888843861002653, -0.00171553583659411, 0.01606331750661664, - 0.001176847743518958, -0.02070972978912828, -0.000341523293579971, - -0.002654732745607882, 0.02075709428885848}, - {0.03595199666430258, -0.02800219615234468, -0.04341570015493925, - -0.0748275906176658, 0.0001051403676377422, 
0.1137431321746627, - 0.005852087565974318, 0.003443037513847801, -0.02481931657706633, - -0.003651181839831423, 0.03195794176786321, 0.04135411406392523, - -0.07562030263210619, 0.001769332364699, -0.01984381173403915, - -0.005029750745010152, 0.02649253902476472, 0.000518085571702734, - 0.001062936684474851, 0.01295950668914449}, - {-0.16164552322896, -0.0006050035060464324, 0.0258380054414968, - 0.003188424740960557, -0.0002058911341821877, 0.03157555987384681, - -0.01678913462596107, 0.03096216145389774, -0.0133791110666919, - 0.1125249625204277, -0.00769017706442472, -0.02653938062180483, - -0.002555329863523985, -0.00861833362947954, 0.01775148884754278, - 0.02529310679774722, 0.0826243417011238, -0.0001036728183032624, - 0.001963562313294209, -0.0935900561309786}, - {0.1652394174588469, -0.002814245280784351, -0.0328982001821263, - -0.02000104712964131, 0.0002208121995725443, -0.02733462178511839, - 0.02648078162927627, -0.01788316626401427, 0.01630747623755998, - 0.1053849023838147, -0.005447706553811218, 0.01810876922536839, - -0.001808914710282444, -0.007687912115607397, -0.01332593672114388, - -0.02110750894891371, -0.07456116592983384, 0.000219072589592394, - 0.001270886972191055, -0.1083616930749109}, - {0.02453279389716254, -0.005820072356487439, 0.100260287284095, - 0.01277522280305745, -0.003184943445296999, 0.05814689527984152, - -0.0934012278200201, -0.03017986487349484, -0.03136625380994165, - 0.00988668352785117, -0.00358900410973142, -0.02017443675004764, - 0.000915384582922184, -0.001460963415183106, -0.01370112443251124, - 0.1130040979284457, -0.1196161771323699, -0.0005800211204222045, - -0.0006153403201024954, 0.00416806428223025}, - {-0.0778089244252535, -0.007055161182430869, -0.0349307504860869, - -0.0811915584276571, -0.004689825871599125, -0.03726108871471753, - 0.1072225647141469, -0.00917015113070944, 0.01381628985996913, - -0.00123227881492089, 0.001815954515275675, 0.005708744099349901, - -0.0001448985044877925, 
-0.001306578795561384, -0.006992743514185243, - 0.1744720240732789, -0.05353628497814023, -0.0007613684227234787, - -0.0003550282315997644, 0.01340106423804634}, - {-0.0159527329868513, -0.007622151568160798, -0.1389875105184963, - 0.1165051999914764, -0.002217810389087748, 0.01550003226513692, - -0.07427664222230566, -0.003371438498619264, 0.01385754771325365, - 0.004759020167383304, 0.001624078805220564, 0.02011638303109029, - -0.001717827082842178, -0.0007424036708598594, -0.003978884451898934, - 0.0866418927301209, -0.01280817739158123, -0.00023039242454603, - 0.002309205802479111, 0.0005926106991001195}}; - - -void protdist_uppercase(Char *ch) -{ - (*ch) = (isupper(*ch) ? (*ch) : toupper(*ch)); -} /* protdist_uppercase */ - - -void protdist_inputnumbers() -{ - /* input the numbers of species and of characters */ - long i; - - fscanf(infile, "%ld%ld", &spp, &chars); - - if (printdata) - fprintf(outfile, "%2ld species, %3ld positions\n\n", spp, chars); - gnode = (aas **)Malloc(spp * sizeof(aas *)); - if (firstset) { - for (i = 0; i < spp; i++) - gnode[i] = (aas *)Malloc(chars * sizeof(aas )); - } - weight = (steparray)Malloc(chars*sizeof(long)); - oldweight = (steparray)Malloc(chars*sizeof(long)); - category = (steparray)Malloc(chars*sizeof(long)); - d = (double **)Malloc(spp*sizeof(double *)); - nayme = (naym *)Malloc(spp*sizeof(naym)); - - for (i = 0; i < spp; ++i) - d[i] = (double *)Malloc(spp*sizeof(double)); -} /* protdist_inputnumbers */ - - -void getoptions() -{ - /* interactively set options */ - long loopcount, loopcount2; - Char ch, ch2; - Char in[100]; - boolean done; - - if (printdata) - fprintf(outfile, "\nProtein distance algorithm, version %s\n\n",VERSION); - putchar('\n'); - weights = false; - printdata = false; - progress = true; - interleaved = true; - similarity = false; - ttratio = 2.0; - whichcode = universal; - whichcat = george; - basesequal = true; - freqa = 0.25; - freqc = 0.25; - freqg = 0.25; - freqt = 0.25; - usejtt = true; - usepmb = 
false; - usepam = false; - kimura = false; - gama = false; - invar = false; - invarfrac = 0.0; - ease = 0.457; - loopcount = 0; - do { - cleerhome(); - printf("\nProtein distance algorithm, version %s\n\n",VERSION); - printf("Settings for this run:\n"); - printf(" P Use JTT, PMB, PAM, Kimura, categories model? %s\n", - usejtt ? "Jones-Taylor-Thornton matrix" : - usepmb ? "Henikoff/Tillier PMB matrix" : - usepam ? "Dayhoff PAM matrix" : - kimura ? "Kimura formula" : - similarity ? "Similarity table" : "Categories model"); - if (!kimura && !similarity) { - printf(" G Gamma distribution of rates among positions?"); - if (gama) - printf(" Yes\n"); - else { - if (invar) - printf(" Gamma+Invariant\n"); - else - printf(" No\n"); - } - } - printf(" C One category of substitution rates?"); - if (!ctgry || categs == 1) - printf(" Yes\n"); - else - printf(" %ld categories\n", categs); - printf(" W Use weights for positions?"); - if (weights) - printf(" Yes\n"); - else - printf(" No\n"); - if (!(usejtt || usepmb || usepam || kimura || similarity)) { - printf(" U Use which genetic code? %s\n", - (whichcode == universal) ? "Universal" : - (whichcode == ciliate) ? "Ciliate" : - (whichcode == mito) ? "Universal mitochondrial" : - (whichcode == vertmito) ? "Vertebrate mitochondrial" : - (whichcode == flymito) ? "Fly mitochondrial" : - (whichcode == yeastmito) ? "Yeast mitochondrial" : ""); - printf(" A Which categorization of amino acids? %s\n", - (whichcat == chemical) ? "Chemical" : - (whichcat == george) ? "George/Hunt/Barker" : "Hall"); - - printf(" E Prob change category (1.0=easy):%8.4f\n",ease); - printf(" T Transition/transversion ratio:%7.3f\n",ttratio); - printf(" F Base Frequencies:"); - if (basesequal) - printf(" Equal\n"); - else - printf("%7.3f%6.3f%6.3f%6.3f\n", freqa, freqc, freqg, freqt); - } - printf(" M Analyze multiple data sets?"); - if (mulsets) - printf(" Yes, %2ld %s\n", datasets, - (justwts ? 
"sets of weights" : "data sets")); - else - printf(" No\n"); - printf(" I Input sequences interleaved? %s\n", - (interleaved ? "Yes" : "No, sequential")); - printf(" 0 Terminal type (IBM PC, ANSI)? %s\n", - ibmpc ? "IBM PC" : - ansi ? "ANSI" : "(none)"); - printf(" 1 Print out the data at start of run %s\n", - (printdata ? "Yes" : "No")); - printf(" 2 Print indications of progress of run %s\n", - progress ? "Yes" : "No"); - printf("\nAre these settings correct? (type Y or the letter for one to change)\n"); - in[0] = '\0'; - getstryng(in); - ch=in[0]; - if (ch == '\n') - ch = ' '; - protdist_uppercase(&ch); - done = (ch == 'Y'); - if (!done) { - if (((strchr("CPGMWI120",ch) != NULL) && (usejtt || usepmb || usepam)) || - ((strchr("CPMWI120",ch) != NULL) && (kimura || similarity)) || - ((strchr("CUAPGETFMWI120",ch) != NULL) && - (! (usejtt || usepmb || usepam || kimura || similarity)))) { - switch (ch) { - - case 'U': - printf("Which genetic code?\n"); - printf(" type for\n\n"); - printf(" U Universal\n"); - printf(" M Mitochondrial\n"); - printf(" V Vertebrate mitochondrial\n"); - printf(" F Fly mitochondrial\n"); - printf(" Y Yeast mitochondrial\n\n"); - loopcount2 = 0; - do { - printf("type U, M, V, F, or Y\n"); - scanf("%c%*[^\n]", &ch); - getchar(); - if (ch == '\n') - ch = ' '; - protdist_uppercase(&ch); - countup(&loopcount2, 10); - } while (ch != 'U' && ch != 'M' && ch != 'V' && ch != 'F' && ch != 'Y'); - switch (ch) { - - case 'U': - whichcode = universal; - break; - - case 'M': - whichcode = mito; - break; - - case 'V': - whichcode = vertmito; - break; - - case 'F': - whichcode = flymito; - break; - - case 'Y': - whichcode = yeastmito; - break; - } - break; - - case 'A': - printf( - "Which of these categorizations of amino acids do you want to use:\n\n"); - printf( - " all have groups: (Glu Gln Asp Asn), (Lys Arg His), (Phe Tyr Trp)\n"); - printf(" plus:\n"); - printf("George/Hunt/Barker:"); - printf(" (Cys), (Met Val Leu Ileu), (Gly Ala Ser Thr Pro)\n"); - 
printf("Chemical: "); - printf(" (Cys Met), (Val Leu Ileu Gly Ala Ser Thr), (Pro)\n"); - printf("Hall: "); - printf(" (Cys), (Met Val Leu Ileu), (Gly Ala Ser Thr), (Pro)\n\n"); - printf("Which do you want to use (type C, H, or G)\n"); - loopcount2 = 0; - do { - scanf("%c%*[^\n]", &ch); - getchar(); - if (ch == '\n') - ch = ' '; - protdist_uppercase(&ch); - countup(&loopcount2, 10); - } while (ch != 'C' && ch != 'H' && ch != 'G'); - switch (ch) { - - case 'C': - whichcat = chemical; - break; - - case 'H': - whichcat = hall; - break; - - case 'G': - whichcat = george; - break; - } - break; - - case 'C': - ctgry = !ctgry; - if (ctgry) { - initcatn(&categs); - initcategs(categs, rate); - } - break; - - case 'W': - weights = !weights; - break; - - case 'P': - if (usejtt) { - usejtt = false; - usepmb = true; - } else { - if (usepmb) { - usepmb = false; - usepam = true; - } else { - if (usepam) { - usepam = false; - kimura = true; - } else { - if (kimura) { - kimura = false; - similarity = true; - } else { - if (similarity) - similarity = false; - else - usejtt = true; - } - } - } - } - break; - - case 'G': - if (!(gama || invar)) - gama = true; - else { - if (gama) { - gama = false; - invar = true; - } else { - if (invar) - invar = false; - } - } - break; - - - case 'E': - printf("Ease of changing category of amino acid?\n"); - loopcount2 = 0; - do { - printf(" (1.0 if no difficulty of changing,\n"); - printf(" less if less easy. 
Can't be negative\n"); - scanf("%lf%*[^\n]", &ease); - getchar(); - countup(&loopcount2, 10); - } while (ease > 1.0 || ease < 0.0); - break; - - case 'T': - loopcount2 = 0; - do { - printf("Transition/transversion ratio?\n"); - scanf("%lf%*[^\n]", &ttratio); - getchar(); - countup(&loopcount2, 10); - } while (ttratio < 0.0); - break; - - case 'F': - loopcount2 = 0; - do { - basesequal = false; - printf("Frequencies of bases A,C,G,T ?\n"); - scanf("%lf%lf%lf%lf%*[^\n]", &freqa, &freqc, &freqg, &freqt); - getchar(); - if (fabs(freqa + freqc + freqg + freqt - 1.0) >= 1.0e-3) - printf("FREQUENCIES MUST SUM TO 1\n"); - countup(&loopcount2, 10); - } while (fabs(freqa + freqc + freqg + freqt - 1.0) >= 1.0e-3); - break; - - case 'M': - mulsets = !mulsets; - if (mulsets) { - printf("Multiple data sets or multiple weights?"); - loopcount2 = 0; - do { - printf(" (type D or W)\n"); - scanf("%c%*[^\n]", &ch2); - getchar(); - if (ch2 == '\n') - ch2 = ' '; - uppercase(&ch2); - countup(&loopcount2, 10); - } while ((ch2 != 'W') && (ch2 != 'D')); - justwts = (ch2 == 'W'); - if (justwts) - justweights(&datasets); - else - initdatasets(&datasets); - } - break; - - case 'I': - interleaved = !interleaved; - break; - - case '0': - if (ibmpc) { - ibmpc = false; - ansi = true; - } else if (ansi) - ansi = false; - else - ibmpc = true; - break; - - case '1': - printdata = !printdata; - break; - - case '2': - progress = !progress; - break; - } - } else { - if (strchr("CUAPGETFMWI120",ch) == NULL) - printf("Not a possible option!\n"); - else - printf("That option not allowed with these settings\n"); - printf("\nPress Enter or Return key to continue\n"); - getchar(); - } - } - countup(&loopcount, 100); - } while (!done); - if (gama || invar) { - loopcount = 0; - do { - printf( -"\nCoefficient of variation of substitution rate among positions (must be positive)\n"); - printf( - " In gamma distribution parameters, this is 1/(square root of alpha)\n"); - scanf("%lf%*[^\n]", &cvi); - getchar(); - 
countup(&loopcount, 10); - } while (cvi <= 0.0); - cvi = 1.0 / (cvi * cvi); - } - if (invar) { - loopcount = 0; - do { - printf("Fraction of invariant positions?\n"); - scanf("%lf%*[^\n]", &invarfrac); - getchar(); - countup (&loopcount, 10); - } while ((invarfrac <= 0.0) || (invarfrac >= 1.0)); - } -} /* getoptions */ - - -void transition() -{ - /* calculations related to transition-transversion ratio */ - double aa, bb, freqr, freqy, freqgr, freqty; - - freqr = freqa + freqg; - freqy = freqc + freqt; - freqgr = freqg / freqr; - freqty = freqt / freqy; - aa = ttratio * freqr * freqy - freqa * freqg - freqc * freqt; - bb = freqa * freqgr + freqc * freqty; - xi = aa / (aa + bb); - xv = 1.0 - xi; - if (xi <= 0.0 && xi >= -epsilon) - xi = 0.0; - if (xi < 0.0){ - printf("THIS TRANSITION-TRANSVERSION RATIO IS IMPOSSIBLE WITH"); - printf(" THESE BASE FREQUENCIES\n"); - exxit(-1);} -} /* transition */ - - -void doinit() -{ - /* initializes variables */ - protdist_inputnumbers(); - getoptions(); - transition(); -} /* doinit*/ - - -void printcategories() -{ /* print out list of categories of positions */ - long i, j; - - fprintf(outfile, "Rate categories\n\n"); - for (i = 1; i <= nmlngth + 3; i++) - putc(' ', outfile); - for (i = 1; i <= chars; i++) { - fprintf(outfile, "%ld", category[i - 1]); - if (i % 60 == 0) { - putc('\n', outfile); - for (j = 1; j <= nmlngth + 3; j++) - putc(' ', outfile); - } else if (i % 10 == 0) - putc(' ', outfile); - } - fprintf(outfile, "\n\n"); -} /* printcategories */ - -void reallocchars(void) -{ - int i; - - free(weight); - free(oldweight); - free(category); - for (i = 0; i < spp; i++) { - free(gnode[i]); - gnode[i] = (aas *)Malloc(chars * sizeof(aas )); - } - weight = (steparray)Malloc(chars*sizeof(long)); - oldweight = (steparray)Malloc(chars*sizeof(long)); - category = (steparray)Malloc(chars*sizeof(long)); -} - -void inputoptions() -{ /* input the information on the options */ - long i; - - if (!firstset && !justwts) { - 
samenumsp(&chars, ith); - reallocchars(); - } if (firstset || !justwts) { - for (i = 0; i < chars; i++) { - category[i] = 1; - oldweight[i] = 1; - weight[i] = 1; - } - } - /* if (!justwts && weights) {*/ - if (justwts || weights) - inputweights(chars, oldweight, &weights); - if (printdata) - putc('\n', outfile); - if (usejtt && printdata) - fprintf(outfile, " Jones-Taylor-Thornton model distance\n"); - if (usepmb && printdata) - fprintf(outfile, " Henikoff/Tillier PMB model distance\n"); - if (usepam && printdata) - fprintf(outfile, " Dayhoff PAM model distance\n"); - if (kimura && printdata) - fprintf(outfile, " Kimura protein distance\n"); - if (!(usejtt || usepmb || usepam || kimura || similarity) && printdata) - fprintf(outfile, " Categories model distance\n"); - if (similarity) - fprintf(outfile, " \n Table of similarity between sequences\n"); - if ((ctgry && categs > 1) && (firstset || !justwts)) { - inputcategs(0, chars, category, categs, "ProtDist"); - if (printdata) - printcategs(outfile, chars, category, "Position categories"); - } else if (printdata && (categs > 1)) { - fprintf(outfile, "\nPosition category Rate of change\n\n"); - for (i = 1; i <= categs; i++) - fprintf(outfile, "%15ld%13.3f\n", i, rate[i - 1]); - putc('\n', outfile); - printcategories(); - } - if (weights && printdata) - printweights(outfile, 0, chars, oldweight, "Positions"); -} /* inputoptions */ - - -void protdist_inputdata() -{ - /* input the names and sequences for each species */ - long i, j, k, l, aasread=0, aasnew=0; - Char charstate; - boolean allread, done; - aas aa=0; /* temporary amino acid for input */ - - if (progress) - putchar('\n'); - j = nmlngth + (chars + (chars - 1) / 10) / 2 - 5; - if (j < nmlngth - 1) - j = nmlngth - 1; - if (j > 37) - j = 37; - if (printdata) { - fprintf(outfile, "\nName"); - for (i = 1; i <= j; i++) - putc(' ', outfile); - fprintf(outfile, "Sequences\n"); - fprintf(outfile, "----"); - for (i = 1; i <= j; i++) - putc(' ', outfile); - 
fprintf(outfile, "---------\n\n"); - } - aasread = 0; - allread = false; - while (!(allread)) { - /* eat white space -- if the separator line has spaces on it*/ - do { - charstate = gettc(infile); - } while (charstate == ' ' || charstate == '\t'); - ungetc(charstate, infile); - if (eoln(infile)) - scan_eoln(infile); - i = 1; - while (i <= spp) { - if ((interleaved && aasread == 0) || !interleaved) - initname(i-1); - if (interleaved) - j = aasread; - else - j = 0; - done = false; - while (((!done) && (!(eoln(infile) || eoff(infile))))) { - if (interleaved) - done = true; - while (((j < chars) & (!(eoln(infile) | eoff(infile))))) { - charstate = gettc(infile); - if (charstate == '\n' || charstate == '\t') - charstate = ' '; - if (charstate == ' ' || (charstate >= '0' && charstate <= '9')) - continue; - protdist_uppercase(&charstate); - if ((!isalpha(charstate) && charstate != '.' && charstate != '?' && - charstate != '-' && charstate != '*') || charstate == 'J' || - charstate == 'O' || charstate == 'U' || charstate == '.') { - printf("ERROR -- bad amino acid: %c at position %ld of species %3ld\n", - charstate, j, i); - if (charstate == '.') { - printf(" Periods (.) 
may not be used as gap characters.\n"); - printf(" The correct gap character is (-)\n"); - } - exxit(-1); - } - j++; - - switch (charstate) { - - case 'A': - aa = ala; - break; - - case 'B': - aa = asx; - break; - - case 'C': - aa = cys; - break; - - case 'D': - aa = asp; - break; - - case 'E': - aa = glu; - break; - - case 'F': - aa = phe; - break; - - case 'G': - aa = gly; - break; - - case 'H': - aa = his; - break; - - case 'I': - aa = ileu; - break; - - case 'K': - aa = lys; - break; - - case 'L': - aa = leu; - break; - - case 'M': - aa = met; - break; - - case 'N': - aa = asn; - break; - - case 'P': - aa = pro; - break; - - case 'Q': - aa = gln; - break; - - case 'R': - aa = arg; - break; - - case 'S': - aa = ser; - break; - - case 'T': - aa = thr; - break; - - case 'V': - aa = val; - break; - - case 'W': - aa = trp; - break; - - case 'X': - aa = unk; - break; - - case 'Y': - aa = tyr; - break; - - case 'Z': - aa = glx; - break; - - case '*': - aa = stop; - break; - - case '?': - aa = quest; - break; - - case '-': - aa = del; - break; - } - gnode[i - 1][j - 1] = aa; - } - if (interleaved) - continue; - if (j < chars) - scan_eoln(infile); - else if (j == chars) - done = true; - } - if (interleaved && i == 1) - aasnew = j; - scan_eoln(infile); - if ((interleaved && j != aasnew) || ((!interleaved) && j != chars)){ - printf("ERROR: SEQUENCES OUT OF ALIGNMENT\n"); - exxit(-1);} - i++; - } - if (interleaved) { - aasread = aasnew; - allread = (aasread == chars); - } else - allread = (i > spp); - } - if ( printdata) { - for (i = 1; i <= ((chars - 1) / 60 + 1); i++) { - for (j = 1; j <= spp; j++) { - for (k = 0; k < nmlngth; k++) - putc(nayme[j - 1][k], outfile); - fprintf(outfile, " "); - l = i * 60; - if (l > chars) - l = chars; - for (k = (i - 1) * 60 + 1; k <= l; k++) { - if (j > 1 && gnode[j - 1][k - 1] == gnode[0][k - 1]) - charstate = '.'; - else { - switch (gnode[j - 1][k - 1]) { - - case ala: - charstate = 'A'; - break; - - case asx: - charstate = 'B'; - 
break; - - case cys: - charstate = 'C'; - break; - - case asp: - charstate = 'D'; - break; - - case glu: - charstate = 'E'; - break; - - case phe: - charstate = 'F'; - break; - - case gly: - charstate = 'G'; - break; - - case his: - charstate = 'H'; - break; - - case ileu: - charstate = 'I'; - break; - - case lys: - charstate = 'K'; - break; - - case leu: - charstate = 'L'; - break; - - case met: - charstate = 'M'; - break; - - case asn: - charstate = 'N'; - break; - - case pro: - charstate = 'P'; - break; - - case gln: - charstate = 'Q'; - break; - - case arg: - charstate = 'R'; - break; - - case ser: - charstate = 'S'; - break; - - case thr: - charstate = 'T'; - break; - - case val: - charstate = 'V'; - break; - - case trp: - charstate = 'W'; - break; - - case tyr: - charstate = 'Y'; - break; - - case glx: - charstate = 'Z'; - break; - - case del: - charstate = '-'; - break; - - case stop: - charstate = '*'; - break; - - case unk: - charstate = 'X'; - break; - - case quest: - charstate = '?'; - break; - - default: /*cases ser1 and ser2 cannot occur*/ - break; - } - } - putc(charstate, outfile); - if (k % 10 == 0 && k % 60 != 0) - putc(' ', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); - } - if (printdata) - putc('\n', outfile); -} /* protdist_inputdata */ - - -void doinput() -{ /* reads the input data */ - long i; - double sumrates, weightsum; - - inputoptions(); - if(!justwts || firstset) - protdist_inputdata(); - if (!ctgry) { - categs = 1; - rate[0] = 1.0; - } - weightsum = 0; - for (i = 0; i < chars; i++) - weightsum += oldweight[i]; - sumrates = 0.0; - for (i = 0; i < chars; i++) - sumrates += oldweight[i] * rate[category[i] - 1]; - for (i = 0; i < categs; i++) - rate[i] *= weightsum / sumrates; -} /* doinput */ - - -void code() -{ - /* make up table of the code 1 = u, 2 = c, 3 = a, 4 = g */ - long n; - aas b; - - trans[0][0][0] = phe; - trans[0][0][1] = phe; - trans[0][0][2] = leu; - trans[0][0][3] = leu; - 
trans[0][1][0] = ser; - trans[0][1][1] = ser; - trans[0][1][2] = ser; - trans[0][1][3] = ser; - trans[0][2][0] = tyr; - trans[0][2][1] = tyr; - trans[0][2][2] = stop; - trans[0][2][3] = stop; - trans[0][3][0] = cys; - trans[0][3][1] = cys; - trans[0][3][2] = stop; - trans[0][3][3] = trp; - trans[1][0][0] = leu; - trans[1][0][1] = leu; - trans[1][0][2] = leu; - trans[1][0][3] = leu; - trans[1][1][0] = pro; - trans[1][1][1] = pro; - trans[1][1][2] = pro; - trans[1][1][3] = pro; - trans[1][2][0] = his; - trans[1][2][1] = his; - trans[1][2][2] = gln; - trans[1][2][3] = gln; - trans[1][3][0] = arg; - trans[1][3][1] = arg; - trans[1][3][2] = arg; - trans[1][3][3] = arg; - trans[2][0][0] = ileu; - trans[2][0][1] = ileu; - trans[2][0][2] = ileu; - trans[2][0][3] = met; - trans[2][1][0] = thr; - trans[2][1][1] = thr; - trans[2][1][2] = thr; - trans[2][1][3] = thr; - trans[2][2][0] = asn; - trans[2][2][1] = asn; - trans[2][2][2] = lys; - trans[2][2][3] = lys; - trans[2][3][0] = ser; - trans[2][3][1] = ser; - trans[2][3][2] = arg; - trans[2][3][3] = arg; - trans[3][0][0] = val; - trans[3][0][1] = val; - trans[3][0][2] = val; - trans[3][0][3] = val; - trans[3][1][0] = ala; - trans[3][1][1] = ala; - trans[3][1][2] = ala; - trans[3][1][3] = ala; - trans[3][2][0] = asp; - trans[3][2][1] = asp; - trans[3][2][2] = glu; - trans[3][2][3] = glu; - trans[3][3][0] = gly; - trans[3][3][1] = gly; - trans[3][3][2] = gly; - trans[3][3][3] = gly; - if (whichcode == mito) - trans[0][3][2] = trp; - if (whichcode == vertmito) { - trans[0][3][2] = trp; - trans[2][3][2] = stop; - trans[2][3][3] = stop; - trans[2][0][2] = met; - } - if (whichcode == flymito) { - trans[0][3][2] = trp; - trans[2][0][2] = met; - trans[2][3][2] = ser; - } - if (whichcode == yeastmito) { - trans[0][3][2] = trp; - trans[1][0][2] = thr; - trans[2][0][2] = met; - } - n = 0; - for (b = ala; (long)b <= (long)val; b = (aas)((long)b + 1)) { - if (b != ser2) { - n++; - numaa[(long)b - (long)ala] = n; - } - } - numaa[(long)ser 
- (long)ala] = (long)ser1 - (long)(ala) + 1; -} /* code */ - - -void protdist_cats() -{ - /* define categories of amino acids */ - aas b; - - /* fundamental subgroups */ - cat[0] = 1; /* for alanine */ - cat[(long)cys - (long)ala] = 1; - cat[(long)met - (long)ala] = 2; - cat[(long)val - (long)ala] = 3; - cat[(long)leu - (long)ala] = 3; - cat[(long)ileu - (long)ala] = 3; - cat[(long)gly - (long)ala] = 4; - cat[0] = 4; - cat[(long)ser - (long)ala] = 4; - cat[(long)thr - (long)ala] = 4; - cat[(long)pro - (long)ala] = 5; - cat[(long)phe - (long)ala] = 6; - cat[(long)tyr - (long)ala] = 6; - cat[(long)trp - (long)ala] = 6; - cat[(long)glu - (long)ala] = 7; - cat[(long)gln - (long)ala] = 7; - cat[(long)asp - (long)ala] = 7; - cat[(long)asn - (long)ala] = 7; - cat[(long)lys - (long)ala] = 8; - cat[(long)arg - (long)ala] = 8; - cat[(long)his - (long)ala] = 8; - if (whichcat == george) { - /* George, Hunt and Barker: sulfhydryl, small hydrophobic, small hydrophilic, - aromatic, acid/acid-amide/hydrophilic, basic */ - for (b = ala; (long)b <= (long)val; b = (aas)((long)b + 1)) { - if (cat[(long)b - (long)ala] == 3) - cat[(long)b - (long)ala] = 2; - if (cat[(long)b - (long)ala] == 5) - cat[(long)b - (long)ala] = 4; - } - } - if (whichcat == chemical) { - /* Conn and Stumpf: monoamino, aliphatic, heterocyclic, - aromatic, dicarboxylic, basic */ - for (b = ala; (long)b <= (long)val; b = (aas)((long)b + 1)) { - if (cat[(long)b - (long)ala] == 2) - cat[(long)b - (long)ala] = 1; - if (cat[(long)b - (long)ala] == 4) - cat[(long)b - (long)ala] = 3; - } - } - /* Ben Hall's personal opinion */ - if (whichcat != hall) - return; - for (b = ala; (long)b <= (long)val; b = (aas)((long)b + 1)) { - if (cat[(long)b - (long)ala] == 3) - cat[(long)b - (long)ala] = 2; - } -} /* protdist_cats */ - - -void maketrans() -{ - /* Make up transition probability matrix from code and category tables */ - long i, j, k, m, n, s, nb1, nb2; - double x, sum; - long sub[3], newsub[3]; - double f[4], g[4]; - aas 
b1, b2; - double TEMP, TEMP1, TEMP2, TEMP3; - - for (i = 0; i <= 19; i++) { - pie[i] = 0.0; - for (j = 0; j <= 19; j++) - prob[i][j] = 0.0; - } - f[0] = freqt; - f[1] = freqc; - f[2] = freqa; - f[3] = freqg; - g[0] = freqc + freqt; - g[1] = freqc + freqt; - g[2] = freqa + freqg; - g[3] = freqa + freqg; - TEMP = f[0]; - TEMP1 = f[1]; - TEMP2 = f[2]; - TEMP3 = f[3]; - fracchange = xi * (2 * f[0] * f[1] / g[0] + 2 * f[2] * f[3] / g[2]) + - xv * (1 - TEMP * TEMP - TEMP1 * TEMP1 - TEMP2 * TEMP2 - TEMP3 * TEMP3); - sum = 0.0; - for (i = 0; i <= 3; i++) { - for (j = 0; j <= 3; j++) { - for (k = 0; k <= 3; k++) { - if (trans[i][j][k] != stop) - sum += f[i] * f[j] * f[k]; - } - } - } - for (i = 0; i <= 3; i++) { - sub[0] = i + 1; - for (j = 0; j <= 3; j++) { - sub[1] = j + 1; - for (k = 0; k <= 3; k++) { - sub[2] = k + 1; - b1 = trans[i][j][k]; - for (m = 0; m <= 2; m++) { - s = sub[m]; - for (n = 1; n <= 4; n++) { - memcpy(newsub, sub, sizeof(long) * 3L); - newsub[m] = n; - x = f[i] * f[j] * f[k] / (3.0 * sum); - if (((s == 1 || s == 2) && (n == 3 || n == 4)) || - ((n == 1 || n == 2) && (s == 3 || s == 4))) - x *= xv * f[n - 1]; - else - x *= xi * f[n - 1] / g[n - 1] + xv * f[n - 1]; - b2 = trans[newsub[0] - 1][newsub[1] - 1][newsub[2] - 1]; - if (b1 != stop) { - nb1 = numaa[(long)b1 - (long)ala]; - pie[nb1 - 1] += x; - if (b2 != stop) { - nb2 = numaa[(long)b2 - (long)ala]; - if (cat[(long)b1 - (long)ala] != cat[(long)b2 - (long)ala]) { - prob[nb1 - 1][nb2 - 1] += x * ease; - prob[nb1 - 1][nb1 - 1] += x * (1.0 - ease); - } else - prob[nb1 - 1][nb2 - 1] += x; - } else - prob[nb1 - 1][nb1 - 1] += x; - } - } - } - } - } - } - for (i = 0; i <= 19; i++) - prob[i][i] -= pie[i]; - for (i = 0; i <= 19; i++) { - for (j = 0; j <= 19; j++) - prob[i][j] /= sqrt(pie[i] * pie[j]); - } - /* computes pi^(1/2)*B*pi^(-1/2) */ -} /* maketrans */ - - -void givens(double (*a)[20], long i, long j, long n, double ctheta, - double stheta, boolean left) -{ /* Givens transform at i,j for 1..n with 
angle theta */ - long k; - double d; - - for (k = 0; k < n; k++) { - if (left) { - d = ctheta * a[i - 1][k] + stheta * a[j - 1][k]; - a[j - 1][k] = ctheta * a[j - 1][k] - stheta * a[i - 1][k]; - a[i - 1][k] = d; - } else { - d = ctheta * a[k][i - 1] + stheta * a[k][j - 1]; - a[k][j - 1] = ctheta * a[k][j - 1] - stheta * a[k][i - 1]; - a[k][i - 1] = d; - } - } -} /* givens */ - - -void coeffs(double x, double y, double *c, double *s, double accuracy) -{ /* compute cosine and sine of theta */ - double root; - - root = sqrt(x * x + y * y); - if (root < accuracy) { - *c = 1.0; - *s = 0.0; - } else { - *c = x / root; - *s = y / root; - } -} /* coeffs */ - - -void tridiag(double (*a)[20], long n, double accuracy) -{ /* Givens tridiagonalization */ - long i, j; - double s, c; - - for (i = 2; i < n; i++) { - for (j = i + 1; j <= n; j++) { - coeffs(a[i - 2][i - 1], a[i - 2][j - 1], &c, &s,accuracy); - givens(a, i, j, n, c, s, true); - givens(a, i, j, n, c, s, false); - givens(eigvecs, i, j, n, c, s, true); - } - } -} /* tridiag */ - - -void shiftqr(double (*a)[20], long n, double accuracy) -{ /* QR eigenvalue-finder */ - long i, j; - double approx, s, c, d, TEMP, TEMP1; - - for (i = n; i >= 2; i--) { - do { - TEMP = a[i - 2][i - 2] - a[i - 1][i - 1]; - TEMP1 = a[i - 1][i - 2]; - d = sqrt(TEMP * TEMP + TEMP1 * TEMP1); - approx = a[i - 2][i - 2] + a[i - 1][i - 1]; - if (a[i - 1][i - 1] < a[i - 2][i - 2]) - approx = (approx - d) / 2.0; - else - approx = (approx + d) / 2.0; - for (j = 0; j < i; j++) - a[j][j] -= approx; - for (j = 1; j < i; j++) { - coeffs(a[j - 1][j - 1], a[j][j - 1], &c, &s, accuracy); - givens(a, j, j + 1, i, c, s, true); - givens(a, j, j + 1, i, c, s, false); - givens(eigvecs, j, j + 1, n, c, s, true); - } - for (j = 0; j < i; j++) - a[j][j] += approx; - } while (fabs(a[i - 1][i - 2]) > accuracy); - } -} /* shiftqr */ - - -void qreigen(double (*prob)[20], long n) -{ /* QR eigenvector/eigenvalue method for symmetric matrix */ - double accuracy; - long i, j; 
- - accuracy = 1.0e-6; - for (i = 0; i < n; i++) { - for (j = 0; j < n; j++) - eigvecs[i][j] = 0.0; - eigvecs[i][i] = 1.0; - } - tridiag(prob, n, accuracy); - shiftqr(prob, n, accuracy); - for (i = 0; i < n; i++) - eig[i] = prob[i][i]; - for (i = 0; i <= 19; i++) { - for (j = 0; j <= 19; j++) - prob[i][j] = sqrt(pie[j]) * eigvecs[i][j]; - } - /* prob[i][j] is the value of U' times pi^(1/2) */ -} /* qreigen */ - - -void jtteigen() -{ /* eigenanalysis for JTT matrix, precomputed */ - memcpy(prob,jttprobs,sizeof(jttprobs)); - memcpy(eig,jtteigs,sizeof(jtteigs)); - fracchange = 0.01; -} /* jtteigen */ - - -void pmbeigen() -{ /* eigenanalysis for PMB matrix, precomputed */ - memcpy(prob,pmbprobs,sizeof(pmbprobs)); - memcpy(eig,pmbeigs,sizeof(pmbeigs)); - fracchange = 1.0; -} /* pmbeigen */ - - -void pameigen() -{ /* eigenanalysis for PAM matrix, precomputed */ - memcpy(prob,pamprobs,sizeof(pamprobs)); - memcpy(eig,pameigs,sizeof(pameigs)); - fracchange = 0.01; -} /* pameigen */ - - -void predict(long nb1, long nb2, long cat) -{ /* make contribution to prediction of this aa pair */ - long m; - double TEMP; - - for (m = 0; m <= 19; m++) { - if (gama || invar) - elambdat = exp(-cvi*log(1.0-rate[cat-1]*tt*(eig[m]/(1.0-invarfrac))/cvi)); - else - elambdat = exp(rate[cat-1]*tt * eig[m]); - q = prob[m][nb1 - 1] * prob[m][nb2 - 1] * elambdat; - p += q; - if (!gama && !invar) - dp += rate[cat-1]*eig[m] * q; - else - dp += (rate[cat-1]*eig[m]/(1.0-rate[cat-1]*tt*(eig[m]/(1.0-invarfrac))/cvi)) * q; - TEMP = eig[m]; - if (!gama && !invar) - d2p += TEMP * TEMP * q; - else - d2p += (rate[cat-1]*rate[cat-1]*eig[m]*eig[m]*(1.0+1.0/cvi)/ - ((1.0-rate[cat-1]*tt*eig[m]/cvi) - *(1.0-rate[cat-1]*tt*eig[m]/cvi))) * q; - } - if (nb1 == nb2) { - p *= (1.0 - invarfrac); - p += invarfrac; - } - dp *= (1.0 - invarfrac); - d2p *= (1.0 - invarfrac); -} /* predict */ - -void makedists() -{ /* compute the distances */ - long i, j, k, m, n, itterations, nb1, nb2, cat; - double delta, lnlike, slope, 
curv; - boolean neginfinity, inf, overlap; - aas b1, b2; - - if (!(printdata || similarity)) - fprintf(outfile, "%5ld\n", spp); - if (progress) - printf("Computing distances:\n"); - for (i = 1; i <= spp; i++) { - if (progress) - printf(" "); - if (progress) { - for (j = 0; j < nmlngth; j++) - putchar(nayme[i - 1][j]); - } - if (progress) { - printf(" "); - fflush(stdout); - } - if (similarity) - d[i-1][i-1] = 1.0; - else - d[i-1][i-1] = 0.0; - for (j = 0; j <= i - 2; j++) { - if (!(kimura || similarity)) { - if (usejtt || usepmb || usepam) - tt = 0.1/fracchange; - else - tt = 1.0; - delta = tt / 2.0; - itterations = 0; - inf = false; - do { - lnlike = 0.0; - slope = 0.0; - curv = 0.0; - neginfinity = false; - overlap = false; - for (k = 0; k < chars; k++) { - if (oldweight[k] > 0) { - cat = category[k]; - b1 = gnode[i - 1][k]; - b2 = gnode[j][k]; - if (b1 != stop && b1 != del && b1 != quest && b1 != unk && - b2 != stop && b2 != del && b2 != quest && b2 != unk) { - overlap = true; - p = 0.0; - dp = 0.0; - d2p = 0.0; - nb1 = numaa[(long)b1 - (long)ala]; - nb2 = numaa[(long)b2 - (long)ala]; - if (b1 != asx && b1 != glx && b2 != asx && b2 != glx) - predict(nb1, nb2, cat); - else { - if (b1 == asx) { - if (b2 == asx) { - predict(3L, 3L, cat); - predict(3L, 4L, cat); - predict(4L, 3L, cat); - predict(4L, 4L, cat); - } else { - if (b2 == glx) { - predict(3L, 6L, cat); - predict(3L, 7L, cat); - predict(4L, 6L, cat); - predict(4L, 7L, cat); - } else { - predict(3L, nb2, cat); - predict(4L, nb2, cat); - } - } - } else { - if (b1 == glx) { - if (b2 == asx) { - predict(6L, 3L, cat); - predict(6L, 4L, cat); - predict(7L, 3L, cat); - predict(7L, 4L, cat); - } else { - if (b2 == glx) { - predict(6L, 6L, cat); - predict(6L, 7L, cat); - predict(7L, 6L, cat); - predict(7L, 7L, cat); - } else { - predict(6L, nb2, cat); - predict(7L, nb2, cat); - } - } - } else { - if (b2 == asx) { - predict(nb1, 3L, cat); - predict(nb1, 4L, cat); - predict(nb1, 3L, cat); - predict(nb1, 4L, cat); - } 
else if (b2 == glx) { - predict(nb1, 6L, cat); - predict(nb1, 7L, cat); - predict(nb1, 6L, cat); - predict(nb1, 7L, cat); - } - } - } - } - if (p <= 0.0) - neginfinity = true; - else { - lnlike += oldweight[k]*log(p); - slope += oldweight[k]*dp / p; - curv += oldweight[k]*(d2p / p - dp * dp / (p * p)); - } - } - } - } - itterations++; - if (!overlap){ - printf("\nWARNING: NO OVERLAP BETWEEN SEQUENCES %ld AND %ld; -1.0 WAS WRITTEN\n", i, j+1); - tt = -1.0/fracchange; - itterations = 20; - inf = true; - } else if (!neginfinity) { - if (curv < 0.0) { - tt -= slope / curv; - if (tt > 10000.0) { - printf("\nWARNING: INFINITE DISTANCE BETWEEN SPECIES %ld AND %ld; -1.0 WAS WRITTEN\n", i, j+1); - tt = -1.0/fracchange; - inf = true; - itterations = 20; - } - } - else { - if ((slope > 0.0 && delta < 0.0) || (slope < 0.0 && delta > 0.0)) - delta /= -2; - tt += delta; - } - } else { - delta /= -2; - tt += delta; - } - if (tt < protepsilon && !inf) - tt = protepsilon; - } while (itterations != 20); - } else { - m = 0; - n = 0; - for (k = 0; k < chars; k++) { - b1 = gnode[i - 1][k]; - b2 = gnode[j][k]; - if ((((long)b1 <= (long)val) || ((long)b1 == (long)ser)) - && (((long)b2 <= (long)val) || ((long)b2 == (long)ser))) { - if (b1 == b2) - m++; - n++; - } - } - p = 1 - (double)m / n; - if (kimura) { - dp = 1.0 - p - 0.2 * p * p; - if (dp < 0.0) { - printf( -"\nDISTANCE BETWEEN SEQUENCES %3ld AND %3ld IS TOO LARGE FOR KIMURA FORMULA\n", - i, j + 1); - tt = -1.0; - } else - tt = -log(dp); - } else { /* if similarity */ - tt = 1.0 - p; - } - } - d[i - 1][j] = fracchange * tt; - d[j][i - 1] = d[i - 1][j]; - if (progress) { - putchar('.'); - fflush(stdout); - } - } - if (progress) { - putchar('\n'); - fflush(stdout); - } - } - if (!similarity) { - for (i = 0; i < spp; i++) { - for (j = 0; j < nmlngth; j++) - putc(nayme[i][j], outfile); - k = spp; - for (j = 1; j <= k; j++) { - fprintf(outfile, "%10.6f", d[i][j - 1]); - if ((j + 1) % 7 == 0 && j < k) - putc('\n', outfile); - } - 
putc('\n', outfile); - } - } else { - for (i = 0; i < spp; i += 6) { - if ((i+6) < spp) - n = i+6; - else - n = spp; - fprintf(outfile, " "); - for (j = i; j < n ; j++) { - for (k = 0; k < (nmlngth-2); k++) - putc(nayme[j][k], outfile); - putc(' ', outfile); - putc(' ', outfile); - } - putc('\n', outfile); - for (j = 0; j < spp; j++) { - for (k = 0; k < nmlngth; k++) - putc(nayme[j][k], outfile); - if ((i+6) < spp) - n = i+6; - else - n = spp; - for (k = i; k < n ; k++) - fprintf(outfile, "%10.6f", d[j][k]); - putc('\n', outfile); - } - putc('\n', outfile); - } - } - if (progress) - printf("\nOutput written to file \"%s\"\n\n", outfilename); -} /* makedists */ - - -int main(int argc, Char *argv[]) -{ /* ML Protein distances by PMB, JTT, PAM or categories model */ -#ifdef MAC - argc = 1; /* macsetup("Protdist",""); */ - argv[0] = "Protdist"; -#endif - init(argc, argv); - openfile(&infile,INFILE,"input file","r",argv[0],infilename); - openfile(&outfile,OUTFILE,"output file","w",argv[0],outfilename); - ibmpc = IBMCRT; - ansi = ANSICRT; - mulsets = false; - datasets = 1; - firstset = true; - doinit(); - if (!(kimura || similarity)) - code(); - if (!(usejtt || usepmb || usepam || kimura || similarity)) { - protdist_cats(); - maketrans(); - qreigen(prob, 20L); - } else { - if (kimura || similarity) - fracchange = 1.0; - else { - if (usejtt) - jtteigen(); - else { - if (usepmb) - pmbeigen(); - else - pameigen(); - } - } - } - if (ctgry) - openfile(&catfile,CATFILE,"categories file","r",argv[0],catfilename); - if (weights || justwts) - openfile(&weightfile,WEIGHTFILE,"weights file","r",argv[0],weightfilename); - for (ith = 1; ith <= datasets; ith++) { - doinput(); - if (ith == 1) - firstset = false; - if ((datasets > 1) && progress) - printf("\nData set # %ld:\n\n", ith); - makedists(); - } - FClose(outfile); - FClose(infile); -#ifdef MAC - fixmacfile(outfilename); -#endif - return 0; -} /* Protein distances */ - diff --git 
a/forester/archive/RIO/others/phylip_mod/src/protpars.c b/forester/archive/RIO/others/phylip_mod/src/protpars.c deleted file mode 100644 index 6020255..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/protpars.c +++ /dev/null @@ -1,1925 +0,0 @@ - -#include "phylip.h" -#include "seq.h" - -/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. - Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. - Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. */ - -#define maxtrees 100 /* maximum number of tied trees stored */ - -typedef enum { - universal, ciliate, mito, vertmito, flymito, yeastmito -} codetype; - -/* nodes will form a binary tree */ - -typedef struct gseq { - seqptr seq; - struct gseq *next; -} gseq; - -#ifndef OLDC -/* function prototypes */ -void protgnu(gseq **); -void protchuck(gseq *); -void code(void); -void setup(void); -void getoptions(void); -void protalloctree(void); -void allocrest(void); -void doinit(void); -void protinputdata(void); - -void protmakevalues(void); -void doinput(void); -void protfillin(node *, node *, node *); -void protpreorder(node *); -void protadd(node *, node *, node *); -void protre_move(node **, node **); -void evaluate(node *); -void protpostorder(node *); -void protreroot(node *); -void protsavetraverse(node *, long *, boolean *); - -void protsavetree(long *, boolean *); -void tryadd(node *, node **, node **); -void addpreorder(node *, node *, node *); -void tryrearr(node *, boolean *); -void repreorder(node *, boolean *); -void rearrange(node **); -void protgetch(Char *); -void protaddelement(node **, long *, long *, boolean *); -void prottreeread(void); -void protancestset(long *, long *, long *, long *, long *); - -void prothyprint(long , long , boolean *, node *, boolean *, boolean *); -void prothyptrav(node *, sitearray *, long, long, long *, boolean *, - sitearray); -void 
prothypstates(long *); -void describe(void); -void maketree(void); -void reallocnode(node* p); -void reallocchars(void); -/* function prototypes */ -#endif - - - -Char infilename[FNMLNGTH], outfilename[FNMLNGTH], intreename[FNMLNGTH], outtreename[FNMLNGTH], weightfilename[FNMLNGTH]; -node *root; -long chars, col, msets, ith, njumble, jumb; -/* chars = number of sites in actual sequences */ -long inseed, inseed0; -boolean jumble, usertree, weights, thresh, trout, progress, stepbox, - justwts, ancseq, mulsets, firstset; -codetype whichcode; -long fullset, fulldel; -pointarray treenode; /* pointers to all nodes in tree */ -double threshold; -steptr threshwt; -longer seed; -long *enterorder; -sitearray translate[(long)quest - (long)ala + 1]; -aas trans[4][4][4]; -long **fsteps; -bestelm *bestrees; -boolean dummy; -gseq *garbage; -node *temp, *temp1; -Char ch; -aas tmpa; -char *progname; - -/* Local variables for maketree, propagated globally for c version: */ -long minwhich; -double like, bestyet, bestlike, minsteps, bstlike2; -boolean lastrearr, recompute; -node *there; -double nsteps[maxuser]; -long *place; -boolean *names; - - -void protgnu(gseq **p) -{ - /* this and the following are do-it-yourself garbage collectors. 
- Make a new node or pull one off the garbage list */ - if (garbage != NULL) { - *p = garbage; - free((*p)->seq); - (*p)->seq = (seqptr)Malloc(chars*sizeof(sitearray)); - garbage = garbage->next; - } else { - *p = (gseq *)Malloc(sizeof(gseq)); - (*p)->seq = (seqptr)Malloc(chars*sizeof(sitearray)); - } - (*p)->next = NULL; -} /* protgnu */ - - -void protchuck(gseq *p) -{ - /* collect garbage on p -- put it on front of garbage list */ - p->next = garbage; - garbage = p; -} /* protchuck */ - - -void code() -{ - /* make up table of the code 1 = u, 2 = c, 3 = a, 4 = g */ - trans[0][0][0] = phe; - trans[0][0][1] = phe; - trans[0][0][2] = leu; - trans[0][0][3] = leu; - trans[0][1][0] = ser1; - trans[0][1][1] = ser1; - trans[0][1][2] = ser1; - trans[0][1][3] = ser1; - trans[0][2][0] = tyr; - trans[0][2][1] = tyr; - trans[0][2][2] = stop; - trans[0][2][3] = stop; - trans[0][3][0] = cys; - trans[0][3][1] = cys; - trans[0][3][2] = stop; - trans[0][3][3] = trp; - trans[1][0][0] = leu; - trans[1][0][1] = leu; - trans[1][0][2] = leu; - trans[1][0][3] = leu; - trans[1][1][0] = pro; - trans[1][1][1] = pro; - trans[1][1][2] = pro; - trans[1][1][3] = pro; - trans[1][2][0] = his; - trans[1][2][1] = his; - trans[1][2][2] = gln; - trans[1][2][3] = gln; - trans[1][3][0] = arg; - trans[1][3][1] = arg; - trans[1][3][2] = arg; - trans[1][3][3] = arg; - trans[2][0][0] = ileu; - trans[2][0][1] = ileu; - trans[2][0][2] = ileu; - trans[2][0][3] = met; - trans[2][1][0] = thr; - trans[2][1][1] = thr; - trans[2][1][2] = thr; - trans[2][1][3] = thr; - trans[2][2][0] = asn; - trans[2][2][1] = asn; - trans[2][2][2] = lys; - trans[2][2][3] = lys; - trans[2][3][0] = ser2; - trans[2][3][1] = ser2; - trans[2][3][2] = arg; - trans[2][3][3] = arg; - trans[3][0][0] = val; - trans[3][0][1] = val; - trans[3][0][2] = val; - trans[3][0][3] = val; - trans[3][1][0] = ala; - trans[3][1][1] = ala; - trans[3][1][2] = ala; - trans[3][1][3] = ala; - trans[3][2][0] = asp; - trans[3][2][1] = asp; - trans[3][2][2] = 
glu; - trans[3][2][3] = glu; - trans[3][3][0] = gly; - trans[3][3][1] = gly; - trans[3][3][2] = gly; - trans[3][3][3] = gly; - if (whichcode == mito) - trans[0][3][2] = trp; - if (whichcode == vertmito) { - trans[0][3][2] = trp; - trans[2][3][2] = stop; - trans[2][3][3] = stop; - trans[2][0][2] = met; - } - if (whichcode == flymito) { - trans[0][3][2] = trp; - trans[2][0][2] = met; - trans[2][3][2] = ser2; - } - if (whichcode == yeastmito) { - trans[0][3][2] = trp; - trans[1][0][2] = thr; - trans[2][0][2] = met; - } -} /* code */ - - -void setup() -{ - /* set up set table to get aasets from aas */ - aas a, b; - long i, j, k, l, s; - - for (a = ala; (long)a <= (long)stop; a = (aas)((long)a + 1)) { - translate[(long)a - (long)ala][0] = 1L << ((long)a); - translate[(long)a - (long)ala][1] = 1L << ((long)a); - } - for (i = 0; i <= 3; i++) { - for (j = 0; j <= 3; j++) { - for (k = 0; k <= 3; k++) { - for (l = 0; l <= 3; l++) { - translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[l][j][k]); - translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[i][l][k]); - translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[i][j][l]); - } - } - } - } - translate[(long)del - (long)ala][1] = 1L << ((long)del); - fulldel = (1L << ((long)stop + 1)) - (1L << ((long)ala)); - fullset = fulldel & (~(1L << ((long)del))); - translate[(long)asx - (long)ala][0] - = (1L << ((long)asn)) | (1L << ((long)asp)); - translate[(long)glx - (long)ala][0] - = (1L << ((long)gln)) | (1L << ((long)glu)); - translate[(long)ser - (long)ala][0] - = (1L << ((long)ser1)) | (1L << ((long)ser2)); - translate[(long)unk - (long)ala][0] = fullset; - translate[(long)quest - (long)ala][0] = fulldel; - translate[(long)asx - (long)ala][1] = translate[(long)asn - (long)ala][1] - | translate[(long)asp - (long)ala][1]; - translate[(long)glx - (long)ala][1] = translate[(long)gln - (long)ala][1] - | translate[(long)glu - (long)ala][1]; - translate[(long)ser - (long)ala][1] = translate[(long)ser1 - (long)ala][1] - | 
translate[(long)ser2 - (long)ala][1]; - translate[(long)unk - (long)ala][1] = fullset; - translate[(long)quest - (long)ala][1] = fulldel; - for (a = ala; (long)a <= (long)quest; a = (aas)((long)a + 1)) { - s = 0; - for (b = ala; (long)b <= (long)stop; b = (aas)((long)b + 1)) { - if (((1L << ((long)b)) & translate[(long)a - (long)ala][1]) != 0) - s |= translate[(long)b - (long)ala][1]; - } - translate[(long)a - (long)ala][2] = s; - } -} /* setup */ - - -void getoptions() -{ - /* interactively set options */ - long loopcount, loopcount2; - Char ch, ch2; - - fprintf(outfile, "\nProtein parsimony algorithm, version %s\n\n",VERSION); - putchar('\n'); - jumble = false; - njumble = 1; - outgrno = 1; - outgropt = false; - thresh = false; - trout = true; - usertree = false; - weights = false; - whichcode = universal; - printdata = false; - progress = true; - treeprint = true; - stepbox = false; - ancseq = false; - dotdiff = true; - interleaved = true; - loopcount = 0; - for (;;) { - cleerhome(); - printf("\nProtein parsimony algorithm, version %s\n\n",VERSION); - printf("Setting for this run:\n"); - printf(" U Search for best tree? %s\n", - (usertree ? "No, use user trees in input file" : "Yes")); - if (!usertree) { - printf(" J Randomize input order of sequences?"); - if (jumble) - printf(" Yes (seed =%8ld,%3ld times)\n", inseed0, njumble); - else - printf(" No. Use input order\n"); - } - printf(" O Outgroup root?"); - if (outgropt) - printf(" Yes, at sequence number%3ld\n", outgrno); - else - printf(" No, use as outgroup species%3ld\n", outgrno); - printf(" T Use Threshold parsimony?"); - if (thresh) - printf(" Yes, count steps up to%4.1f per site\n", threshold); - else - printf(" No, use ordinary parsimony\n"); - printf(" C Use which genetic code? %s\n", - (whichcode == universal) ? "Universal" : - (whichcode == ciliate) ? "Ciliate" : - (whichcode == mito) ? "Universal mitochondrial" : - (whichcode == vertmito) ? "Vertebrate mitochondrial" : - (whichcode == flymito) ? 
"Fly mitochondrial" : - (whichcode == yeastmito) ? "Yeast mitochondrial" : ""); - printf(" W Sites weighted? %s\n", - (weights ? "Yes" : "No")); - printf(" M Analyze multiple data sets?"); - if (mulsets) - printf(" Yes, %2ld %s\n", msets, - (justwts ? "sets of weights" : "data sets")); - else - printf(" No\n"); - printf(" I Input sequences interleaved? %s\n", - (interleaved ? "Yes" : "No")); - printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n", - (ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)")); - printf(" 1 Print out the data at start of run %s\n", - (printdata ? "Yes" : "No")); - printf(" 2 Print indications of progress of run %s\n", - (progress ? "Yes" : "No")); - printf(" 3 Print out tree %s\n", - (treeprint ? "Yes" : "No")); - printf(" 4 Print out steps in each site %s\n", - (stepbox ? "Yes" : "No")); - printf(" 5 Print sequences at all nodes of tree %s\n", - (ancseq ? "Yes" : "No")); - if (ancseq || printdata) - printf(" . Use dot-differencing to display them %s\n", - dotdiff ? "Yes" : "No"); - printf(" 6 Write out trees onto tree file? %s\n", - (trout ? "Yes" : "No")); - if(weights && justwts){ - printf( - "WARNING: W option and Multiple Weights options are both on. "); - printf( - "The W menu option is unnecessary and has no additional effect. \n"); - } - printf( - "\nAre these settings correct? 
(type Y or the letter for one to change)\n"); - scanf("%c%*[^\n]", &ch); - getchar(); - uppercase(&ch); - if (ch == 'Y') - break; - if (strchr("WCJOTUMI12345.60",ch) != NULL) { - switch (ch) { - - case 'J': - jumble = !jumble; - if (jumble) - initjumble(&inseed, &inseed0, seed, &njumble); - else njumble = 1; - break; - - case 'W': - weights = !weights; - break; - - case 'O': - outgropt = !outgropt; - if (outgropt) - initoutgroup(&outgrno, spp); - else outgrno = 1; - break; - - case 'T': - thresh = !thresh; - if (thresh) - initthreshold(&threshold); - break; - - case 'C': - printf("\nWhich genetic code?\n"); - printf(" type for\n\n"); - printf(" U Universal\n"); - printf(" M Mitochondrial\n"); - printf(" V Vertebrate mitochondrial\n"); - printf(" F Fly mitochondrial\n"); - printf(" Y Yeast mitochondrial\n\n"); - loopcount2 = 0; - do { - printf("type U, M, V, F, or Y\n"); - scanf("%c%*[^\n]", &ch); - getchar(); - if (ch == '\n') - ch = ' '; - uppercase(&ch); - countup(&loopcount2, 10); - } while (ch != 'U' && ch != 'M' && ch != 'V' - && ch != 'F' && ch != 'Y'); - switch (ch) { - - case 'U': - whichcode = universal; - break; - - case 'M': - whichcode = mito; - break; - - case 'V': - whichcode = vertmito; - break; - - case 'F': - whichcode = flymito; - break; - - case 'Y': - whichcode = yeastmito; - break; - } - break; - - case 'M': - mulsets = !mulsets; - if (mulsets){ - printf("Multiple data sets or multiple weights?"); - loopcount2 = 0; - do { - printf(" (type D or W)\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%c%*[^\n]", &ch2); - getchar(); - if (ch2 == '\n') - ch2 = ' '; - uppercase(&ch2); - countup(&loopcount2, 10); - } while ((ch2 != 'W') && (ch2 != 'D')); - justwts = (ch2 == 'W'); - if (justwts) - justweights(&msets); - else - initdatasets(&msets); - if (!jumble) { - jumble = true; - initjumble(&inseed, &inseed0, seed, &njumble); - } - } - break; - - case 'I': - interleaved = !interleaved; - break; - - case 'U': - usertree = !usertree; - break; 
- - case '0': - initterminal(&ibmpc, &ansi); - break; - - case '1': - printdata = !printdata; - break; - - case '2': - progress = !progress; - break; - - case '3': - treeprint = !treeprint; - break; - - case '4': - stepbox = !stepbox; - break; - - case '5': - ancseq = !ancseq; - break; - - case '.': - dotdiff = !dotdiff; - break; - - case '6': - trout = !trout; - break; - } - } else - printf("Not a possible option!\n"); - countup(&loopcount, 100); - } -} /* getoptions */ - - -void protalloctree() -{ /* allocate treenode dynamically */ - long i, j; - node *p, *q; - - treenode = (pointarray)Malloc(nonodes*sizeof(node *)); - for (i = 0; i < (spp); i++) { - treenode[i] = (node *)Malloc(sizeof(node)); - treenode[i]->numsteps = (steptr)Malloc(chars*sizeof(long)); - treenode[i]->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); - treenode[i]->seq = (aas *)Malloc(chars*sizeof(aas)); - } - for (i = spp; i < (nonodes); i++) { - q = NULL; - for (j = 1; j <= 3; j++) { - p = (node *)Malloc(sizeof(node)); - p->numsteps = (steptr)Malloc(chars*sizeof(long)); - p->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); - p->seq = (aas *)Malloc(chars*sizeof(aas)); - p->next = q; - q = p; - } - p->next->next->next = p; - treenode[i] = p; - } -} /* protalloctree */ - - -void reallocnode(node* p) -{ - free(p->numsteps); - free(p->siteset); - free(p->seq); - p->numsteps = (steptr)Malloc(chars*sizeof(long)); - p->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); - p->seq = (aas *)Malloc(chars*sizeof(aas)); -} - - -void reallocchars(void) -{ /* reallocates variables that are dependand on the number of chars - * do we need to reallocate the garbage list too? 
*/ - long i; - node *p; - - if (usertree) - for (i = 0; i < maxuser; i++) { - free(fsteps[i]); - fsteps[i] = (long *)Malloc(chars*sizeof(long)); - } - - for (i = 0; i < nonodes; i++) { - reallocnode(treenode[i]); - if (i >= spp) { - p=treenode[i]->next; - while (p != treenode[i]) { - reallocnode(p); - p = p->next; - } - } - } - - free(weight); - free(threshwt); - free(temp->numsteps); - free(temp->siteset); - free(temp->seq); - free(temp1->numsteps); - free(temp1->siteset); - free(temp1->seq); - - weight = (steptr)Malloc(chars*sizeof(long)); - threshwt = (steptr)Malloc(chars*sizeof(long)); - temp->numsteps = (steptr)Malloc(chars*sizeof(long)); - temp->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); - temp->seq = (aas *)Malloc(chars*sizeof(aas)); - temp1->numsteps = (steptr)Malloc(chars*sizeof(long)); - temp1->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); - temp1->seq = (aas *)Malloc(chars*sizeof(aas)); -} - - -void allocrest() -{ /* allocate remaining global arrays and variables dynamically */ - long i; - - if (usertree) { - fsteps = (long **)Malloc(maxuser*sizeof(long *)); - for (i = 0; i < maxuser; i++) - fsteps[i] = (long *)Malloc(chars*sizeof(long)); - } - bestrees = (bestelm *)Malloc(maxtrees*sizeof(bestelm)); - for (i = 1; i <= maxtrees; i++) - bestrees[i - 1].btree = (long *)Malloc(spp*sizeof(long)); - nayme = (naym *)Malloc(spp*sizeof(naym)); - enterorder = (long *)Malloc(spp*sizeof(long)); - place = (long *)Malloc(nonodes*sizeof(long)); - weight = (steptr)Malloc(chars*sizeof(long)); - threshwt = (steptr)Malloc(chars*sizeof(long)); - temp = (node *)Malloc(sizeof(node)); - temp->numsteps = (steptr)Malloc(chars*sizeof(long)); - temp->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); - temp->seq = (aas *)Malloc(chars*sizeof(aas)); - temp1 = (node *)Malloc(sizeof(node)); - temp1->numsteps = (steptr)Malloc(chars*sizeof(long)); - temp1->siteset = (seqptr)Malloc(chars*sizeof(sitearray)); - temp1->seq = (aas *)Malloc(chars*sizeof(aas)); -} /* allocrest */ 
- - -void doinit() -{ - /* initializes variables */ - - inputnumbers(&spp, &chars, &nonodes, 1); - getoptions(); - if (printdata) - fprintf(outfile, "%2ld species, %3ld sites\n\n", spp, chars); - protalloctree(); - allocrest(); -} /* doinit*/ - - -void protinputdata() -{ - /* input the names and sequences for each species */ - long i, j, k, l, aasread, aasnew = 0; - Char charstate; - boolean allread, done; - aas aa; /* temporary amino acid for input */ - - if (printdata) - headings(chars, "Sequences", "---------"); - aasread = 0; - allread = false; - while (!(allread)) { - /* eat white space -- if the separator line has spaces on it*/ - do { - charstate = gettc(infile); - } while (charstate == ' ' || charstate == '\t'); - ungetc(charstate, infile); - if (eoln(infile)) { - scan_eoln(infile); - } - i = 1; - while (i <= spp) { - if ((interleaved && aasread == 0) || !interleaved) - initname(i - 1); - j = interleaved ? aasread : 0; - done = false; - while (!done && !eoff(infile)) { - if (interleaved) - done = true; - while (j < chars && !(eoln(infile) || eoff(infile))) { - charstate = gettc(infile); - if (charstate == '\n' || charstate == '\t') - charstate = ' '; - if (charstate == ' ' || (charstate >= '0' && charstate <= '9')) - continue; - uppercase(&charstate); - if ((!isalpha(charstate) && charstate != '?' && - charstate != '-' && charstate != '*') || charstate == 'J' || - charstate == 'O' || charstate == 'U') { - printf("WARNING -- BAD AMINO ACID:%c",charstate); - printf(" AT POSITION%5ld OF SPECIES %3ld\n",j,i); - exxit(-1); - } - j++; - aa = (charstate == 'A') ? ala : - (charstate == 'B') ? asx : - (charstate == 'C') ? cys : - (charstate == 'D') ? asp : - (charstate == 'E') ? glu : - (charstate == 'F') ? phe : - (charstate == 'G') ? gly : aa; - aa = (charstate == 'H') ? his : - (charstate == 'I') ? ileu : - (charstate == 'K') ? lys : - (charstate == 'L') ? leu : - (charstate == 'M') ? met : - (charstate == 'N') ? asn : - (charstate == 'P') ? 
pro : - (charstate == 'Q') ? gln : - (charstate == 'R') ? arg : aa; - aa = (charstate == 'S') ? ser : - (charstate == 'T') ? thr : - (charstate == 'V') ? val : - (charstate == 'W') ? trp : - (charstate == 'X') ? unk : - (charstate == 'Y') ? tyr : - (charstate == 'Z') ? glx : - (charstate == '*') ? stop : - (charstate == '?') ? quest: - (charstate == '-') ? del : aa; - - treenode[i - 1]->seq[j - 1] = aa; - memcpy(treenode[i - 1]->siteset[j - 1], - translate[(long)aa - (long)ala], sizeof(sitearray)); - } - if (interleaved) - continue; - if (j < chars) - scan_eoln(infile); - else if (j == chars) - done = true; - } - if (interleaved && i == 1) - aasnew = j; - scan_eoln(infile); - if ((interleaved && j != aasnew) || ((!interleaved) && j != chars)){ - printf("ERROR: SEQUENCES OUT OF ALIGNMENT\n"); - exxit(-1);} - i++; - } - if (interleaved) { - aasread = aasnew; - allread = (aasread == chars); - } else - allread = (i > spp); - } - if (printdata) { - for (i = 1; i <= ((chars - 1) / 60 + 1); i++) { - for (j = 1; j <= (spp); j++) { - for (k = 0; k < nmlngth; k++) - putc(nayme[j - 1][k], outfile); - fprintf(outfile, " "); - l = i * 60; - if (l > chars) - l = chars; - for (k = (i - 1) * 60 + 1; k <= l; k++) { - if (j > 1 && treenode[j - 1]->seq[k - 1] == treenode[0]->seq[k - 1]) - charstate = '.'; - else { - tmpa = treenode[j-1]->seq[k-1]; - charstate = (tmpa == ala) ? 'A' : - (tmpa == asx) ? 'B' : - (tmpa == cys) ? 'C' : - (tmpa == asp) ? 'D' : - (tmpa == glu) ? 'E' : - (tmpa == phe) ? 'F' : - (tmpa == gly) ? 'G' : - (tmpa == his) ? 'H' : - (tmpa ==ileu) ? 'I' : - (tmpa == lys) ? 'K' : - (tmpa == leu) ? 'L' : charstate; - charstate = (tmpa == met) ? 'M' : - (tmpa == asn) ? 'N' : - (tmpa == pro) ? 'P' : - (tmpa == gln) ? 'Q' : - (tmpa == arg) ? 'R' : - (tmpa == ser) ? 'S' : - (tmpa ==ser1) ? 'S' : - (tmpa ==ser2) ? 'S' : charstate; - charstate = (tmpa == thr) ? 'T' : - (tmpa == val) ? 'V' : - (tmpa == trp) ? 'W' : - (tmpa == unk) ? 'X' : - (tmpa == tyr) ? 
'Y' : - (tmpa == glx) ? 'Z' : - (tmpa == del) ? '-' : - (tmpa ==stop) ? '*' : - (tmpa==quest) ? '?' : charstate; - } - putc(charstate, outfile); - if (k % 10 == 0 && k % 60 != 0) - putc(' ', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); -} /* protinputdata */ - - -void protmakevalues() -{ - /* set up fractional likelihoods at tips */ - long i, j; - node *p; - - for (i = 1; i <= nonodes; i++) { - treenode[i - 1]->back = NULL; - treenode[i - 1]->tip = (i <= spp); - treenode[i - 1]->index = i; - for (j = 0; j < (chars); j++) - treenode[i - 1]->numsteps[j] = 0; - if (i > spp) { - p = treenode[i - 1]->next; - while (p != treenode[i - 1]) { - p->back = NULL; - p->tip = false; - p->index = i; - for (j = 0; j < (chars); j++) - p->numsteps[j] = 0; - p = p->next; - } - } - } -} /* protmakevalues */ - - -void doinput() -{ - /* reads the input data */ - long i; - - if (justwts) { - if (firstset) - protinputdata(); - for (i = 0; i < chars; i++) - weight[i] = 1; - inputweights(chars, weight, &weights); - if (justwts) { - fprintf(outfile, "\n\nWeights set # %ld:\n\n", ith); - if (progress) - printf("\nWeights set # %ld:\n\n", ith); - } - if (printdata) - printweights(outfile, 0, chars, weight, "Sites"); - } else { - if (!firstset){ - samenumsp(&chars, ith); - reallocchars(); - } - for (i = 0; i < chars; i++) - weight[i] = 1; - if (weights) { - inputweights(chars, weight, &weights); - } - if (weights) - printweights(outfile, 0, chars, weight, "Sites"); - protinputdata(); - } - if(!thresh) - threshold = spp * 3.0; - for(i = 0 ; i < (chars) ; i++){ - weight[i]*=10; - threshwt[i] = (long)(threshold * weight[i] + 0.5); - } - - protmakevalues(); -} /* doinput */ - - -void protfillin(node *p, node *left, node *rt) -{ - /* sets up for each node in the tree the aa set for site m - at that point and counts the changes. 
The program - spends much of its time in this function */ - boolean counted, done; - aas aa; - long s = 0; - sitearray ls, rs, qs; - long i, j, m, n; - - for (m = 0; m < chars; m++) { - if (left != NULL) - memcpy(ls, left->siteset[m], sizeof(sitearray)); - if (rt != NULL) - memcpy(rs, rt->siteset[m], sizeof(sitearray)); - if (left == NULL) { - n = rt->numsteps[m]; - memcpy(qs, rs, sizeof(sitearray)); - } - else if (rt == NULL) { - n = left->numsteps[m]; - memcpy(qs, ls, sizeof(sitearray)); - } - else { - n = left->numsteps[m] + rt->numsteps[m]; - if ((ls[0] == rs[0]) && (ls[1] == rs[1]) && (ls[2] == rs[2])) { - qs[0] = ls[0]; - qs[1] = ls[1]; - qs[2] = ls[2]; - } - else { - counted = false; - for (i = 0; (!counted) && (i <= 3); i++) { - switch (i) { - - case 0: - s = ls[0] & rs[0]; - break; - - case 1: - s = (ls[0] & rs[1]) | (ls[1] & rs[0]); - break; - - case 2: - s = (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]); - break; - - case 3: - s = ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0]; - break; - - } - if (s != 0) { - qs[0] = s; - counted = true; - } else - n += weight[m]; - } - switch (i) { - case 1: - qs[1] = qs[0] | (ls[0] & rs[1]) | (ls[1] & rs[0]); - qs[2] = qs[1] | (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]); - break; - case 2: - qs[1] = qs[0] | (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]); - qs[2] = qs[1] | ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0]; - break; - case 3: - qs[1] = qs[0] | ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0]; - qs[2] = qs[1] | ls[1] | (ls[2] & rs[2]) | rs[1]; - break; - case 4: - qs[1] = qs[0] | ls[1] | (ls[2] & rs[2]) | rs[1]; - qs[2] = qs[1] | ls[2] | rs[2]; - break; - } - for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) { - done = false; - for (i = 0; (!done) && (i <= 1); i++) { - if (((1L << ((long)aa)) & qs[i]) != 0) { - for (j = i+1; j <= 2; j++) - qs[j] |= translate[(long)aa - (long)ala][j-i]; - done = true; - } - } - } - } - } - p->numsteps[m] = n; - memcpy(p->siteset[m], 
qs, sizeof(sitearray)); - } -} /* protfillin */ - - -void protpreorder(node *p) -{ - /* recompute number of steps in preorder taking both ancestoral and - descendent steps into account */ - if (p != NULL && !p->tip) { - protfillin (p->next, p->next->next->back, p->back); - protfillin (p->next->next, p->back, p->next->back); - protpreorder (p->next->back); - protpreorder (p->next->next->back); - } -} /* protpreorder */ - - -void protadd(node *below, node *newtip, node *newfork) -{ - /* inserts the nodes newfork and its left descendant, newtip, - to the tree. below becomes newfork's right descendant */ - - if (below != treenode[below->index - 1]) - below = treenode[below->index - 1]; - if (below->back != NULL) - below->back->back = newfork; - newfork->back = below->back; - below->back = newfork->next->next; - newfork->next->next->back = below; - newfork->next->back = newtip; - newtip->back = newfork->next; - if (root == below) - root = newfork; - root->back = NULL; - - if (recompute) { - protfillin (newfork, newfork->next->back, newfork->next->next->back); - protpreorder(newfork); - if (newfork != root) - protpreorder(newfork->back); - } -} /* protadd */ - - -void protre_move(node **item, node **fork) -{ - /* removes nodes item and its ancestor, fork, from the tree. - the new descendant of fork's ancestor is made to be - fork's second descendant (other than item). 
Also - returns pointers to the deleted nodes, item and fork */ - node *p, *q, *other; - - if ((*item)->back == NULL) { - *fork = NULL; - return; - } - *fork = treenode[(*item)->back->index - 1]; - if ((*item) == (*fork)->next->back) - other = (*fork)->next->next->back; - else other = (*fork)->next->back; - if (root == *fork) - root = other; - p = (*item)->back->next->back; - q = (*item)->back->next->next->back; - if (p != NULL) p->back = q; - if (q != NULL) q->back = p; - (*fork)->back = NULL; - p = (*fork)->next; - do { - p->back = NULL; - p = p->next; - } while (p != (*fork)); - (*item)->back = NULL; - if (recompute) { - protpreorder(other); - if (other != root) protpreorder(other->back); - } -} /* protre_move */ - - -void evaluate(node *r) -{ - /* determines the number of steps needed for a tree. this is the - minimum number of steps needed to evolve sequences on this tree */ - long i, steps, term; - double sum; - - sum = 0.0; - for (i = 0; i < (chars); i++) { - steps = r->numsteps[i]; - if (steps <= threshwt[i]) - term = steps; - else - term = threshwt[i]; - sum += term; - if (usertree && which <= maxuser) - fsteps[which - 1][i] = term; - } - if (usertree && which <= maxuser) { - nsteps[which - 1] = sum; - if (which == 1) { - minwhich = 1; - minsteps = sum; - } else if (sum < minsteps) { - minwhich = which; - minsteps = sum; - } - } - like = -sum; -} /* evaluate */ - - -void protpostorder(node *p) -{ - /* traverses a binary tree, calling PROCEDURE fillin at a - node's descendants before calling fillin at the node */ - if (p->tip) - return; - protpostorder(p->next->back); - protpostorder(p->next->next->back); - protfillin(p, p->next->back, p->next->next->back); -} /* protpostorder */ - - -void protreroot(node *outgroup) -{ - /* reorients tree, putting outgroup in desired position. 
*/ - node *p, *q; - - if (outgroup->back->index == root->index) - return; - p = root->next; - q = root->next->next; - p->back->back = q->back; - q->back->back = p->back; - p->back = outgroup; - q->back = outgroup->back; - outgroup->back->back = q; - outgroup->back = p; -} /* protreroot */ - - -void protsavetraverse(node *p, long *pos, boolean *found) -{ - /* sets BOOLEANs that indicate which way is down */ - p->bottom = true; - if (p->tip) - return; - p->next->bottom = false; - protsavetraverse(p->next->back, pos,found); - p->next->next->bottom = false; - protsavetraverse(p->next->next->back, pos,found); -} /* protsavetraverse */ - - -void protsavetree(long *pos, boolean *found) -{ - /* record in place where each species has to be - added to reconstruct this tree */ - long i, j; - node *p; - boolean done; - - protreroot(treenode[outgrno - 1]); - protsavetraverse(root, pos,found); - for (i = 0; i < (nonodes); i++) - place[i] = 0; - place[root->index - 1] = 1; - for (i = 1; i <= (spp); i++) { - p = treenode[i - 1]; - while (place[p->index - 1] == 0) { - place[p->index - 1] = i; - while (!p->bottom) - p = p->next; - p = p->back; - } - if (i > 1) { - place[i - 1] = place[p->index - 1]; - j = place[p->index - 1]; - done = false; - while (!done) { - place[p->index - 1] = spp + i - 1; - while (!p->bottom) - p = p->next; - p = p->back; - done = (p == NULL); - if (!done) - done = (place[p->index - 1] != j); - } - } - } -} /* protsavetree */ - - -void tryadd(node *p, node **item, node **nufork) -{ - /* temporarily adds one fork and one tip to the tree. 
- if the location where they are added yields greater - "likelihood" than other locations tested up to that - time, then keeps that location as there */ - long pos; - boolean found; - node *rute, *q; - - if (p == root) - protfillin(temp, *item, p); - else { - protfillin(temp1, *item, p); - protfillin(temp, temp1, p->back); - } - evaluate(temp); - if (lastrearr) { - if (like < bestlike) { - if ((*item) == (*nufork)->next->next->back) { - q = (*nufork)->next; - (*nufork)->next = (*nufork)->next->next; - (*nufork)->next->next = q; - q->next = (*nufork); - } - } - else if (like >= bstlike2) { - recompute = false; - protadd(p, (*item), (*nufork)); - rute = root->next->back; - protsavetree(&pos,&found); - protreroot(rute); - if (like > bstlike2) { - bestlike = bstlike2 = like; - pos = 1; - nextree = 1; - addtree(pos, &nextree, dummy, place, bestrees); - } else { - pos = 0; - findtree(&found, &pos, nextree, place, bestrees); - if (!found) { - if (nextree <= maxtrees) - addtree(pos, &nextree, dummy, place, bestrees); - } - } - protre_move (item, nufork); - recompute = true; - } - } - if (like >= bestyet) { - bestyet = like; - there = p; - } -} /* tryadd */ - - -void addpreorder(node *p, node *item, node *nufork) -{ - /* traverses a binary tree, calling PROCEDURE tryadd - at a node before calling tryadd at its descendants */ - - if (p == NULL) - return; - tryadd(p, &item,&nufork); - if (!p->tip) { - addpreorder(p->next->back, item, nufork); - addpreorder(p->next->next->back, item, nufork); - } -} /* addpreorder */ - - -void tryrearr(node *p, boolean *success) -{ - /* evaluates one rearrangement of the tree. - if the new tree has greater "likelihood" than the old - one sets success := TRUE and keeps the new tree. 
- otherwise, restores the old tree */ - node *frombelow, *whereto, *forknode, *q; - double oldlike; - - if (p->back == NULL) - return; - forknode = treenode[p->back->index - 1]; - if (forknode->back == NULL) - return; - oldlike = bestyet; - if (p->back->next->next == forknode) - frombelow = forknode->next->next->back; - else - frombelow = forknode->next->back; - whereto = treenode[forknode->back->index - 1]; - if (whereto->next->back == forknode) - q = whereto->next->next->back; - else - q = whereto->next->back; - protfillin(temp1, frombelow, q); - protfillin(temp, temp1, p); - protfillin(temp1, temp, whereto->back); - evaluate(temp1); - if (like <= oldlike) { - if (p == forknode->next->next->back) { - q = forknode->next; - forknode->next = forknode->next->next; - forknode->next->next = q; - q->next = forknode; - } - } - else { - recompute = false; - protre_move(&p, &forknode); - protfillin(whereto, whereto->next->back, whereto->next->next->back); - recompute = true; - protadd(whereto, p, forknode); - *success = true; - bestyet = like; - } -} /* tryrearr */ - - -void repreorder(node *p, boolean *success) -{ - /* traverses a binary tree, calling PROCEDURE tryrearr - at a node before calling tryrearr at its descendants */ - if (p == NULL) - return; - tryrearr(p,success); - if (!p->tip) { - repreorder(p->next->back,success); - repreorder(p->next->next->back,success); - } -} /* repreorder */ - - -void rearrange(node **r) -{ - /* traverses the tree (preorder), finding any local - rearrangement which decreases the number of steps. 
- if traversal succeeds in increasing the tree's - "likelihood", PROCEDURE rearrange runs traversal again */ - boolean success = true; - while (success) { - success = false; - repreorder(*r, &success); - } -} /* rearrange */ - - -void protgetch(Char *c) -{ - /* get next nonblank character */ - do { - if (eoln(intree)) - scan_eoln(intree); - *c = gettc(intree); - if (*c == '\n' || *c == '\t') - *c = ' '; - } while (!(*c != ' ' || eoff(intree))); -} /* protgetch */ - - -void protaddelement(node **p,long *nextnode,long *lparens,boolean *names) -{ - /* recursive procedure adds nodes to user-defined tree */ - node *q; - long i, n; - boolean found; - Char str[nmlngth]; - - protgetch(&ch); - - if (ch == '(' ) { - if ((*lparens) >= spp - 1) { - printf("\nERROR IN USER TREE: TOO MANY LEFT PARENTHESES\n"); - exxit(-1); - } - (*nextnode)++; - (*lparens)++; - q = treenode[(*nextnode) - 1]; - protaddelement(&q->next->back, nextnode,lparens,names); - q->next->back->back = q->next; - findch(',', &ch, which); - protaddelement(&q->next->next->back, nextnode,lparens,names); - q->next->next->back->back = q->next->next; - findch(')', &ch, which); - *p = q; - return; - } - for (i = 0; i < nmlngth; i++) - str[i] = ' '; - n = 1; - do { - if (ch == '_') - ch = ' '; - str[n - 1] = ch; - if (eoln(intree)) - scan_eoln(intree); - ch = gettc(intree); - n++; - } while (ch != ',' && ch != ')' && ch != ':' && n <= nmlngth); - n = 1; - do { - found = true; - for (i = 0; i < nmlngth; i++) - found = (found && ((str[i] == nayme[n - 1][i]) || - ((nayme[n - 1][i] == '_') && (str[i] == ' ')))); - if (found) { - if (names[n - 1] == false) { - *p = treenode[n - 1]; - names[n - 1] = true; - } else { - printf("\nERROR IN USER TREE: DUPLICATE NAME FOUND -- "); - for (i = 0; i < nmlngth; i++) - putchar(nayme[n - 1][i]); - putchar('\n'); - exxit(-1); - } - } else - n++; - } while (!(n > spp || found)); - if (n <= spp) - return; - printf("CANNOT FIND SPECIES: "); - for (i = 0; i < nmlngth; i++) - 
putchar(str[i]); - putchar('\n'); -} /* protaddelement */ - - -void prottreeread() -{ - /* read in user-defined tree and set it up */ - long nextnode, lparens, i; - - root = treenode[spp]; - nextnode = spp; - root->back = NULL; - names = (boolean *)Malloc(spp*sizeof(boolean)); - for (i = 0; i < (spp); i++) - names[i] = false; - lparens = 0; - protaddelement(&root, &nextnode,&lparens,names); - if (ch == '[') { - do - ch = gettc(intree); - while (ch != ']'); - ch = gettc(intree); - } - findch(';', &ch, which); - scan_eoln(intree); - free(names); -} /* prottreeread */ - - -void protancestset(long *a, long *b, long *c, long *d, long *k) -{ - /* sets up the aa set array. */ - aas aa; - long s, sa, sb; - long i, j, m, n; - boolean counted; - - counted = false; - *k = 0; - for (i = 0; i <= 5; i++) { - if (*k < 3) { - s = 0; - if (i > 3) - n = i - 3; - else - n = 0; - for (j = n; j <= (i - n); j++) { - if (j < 3) - sa = a[j]; - else - sa = fullset; - for (m = n; m <= (i - j - n); m++) { - if (m < 3) - sb = sa & b[m]; - else - sb = sa; - if (i - j - m < 3) - sb &= c[i - j - m]; - s |= sb; - } - } - if (counted || s != 0) { - d[*k] = s; - (*k)++; - counted = true; - } - } - } - for (i = 0; i <= 1; i++) { - for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) { - if (((1L << ((long)aa)) & d[i]) != 0) { - for (j = i + 1; j <= 2; j++) - d[j] |= translate[(long)aa - (long)ala][j - i]; - } - } - } -} /* protancestset */ - - -void prothyprint(long b1, long b2, boolean *bottom, node *r, - boolean *nonzero, boolean *maybe) -{ - /* print out states in sites b1 through b2 at node */ - long i; - boolean dot; - Char ch = 0; - aas aa; - - if (*bottom) { - if (!outgropt) - fprintf(outfile, " "); - else - fprintf(outfile, "root "); - } else - fprintf(outfile, "%3ld ", r->back->index - spp); - if (r->tip) { - for (i = 0; i < nmlngth; i++) - putc(nayme[r->index - 1][i], outfile); - } else - fprintf(outfile, "%4ld ", r->index - spp); - if (*bottom) - fprintf(outfile, " "); - else 
if (*nonzero) - fprintf(outfile, " yes "); - else if (*maybe) - fprintf(outfile, " maybe "); - else - fprintf(outfile, " no "); - for (i = b1 - 1; i < b2; i++) { - aa = r->seq[i]; - switch (aa) { - - case ala: - ch = 'A'; - break; - - case asx: - ch = 'B'; - break; - - case cys: - ch = 'C'; - break; - - case asp: - ch = 'D'; - break; - - case glu: - ch = 'E'; - break; - - case phe: - ch = 'F'; - break; - - case gly: - ch = 'G'; - break; - - case his: - ch = 'H'; - break; - - case ileu: - ch = 'I'; - break; - - case lys: - ch = 'K'; - break; - - case leu: - ch = 'L'; - break; - - case met: - ch = 'M'; - break; - - case asn: - ch = 'N'; - break; - - case pro: - ch = 'P'; - break; - - case gln: - ch = 'Q'; - break; - - case arg: - ch = 'R'; - break; - - case ser: - ch = 'S'; - break; - - case ser1: - ch = 'S'; - break; - - case ser2: - ch = 'S'; - break; - - case thr: - ch = 'T'; - break; - - case trp: - ch = 'W'; - break; - - case tyr: - ch = 'Y'; - break; - - case val: - ch = 'V'; - break; - - case glx: - ch = 'Z'; - break; - - case del: - ch = '-'; - break; - - case stop: - ch = '*'; - break; - - case unk: - ch = 'X'; - break; - - case quest: - ch = '?'; - break; - } - if (!(*bottom) && dotdiff) - dot = (r->siteset[i] [0] == treenode[r->back->index - 1]->siteset[i][0] - || ((r->siteset[i][0] & - (~((1L << ((long)ser1)) | (1L << ((long)ser2)) | - (1L << ((long)ser))))) == 0 && - (treenode[r->back->index - 1]->siteset[i] [0] & - (~((1L << ((long)ser1)) | (1L << ((long)ser2)) | - (1L << ((long)ser))))) == 0)); - else - dot = false; - if (dot) - putc('.', outfile); - else - putc(ch, outfile); - if ((i + 1) % 10 == 0) - putc(' ', outfile); - } - putc('\n', outfile); -} /* prothyprint */ - - -void prothyptrav(node *r, sitearray *hypset, long b1, long b2, long *k, - boolean *bottom, sitearray nothing) -{ - boolean maybe, nonzero; - long i; - aas aa; - long anc = 0, hset; - gseq *ancset, *temparray; - - protgnu(&ancset); - protgnu(&temparray); - maybe = false; - nonzero = 
false; - for (i = b1 - 1; i < b2; i++) { - if (!r->tip) { - protancestset(hypset[i], r->next->back->siteset[i], - r->next->next->back->siteset[i], temparray->seq[i], k); - memcpy(r->siteset[i], temparray->seq[i], sizeof(sitearray)); - } - if (!(*bottom)) - anc = treenode[r->back->index - 1]->siteset[i][0]; - if (!r->tip) { - hset = r->siteset[i][0]; - r->seq[i] = quest; - for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) { - if (hset == 1L << ((long)aa)) - r->seq[i] = aa; - } - if (hset == ((1L << ((long)asn)) | (1L << ((long)asp)))) - r->seq[i] = asx; - if (hset == ((1L << ((long)gln)) | (1L << ((long)gly)))) - r->seq[i] = glx; - if (hset == ((1L << ((long)ser1)) | (1L << ((long)ser2)))) - r->seq[i] = ser; - if (hset == fullset) - r->seq[i] = unk; - } - nonzero = (nonzero || (r->siteset[i][0] & anc) == 0); - maybe = (maybe || r->siteset[i][0] != anc); - } - prothyprint(b1, b2,bottom,r,&nonzero,&maybe); - *bottom = false; - if (!r->tip) { - memcpy(temparray->seq, r->next->back->siteset, chars*sizeof(sitearray)); - for (i = b1 - 1; i < b2; i++) - protancestset(hypset[i], r->next->next->back->siteset[i], nothing, - ancset->seq[i], k); - prothyptrav(r->next->back, ancset->seq, b1, b2,k,bottom,nothing ); - for (i = b1 - 1; i < b2; i++) - protancestset(hypset[i], temparray->seq[i], nothing, ancset->seq[i],k); - prothyptrav(r->next->next->back, ancset->seq, b1, b2, k,bottom,nothing); - } - protchuck(temparray); - protchuck(ancset); -} /* prothyptrav */ - - -void prothypstates(long *k) -{ - /* fill in and describe states at interior nodes */ - boolean bottom; - sitearray nothing; - long i, n; - seqptr hypset; - - fprintf(outfile, "\nFrom To Any Steps? State at upper node\n"); - fprintf(outfile, " "); - fprintf(outfile, "( . 
means same as in the node below it on tree)\n\n"); - memcpy(nothing, translate[(long)quest - (long)ala], sizeof(sitearray)); - hypset = (seqptr)Malloc(chars*sizeof(sitearray)); - for (i = 0; i < (chars); i++) - memcpy(hypset[i], nothing, sizeof(sitearray)); - bottom = true; - for (i = 1; i <= ((chars - 1) / 40 + 1); i++) { - putc('\n', outfile); - n = i * 40; - if (n > chars) - n = chars; - bottom = true; - prothyptrav(root, hypset, i * 40 - 39, n, k,&bottom,nothing); - } - free(hypset); -} /* prothypstates */ - - -void describe() -{ - /* prints ancestors, steps and table of numbers of steps in - each site */ - long i,j,k; - - if (treeprint) - fprintf(outfile, "\nrequires a total of %10.3f\n", like / -10); - if (stepbox) { - putc('\n', outfile); - if (weights) - fprintf(outfile, "weighted "); - fprintf(outfile, "steps in each position:\n"); - fprintf(outfile, " "); - for (i = 0; i <= 9; i++) - fprintf(outfile, "%4ld", i); - fprintf(outfile, "\n *-----------------------------------------\n"); - for (i = 0; i <= (chars / 10); i++) { - fprintf(outfile, "%5ld", i * 10); - putc('!', outfile); - for (j = 0; j <= 9; j++) { - k = i * 10 + j; - if (k == 0 || k > chars) - fprintf(outfile, " "); - else - fprintf(outfile, "%4ld", root->numsteps[k - 1] / 10); - } - putc('\n', outfile); - } - } - if (ancseq) { - prothypstates(&k); - putc('\n', outfile); - } - putc('\n', outfile); - if (trout) { - col = 0; - treeout(root, nextree, &col, root); - } -} /* describe */ - - -void maketree() -{ - /* constructs a binary tree from the pointers in treenode. 
- adds each node at location which yields highest "likelihood" - then rearranges the tree for greatest "likelihood" */ - long i, j, numtrees; - double gotlike; - node *item, *nufork, *dummy; - - if (!usertree) { - for (i = 1; i <= (spp); i++) - enterorder[i - 1] = i; - if (jumble) - randumize(seed, enterorder); - root = treenode[enterorder[0] - 1]; - recompute = true; - protadd(treenode[enterorder[0] - 1], treenode[enterorder[1] - 1], - treenode[spp]); - if (progress) { - printf("\nAdding species:\n"); - writename(0, 2, enterorder); - } - lastrearr = false; - for (i = 3; i <= (spp); i++) { - bestyet = -30.0*spp*chars; - there = root; - item = treenode[enterorder[i - 1] - 1]; - nufork = treenode[spp + i - 2]; - addpreorder(root, item, nufork); - protadd(there, item, nufork); - like = bestyet; - rearrange(&root); - if (progress) - writename(i - 1, 1, enterorder); - lastrearr = (i == spp); - if (lastrearr) { - if (progress) { - printf("\nDoing global rearrangements\n"); - printf(" !"); - for (j = 1; j <= nonodes; j++) - if ( j % (( nonodes / 72 ) + 1 ) == 0 ) - putchar('-'); - printf("!\n"); - } - bestlike = bestyet; - if (jumb == 1) { - bstlike2 = bestlike = -30.0*spp*chars; - nextree = 1; - } - do { - if (progress) - printf(" "); - gotlike = bestlike; - for (j = 0; j < (nonodes); j++) { - bestyet = -30.0*spp*chars; - item = treenode[j]; - if (item != root) { - nufork = treenode[treenode[j]->back->index - 1]; - protre_move(&item, &nufork); - there = root; - addpreorder(root, item, nufork); - protadd(there, item, nufork); - } - if (progress) { - if ( j % (( nonodes / 72 ) + 1 ) == 0 ) - putchar('.'); - fflush(stdout); - } - } - if (progress) - putchar('\n'); - } while (bestlike > gotlike); - } - } - if (progress) - putchar('\n'); - for (i = spp - 1; i >= 1; i--) - protre_move(&treenode[i], &dummy); - if (jumb == njumble) { - if (treeprint) { - putc('\n', outfile); - if (nextree == 2) - fprintf(outfile, "One most parsimonious tree found:\n"); - else - fprintf(outfile, 
"%6ld trees in all found\n", nextree - 1); - } - if (nextree > maxtrees + 1) { - if (treeprint) - fprintf(outfile, "here are the first%4ld of them\n", (long)maxtrees); - nextree = maxtrees + 1; - } - if (treeprint) - putc('\n', outfile); - recompute = false; - for (i = 0; i <= (nextree - 2); i++) { - root = treenode[0]; - protadd(treenode[0], treenode[1], treenode[spp]); - for (j = 3; j <= (spp); j++) - protadd(treenode[bestrees[i].btree[j - 1] - 1], treenode[j - 1], - treenode[spp + j - 2]); - protreroot(treenode[outgrno - 1]); - protpostorder(root); - evaluate(root); - printree(root, 1.0); - describe(); - for (j = 1; j < (spp); j++) - protre_move(&treenode[j], &dummy); - } - } - } else { - openfile(&intree,INTREE,"input tree file", "r",progname,intreename); - numtrees = countsemic(&intree); - if (treeprint) { - fprintf(outfile, "User-defined tree"); - if (numtrees > 1) - putc('s', outfile); - fprintf(outfile, ":\n\n\n\n"); - } - which = 1; - while (which <= numtrees) { - prottreeread(); - if (outgropt) - protreroot(treenode[outgrno - 1]); - protpostorder(root); - evaluate(root); - printree(root, 1.0); - describe(); - which++; - } - printf("\n"); - FClose(intree); - putc('\n', outfile); - if (numtrees > 1 && chars > 1 ) - standev(chars, numtrees, minwhich, minsteps, nsteps, fsteps, seed); - } - if (jumb == njumble && progress) { - printf("Output written to file \"%s\"\n\n", outfilename); - if (trout) - printf("Trees also written onto file \"%s\"\n\n", outtreename); - } -} /* maketree */ - - -int main(int argc, Char *argv[]) -{ /* Protein parsimony by uphill search */ -#ifdef MAC - argc = 1; /* macsetup("Protpars",""); */ - argv[0] = "Protpars"; -#endif - init(argc,argv); - progname = argv[0]; - openfile(&infile,INFILE,"input file", "r",argv[0],infilename); - openfile(&outfile,OUTFILE,"output file", "w",argv[0],outfilename); - - ibmpc = IBMCRT; - ansi = ANSICRT; - garbage = NULL; - mulsets = false; - msets = 1; - firstset = true; - code(); - setup(); - doinit(); - 
if (weights || justwts) - openfile(&weightfile,WEIGHTFILE,"weights file","r",argv[0],weightfilename); - if (trout) - openfile(&outtree,OUTTREE,"output tree file", "w",argv[0],outtreename); - for (ith = 1; ith <= msets; ith++) { - doinput(); - if (ith == 1) - firstset = false; - if (msets > 1 && !justwts) { - fprintf(outfile, "Data set # %ld:\n\n",ith); - if (progress) - printf("Data set # %ld:\n\n",ith); - } - for (jumb = 1; jumb <= njumble; jumb++) - maketree(); - } - FClose(infile); - FClose(outfile); - FClose(outtree); -#ifdef MAC - fixmacfile(outfilename); - fixmacfile(outtreename); -#endif - return 0; -} /* Protein parsimony by uphill search */ diff --git a/forester/archive/RIO/others/phylip_mod/src/seq.c b/forester/archive/RIO/others/phylip_mod/src/seq.c deleted file mode 100644 index ab0c7d9..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/seq.c +++ /dev/null @@ -1,4178 +0,0 @@ - -#include "phylip.h" -#include "seq.h" - -/* version 3.6. (c) Copyright 1993-2004 by the University of Washington. - Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. - Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. 
*/ - -long nonodes, endsite, outgrno, nextree, which; -boolean interleaved, printdata, outgropt, treeprint, dotdiff, transvp; -steptr weight, category, alias, location, ally; -sequence y; - - -void fix_x(node* p,long site, double maxx, long rcategs) -{ /* dnaml dnamlk */ - long i,j; - p->underflows[site] += log(maxx); - - for ( i = 0 ; i < rcategs ; i++ ) { - for ( j = 0 ; j < ((long)T - (long)A + 1) ; j++) - p->x[site][i][j] /= maxx; - } -} /* fix_x */ - - -void fix_protx(node* p,long site, double maxx, long rcategs) -{ /* proml promlk */ - long i,m; - - p->underflows[site] += log(maxx); - - for ( i = 0 ; i < rcategs ; i++ ) - for (m = 0; m <= 19; m++) - p->protx[site][i][m] /= maxx; -} /* fix_protx */ - - -void alloctemp(node **temp, long *zeros, long endsite) -{ - /*used in dnacomp and dnapenny */ - *temp = (node *)Malloc(sizeof(node)); - (*temp)->numsteps = (steptr)Malloc(endsite*sizeof(long)); - (*temp)->base = (baseptr)Malloc(endsite*sizeof(long)); - (*temp)->numnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); - memcpy((*temp)->base, zeros, endsite*sizeof(long)); - memcpy((*temp)->numsteps, zeros, endsite*sizeof(long)); - zeronumnuc(*temp, endsite); -} /* alloctemp */ - - -void freetemp(node **temp) -{ - /* used in dnacomp, dnapars, & dnapenny */ - free((*temp)->numsteps); - free((*temp)->base); - free((*temp)->numnuc); - free(*temp); -} /* freetemp */ - - -void freetree2 (pointarray treenode, long nonodes) -{ - /* The natural complement to alloctree2. 
Free all elements of all - the rings (normally triads) in treenode */ - long i; - node *p, *q; - - /* The first spp elements are just nodes, not rings */ - for (i = 0; i < spp; i++) - free (treenode[i]); - - /* The rest are rings */ - for (i = spp; i < nonodes; i++) { - p = treenode[i]->next; - while (p != treenode[i]) { - q = p->next; - free (p); - p = q; - } - /* p should now point to treenode[i], which has yet to be freed */ - free (p); - } - free (treenode); -} /* freetree2 */ - - -void inputdata(long chars) -{ - /* input the names and sequences for each species */ - /* used by dnacomp, dnadist, dnainvar, dnaml, dnamlk, dnapars, & dnapenny */ - long i, j, k, l, basesread, basesnew=0; - Char charstate; - boolean allread, done; - - if (printdata) - headings(chars, "Sequences", "---------"); - basesread = 0; - allread = false; - while (!(allread)) { - /* eat white space -- if the separator line has spaces on it*/ - do { - charstate = gettc(infile); - } while (charstate == ' ' || charstate == '\t'); - ungetc(charstate, infile); - if (eoln(infile)) - scan_eoln(infile); - i = 1; - while (i <= spp) { - if ((interleaved && basesread == 0) || !interleaved) - initname(i-1); - j = (interleaved) ? basesread : 0; - done = false; - while (!done && !eoff(infile)) { - if (interleaved) - done = true; - while (j < chars && !(eoln(infile) || eoff(infile))) { - charstate = gettc(infile); - if (charstate == '\n' || charstate == '\t') - charstate = ' '; - if (charstate == ' ' || (charstate >= '0' && charstate <= '9')) - continue; - uppercase(&charstate); - if ((strchr("ABCDGHKMNRSTUVWXY?O-",charstate)) == NULL){ - printf("ERROR: bad base: %c at site %5ld of species %3ld\n", - charstate, j+1, i); - if (charstate == '.') { - printf(" Periods (.) 
may not be used as gap characters.\n"); - printf(" The correct gap character is (-)\n"); - } - exxit(-1); - } - j++; - y[i - 1][j - 1] = charstate; - } - if (interleaved) - continue; - if (j < chars) - scan_eoln(infile); - else if (j == chars) - done = true; - } - if (interleaved && i == 1) - basesnew = j; - - scan_eoln(infile); - - if ((interleaved && j != basesnew) || - (!interleaved && j != chars)) { - printf("\nERROR: sequences out of alignment at position %ld", j+1); - printf(" of species %ld\n\n", i); - exxit(-1); - } - i++; - } - - if (interleaved) { - basesread = basesnew; - allread = (basesread == chars); - } else - allread = (i > spp); - } - if (!printdata) - return; - for (i = 1; i <= ((chars - 1) / 60 + 1); i++) { - for (j = 1; j <= spp; j++) { - for (k = 0; k < nmlngth; k++) - putc(nayme[j - 1][k], outfile); - fprintf(outfile, " "); - l = i * 60; - if (l > chars) - l = chars; - for (k = (i - 1) * 60 + 1; k <= l; k++) { - if (dotdiff && (j > 1 && y[j - 1][k - 1] == y[0][k - 1])) - charstate = '.'; - else - charstate = y[j - 1][k - 1]; - putc(charstate, outfile); - if (k % 10 == 0 && k % 60 != 0) - putc(' ', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); -} /* inputdata */ - - -void alloctree(pointarray *treenode, long nonodes, boolean usertree) -{ - /* allocate treenode dynamically */ - /* used in dnapars, dnacomp, dnapenny & dnamove */ - long i, j; - node *p, *q; - - *treenode = (pointarray)Malloc(nonodes*sizeof(node *)); - for (i = 0; i < spp; i++) { - (*treenode)[i] = (node *)Malloc(sizeof(node)); - (*treenode)[i]->tip = true; - (*treenode)[i]->index = i+1; - (*treenode)[i]->iter = true; - (*treenode)[i]->branchnum = 0; - (*treenode)[i]->initialized = true; - } - if (!usertree) - for (i = spp; i < nonodes; i++) { - q = NULL; - for (j = 1; j <= 3; j++) { - p = (node *)Malloc(sizeof(node)); - p->tip = false; - p->index = i+1; - p->iter = true; - p->branchnum = 0; - p->initialized = false; - p->next = q; - q = 
p; - } - p->next->next->next = p; - (*treenode)[i] = p; - } -} /* alloctree */ - - -void allocx(long nonodes, long rcategs, pointarray treenode, boolean usertree) -{ - /* allocate x dynamically */ - /* used in dnaml & dnamlk */ - long i, j, k; - node *p; - - for (i = 0; i < spp; i++){ - treenode[i]->x = (phenotype)Malloc(endsite*sizeof(ratelike)); - treenode[i]->underflows = Malloc(endsite * sizeof (double)); - for (j = 0; j < endsite; j++) - treenode[i]->x[j] = (ratelike)Malloc(rcategs*sizeof(sitelike)); - } - if (!usertree) { - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - for (j = 1; j <= 3; j++) { - p->underflows = Malloc (endsite * sizeof (double)); - p->x = (phenotype)Malloc(endsite*sizeof(ratelike)); - for (k = 0; k < endsite; k++) - p->x[k] = (ratelike)Malloc(rcategs*sizeof(sitelike)); - p = p->next; - } - } - } -} /* allocx */ - - -void prot_allocx(long nonodes, long rcategs, pointarray treenode, - boolean usertree) -{ - /* allocate x dynamically */ - /* used in proml */ - long i, j, k; - node *p; - - for (i = 0; i < spp; i++){ - treenode[i]->protx = (pphenotype)Malloc(endsite*sizeof(pratelike)); - treenode[i]->underflows = Malloc(endsite*sizeof(double)); - for (j = 0; j < endsite; j++) - treenode[i]->protx[j] = (pratelike)Malloc(rcategs*sizeof(psitelike)); - } - if (!usertree) { - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - for (j = 1; j <= 3; j++) { - p->protx = (pphenotype)Malloc(endsite*sizeof(pratelike)); - p->underflows = Malloc(endsite*sizeof(double)); - for (k = 0; k < endsite; k++) - p->protx[k] = (pratelike)Malloc(rcategs*sizeof(psitelike)); - p = p->next; - } - } - } -} /* prot_allocx */ - - -void allocx2(long nonodes, long endsite, long sitelength, pointarray treenode, - boolean usertree) -{ - /* allocate x2 dynamically */ - /* used in restml */ - long i, j, k, l; - node *p; - - for (i = 0; i < spp; i++) { - treenode[i]->x2 = (phenotype2)Malloc(endsite*sizeof(sitelike2)); - for ( j = 0 ; j < endsite ; j++ ) - 
treenode[i]->x2[j] = Malloc((sitelength + 1) * sizeof(double)); - } - if (!usertree) { - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - for (j = 1; j <= 3; j++) { - p->x2 = (phenotype2)Malloc(endsite*sizeof(sitelike2)); - for (k = 0; k < endsite; k++) { - p->x2[k] = Malloc((sitelength + 1) * sizeof(double)); - for (l = 0; l < sitelength; l++) - p->x2[k][l] = 1.0; - } - p = p->next; - } - } - } -} /* allocx2 */ - - -void setuptree(pointarray treenode, long nonodes, boolean usertree) -{ - /* initialize treenodes */ - long i; - node *p; - - for (i = 1; i <= nonodes; i++) { - if (i <= spp || !usertree) { - treenode[i-1]->back = NULL; - treenode[i-1]->tip = (i <= spp); - treenode[i-1]->index = i; - treenode[i-1]->numdesc = 0; - treenode[i-1]->iter = true; - treenode[i-1]->initialized = true; - treenode[i-1]->tyme = 0.0; - } - } - if (!usertree) { - for (i = spp + 1; i <= nonodes; i++) { - p = treenode[i-1]->next; - while (p != treenode[i-1]) { - p->back = NULL; - p->tip = false; - p->index = i; - p->numdesc = 0; - p->iter = true; - p->initialized = false; - p->tyme = 0.0; - p = p->next; - } - } - } -} /* setuptree */ - - -void setuptree2(tree a) -{ - /* initialize a tree */ - /* used in dnaml, dnamlk, & restml */ - - a.likelihood = -999999.0; - a.start = a.nodep[0]->back; - a.root = NULL; -} /* setuptree2 */ - - -void alloctip(node *p, long *zeros) -{ /* allocate a tip node */ - /* used by dnacomp, dnapars, & dnapenny */ - - p->numsteps = (steptr)Malloc(endsite*sizeof(long)); - p->oldnumsteps = (steptr)Malloc(endsite*sizeof(long)); - p->base = (baseptr)Malloc(endsite*sizeof(long)); - p->oldbase = (baseptr)Malloc(endsite*sizeof(long)); - memcpy(p->base, zeros, endsite*sizeof(long)); - memcpy(p->numsteps, zeros, endsite*sizeof(long)); - memcpy(p->oldbase, zeros, endsite*sizeof(long)); - memcpy(p->oldnumsteps, zeros, endsite*sizeof(long)); -} /* alloctip */ - - -void freetrans(transptr *trans, long nonodes,long sitelength) -{ - long i ,j; - for ( i = 0 ; i < 
nonodes ; i++ ) { - for ( j = 0 ; j < sitelength + 1; j++) { - free ((*trans)[i][j]); - } - free ((*trans)[i]); - } - free(*trans); -} - - -void getbasefreqs(double freqa, double freqc, double freqg, double freqt, - double *freqr, double *freqy, double *freqar, double *freqcy, - double *freqgr, double *freqty, double *ttratio, double *xi, - double *xv, double *fracchange, boolean freqsfrom, - boolean printdata) -{ - /* used by dnadist, dnaml, & dnamlk */ - double aa, bb; - - if (printdata) { - putc('\n', outfile); - if (freqsfrom) - fprintf(outfile, "Empirical "); - fprintf(outfile, "Base Frequencies:\n\n"); - fprintf(outfile, " A %10.5f\n", freqa); - fprintf(outfile, " C %10.5f\n", freqc); - fprintf(outfile, " G %10.5f\n", freqg); - fprintf(outfile, " T(U) %10.5f\n", freqt); - } - *freqr = freqa + freqg; - *freqy = freqc + freqt; - *freqar = freqa / *freqr; - *freqcy = freqc / *freqy; - *freqgr = freqg / *freqr; - *freqty = freqt / *freqy; - aa = *ttratio * (*freqr) * (*freqy) - freqa * freqg - freqc * freqt; - bb = freqa * (*freqgr) + freqc * (*freqty); - *xi = aa / (aa + bb); - *xv = 1.0 - *xi; - if (*xi < 0.0) { - printf("\n WARNING: This transition/transversion ratio\n"); - printf(" is impossible with these base frequencies!\n"); - *xi = 0.0; - *xv = 1.0; - (*ttratio) = (freqa*freqg+freqc*freqt)/((*freqr)*(*freqy)); - printf(" Transition/transversion parameter reset\n"); - printf(" so transition/transversion ratio is %10.6f\n\n", (*ttratio)); - } - if (freqa <= 0.0) - freqa = 0.000001; - if (freqc <= 0.0) - freqc = 0.000001; - if (freqg <= 0.0) - freqg = 0.000001; - if (freqt <= 0.0) - freqt = 0.000001; - *fracchange = (*xi) * (2 * freqa * (*freqgr) + 2 * freqc * (*freqty)) + - (*xv) * (1.0 - freqa * freqa - freqc * freqc - freqg * freqg - - freqt * freqt); -} /* getbasefreqs */ - - -void empiricalfreqs(double *freqa, double *freqc, double *freqg, - double *freqt, steptr weight, pointarray treenode) -{ - /* Get empirical base frequencies from the data */ - /* 
used in dnaml & dnamlk */ - long i, j, k; - double sum, suma, sumc, sumg, sumt, w; - - *freqa = 0.25; - *freqc = 0.25; - *freqg = 0.25; - *freqt = 0.25; - for (k = 1; k <= 8; k++) { - suma = 0.0; - sumc = 0.0; - sumg = 0.0; - sumt = 0.0; - for (i = 0; i < spp; i++) { - for (j = 0; j < endsite; j++) { - w = weight[j]; - sum = (*freqa) * treenode[i]->x[j][0][0]; - sum += (*freqc) * treenode[i]->x[j][0][(long)C - (long)A]; - sum += (*freqg) * treenode[i]->x[j][0][(long)G - (long)A]; - sum += (*freqt) * treenode[i]->x[j][0][(long)T - (long)A]; - suma += w * (*freqa) * treenode[i]->x[j][0][0] / sum; - sumc += w * (*freqc) * treenode[i]->x[j][0][(long)C - (long)A] / sum; - sumg += w * (*freqg) * treenode[i]->x[j][0][(long)G - (long)A] / sum; - sumt += w * (*freqt) * treenode[i]->x[j][0][(long)T - (long)A] / sum; - } - } - sum = suma + sumc + sumg + sumt; - *freqa = suma / sum; - *freqc = sumc / sum; - *freqg = sumg / sum; - *freqt = sumt / sum; - } - if (*freqa <= 0.0) - *freqa = 0.000001; - if (*freqc <= 0.0) - *freqc = 0.000001; - if (*freqg <= 0.0) - *freqg = 0.000001; - if (*freqt <= 0.0) - *freqt = 0.000001; -} /* empiricalfreqs */ - - -void sitesort(long chars, steptr weight) -{ - /* Shell sort keeping sites, weights in same order */ - /* used in dnainvar, dnapars, dnacomp & dnapenny */ - long gap, i, j, jj, jg, k, itemp; - boolean flip, tied; - - gap = chars / 2; - while (gap > 0) { - for (i = gap + 1; i <= chars; i++) { - j = i - gap; - flip = true; - while (j > 0 && flip) { - jj = alias[j - 1]; - jg = alias[j + gap - 1]; - tied = true; - k = 1; - while (k <= spp && tied) { - flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]); - tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]); - k++; - } - if (!flip) - break; - itemp = alias[j - 1]; - alias[j - 1] = alias[j + gap - 1]; - alias[j + gap - 1] = itemp; - itemp = weight[j - 1]; - weight[j - 1] = weight[j + gap - 1]; - weight[j + gap - 1] = itemp; - j -= gap; - } - } - gap /= 2; - } -} /* sitesort */ - - -void 
sitecombine(long chars) -{ - /* combine sites that have identical patterns */ - /* used in dnapars, dnapenny, & dnacomp */ - long i, j, k; - boolean tied; - - i = 1; - while (i < chars) { - j = i + 1; - tied = true; - while (j <= chars && tied) { - k = 1; - while (k <= spp && tied) { - tied = (tied && - y[k - 1][alias[i - 1] - 1] == y[k - 1][alias[j - 1] - 1]); - k++; - } - if (tied) { - weight[i - 1] += weight[j - 1]; - weight[j - 1] = 0; - ally[alias[j - 1] - 1] = alias[i - 1]; - } - j++; - } - i = j - 1; - } -} /* sitecombine */ - - -void sitescrunch(long chars) -{ - /* move so one representative of each pattern of - sites comes first */ - /* used in dnapars & dnacomp */ - long i, j, itemp; - boolean done, found; - - done = false; - i = 1; - j = 2; - while (!done) { - if (ally[alias[i - 1] - 1] != alias[i - 1]) { - if (j <= i) - j = i + 1; - if (j <= chars) { - do { - found = (ally[alias[j - 1] - 1] == alias[j - 1]); - j++; - } while (!(found || j > chars)); - if (found) { - j--; - itemp = alias[i - 1]; - alias[i - 1] = alias[j - 1]; - alias[j - 1] = itemp; - itemp = weight[i - 1]; - weight[i - 1] = weight[j - 1]; - weight[j - 1] = itemp; - } else - done = true; - } else - done = true; - } - i++; - done = (done || i >= chars); - } -} /* sitescrunch */ - - -void sitesort2(long sites, steptr aliasweight) -{ - /* Shell sort keeping sites, weights in same order */ - /* used in dnaml & dnamnlk */ - long gap, i, j, jj, jg, k, itemp; - boolean flip, tied, samewt; - - gap = sites / 2; - while (gap > 0) { - for (i = gap + 1; i <= sites; i++) { - j = i - gap; - flip = true; - while (j > 0 && flip) { - jj = alias[j - 1]; - jg = alias[j + gap - 1]; - samewt = ((weight[jj - 1] != 0) && (weight[jg - 1] != 0)) - || ((weight[jj - 1] == 0) && (weight[jg - 1] == 0)); - tied = samewt && (category[jj - 1] == category[jg - 1]); - flip = ((!samewt) && (weight[jj - 1] == 0)) - || (samewt && (category[jj - 1] > category[jg - 1])); - k = 1; - while (k <= spp && tied) { - flip = (y[k - 
1][jj - 1] > y[k - 1][jg - 1]); - tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]); - k++; - } - if (!flip) - break; - itemp = alias[j - 1]; - alias[j - 1] = alias[j + gap - 1]; - alias[j + gap - 1] = itemp; - itemp = aliasweight[j - 1]; - aliasweight[j - 1] = aliasweight[j + gap - 1]; - aliasweight[j + gap - 1] = itemp; - j -= gap; - } - } - gap /= 2; - } -} /* sitesort2 */ - - -void sitecombine2(long sites, steptr aliasweight) -{ - /* combine sites that have identical patterns */ - /* used in dnaml & dnamlk */ - long i, j, k; - boolean tied, samewt; - - i = 1; - while (i < sites) { - j = i + 1; - tied = true; - while (j <= sites && tied) { - samewt = ((aliasweight[i - 1] != 0) && (aliasweight[j - 1] != 0)) - || ((aliasweight[i - 1] == 0) && (aliasweight[j - 1] == 0)); - tied = samewt - && (category[alias[i - 1] - 1] == category[alias[j - 1] - 1]); - k = 1; - while (k <= spp && tied) { - tied = (tied && - y[k - 1][alias[i - 1] - 1] == y[k - 1][alias[j - 1] - 1]); - k++; - } - if (!tied) - break; - aliasweight[i - 1] += aliasweight[j - 1]; - aliasweight[j - 1] = 0; - ally[alias[j - 1] - 1] = alias[i - 1]; - j++; - } - i = j; - } -} /* sitecombine2 */ - - -void sitescrunch2(long sites, long i, long j, steptr aliasweight) -{ - /* move so positively weighted sites come first */ - /* used by dnainvar, dnaml, dnamlk, & restml */ - long itemp; - boolean done, found; - - done = false; - while (!done) { - if (aliasweight[i - 1] > 0) - i++; - else { - if (j <= i) - j = i + 1; - if (j <= sites) { - do { - found = (aliasweight[j - 1] > 0); - j++; - } while (!(found || j > sites)); - if (found) { - j--; - itemp = alias[i - 1]; - alias[i - 1] = alias[j - 1]; - alias[j - 1] = itemp; - itemp = aliasweight[i - 1]; - aliasweight[i - 1] = aliasweight[j - 1]; - aliasweight[j - 1] = itemp; - } else - done = true; - } else - done = true; - } - done = (done || i >= sites); - } -} /* sitescrunch2 */ - - -void makevalues(pointarray treenode, long *zeros, boolean usertree) -{ - /* set 
up fractional likelihoods at tips */ - /* used by dnacomp, dnapars, & dnapenny */ - long i, j; - char ns = 0; - node *p; - - setuptree(treenode, nonodes, usertree); - for (i = 0; i < spp; i++) - alloctip(treenode[i], zeros); - if (!usertree) { - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - do { - allocnontip(p, zeros, endsite); - p = p->next; - } while (p != treenode[i]); - } - } - for (j = 0; j < endsite; j++) { - for (i = 0; i < spp; i++) { - switch (y[i][alias[j] - 1]) { - - case 'A': - ns = 1 << A; - break; - - case 'C': - ns = 1 << C; - break; - - case 'G': - ns = 1 << G; - break; - - case 'U': - ns = 1 << T; - break; - - case 'T': - ns = 1 << T; - break; - - case 'M': - ns = (1 << A) | (1 << C); - break; - - case 'R': - ns = (1 << A) | (1 << G); - break; - - case 'W': - ns = (1 << A) | (1 << T); - break; - - case 'S': - ns = (1 << C) | (1 << G); - break; - - case 'Y': - ns = (1 << C) | (1 << T); - break; - - case 'K': - ns = (1 << G) | (1 << T); - break; - - case 'B': - ns = (1 << C) | (1 << G) | (1 << T); - break; - - case 'D': - ns = (1 << A) | (1 << G) | (1 << T); - break; - - case 'H': - ns = (1 << A) | (1 << C) | (1 << T); - break; - - case 'V': - ns = (1 << A) | (1 << C) | (1 << G); - break; - - case 'N': - ns = (1 << A) | (1 << C) | (1 << G) | (1 << T); - break; - - case 'X': - ns = (1 << A) | (1 << C) | (1 << G) | (1 << T); - break; - - case '?': - ns = (1 << A) | (1 << C) | (1 << G) | (1 << T) | (1 << O); - break; - - case 'O': - ns = 1 << O; - break; - - case '-': - ns = 1 << O; - break; - } - treenode[i]->base[j] = ns; - treenode[i]->numsteps[j] = 0; - } - } -} /* makevalues */ - - -void makevalues2(long categs, pointarray treenode, long endsite, - long spp, sequence y, steptr alias) -{ - /* set up fractional likelihoods at tips */ - /* used by dnaml & dnamlk */ - long i, j, k, l; - bases b; - - for (k = 0; k < endsite; k++) { - j = alias[k]; - for (i = 0; i < spp; i++) { - for (l = 0; l < categs; l++) { - for (b = A; (long)b <= (long)T; 
b = (bases)((long)b + 1)) - treenode[i]->x[k][l][(long)b - (long)A] = 0.0; - switch (y[i][j - 1]) { - - case 'A': - treenode[i]->x[k][l][0] = 1.0; - break; - - case 'C': - treenode[i]->x[k][l][(long)C - (long)A] = 1.0; - break; - - case 'G': - treenode[i]->x[k][l][(long)G - (long)A] = 1.0; - break; - - case 'T': - treenode[i]->x[k][l][(long)T - (long)A] = 1.0; - break; - - case 'U': - treenode[i]->x[k][l][(long)T - (long)A] = 1.0; - break; - - case 'M': - treenode[i]->x[k][l][0] = 1.0; - treenode[i]->x[k][l][(long)C - (long)A] = 1.0; - break; - - case 'R': - treenode[i]->x[k][l][0] = 1.0; - treenode[i]->x[k][l][(long)G - (long)A] = 1.0; - break; - - case 'W': - treenode[i]->x[k][l][0] = 1.0; - treenode[i]->x[k][l][(long)T - (long)A] = 1.0; - break; - - case 'S': - treenode[i]->x[k][l][(long)C - (long)A] = 1.0; - treenode[i]->x[k][l][(long)G - (long)A] = 1.0; - break; - - case 'Y': - treenode[i]->x[k][l][(long)C - (long)A] = 1.0; - treenode[i]->x[k][l][(long)T - (long)A] = 1.0; - break; - - case 'K': - treenode[i]->x[k][l][(long)G - (long)A] = 1.0; - treenode[i]->x[k][l][(long)T - (long)A] = 1.0; - break; - - case 'B': - treenode[i]->x[k][l][(long)C - (long)A] = 1.0; - treenode[i]->x[k][l][(long)G - (long)A] = 1.0; - treenode[i]->x[k][l][(long)T - (long)A] = 1.0; - break; - - case 'D': - treenode[i]->x[k][l][0] = 1.0; - treenode[i]->x[k][l][(long)G - (long)A] = 1.0; - treenode[i]->x[k][l][(long)T - (long)A] = 1.0; - break; - - case 'H': - treenode[i]->x[k][l][0] = 1.0; - treenode[i]->x[k][l][(long)C - (long)A] = 1.0; - treenode[i]->x[k][l][(long)T - (long)A] = 1.0; - break; - - case 'V': - treenode[i]->x[k][l][0] = 1.0; - treenode[i]->x[k][l][(long)C - (long)A] = 1.0; - treenode[i]->x[k][l][(long)G - (long)A] = 1.0; - break; - - case 'N': - for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) - treenode[i]->x[k][l][(long)b - (long)A] = 1.0; - break; - - case 'X': - for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) - treenode[i]->x[k][l][(long)b - 
(long)A] = 1.0; - break; - - case '?': - for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) - treenode[i]->x[k][l][(long)b - (long)A] = 1.0; - break; - - case 'O': - for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) - treenode[i]->x[k][l][(long)b - (long)A] = 1.0; - break; - - case '-': - for (b = A; (long)b <= (long)T; b = (bases)((long)b + 1)) - treenode[i]->x[k][l][(long)b - (long)A] = 1.0; - break; - } - } - } - } -} /* makevalues2 */ - - -void fillin(node *p, node *left, node *rt) -{ - /* sets up for each node in the tree the base sequence - at that point and counts the changes. */ - long i, j, k, n, purset, pyrset; - node *q; - - purset = (1 << (long)A) + (1 << (long)G); - pyrset = (1 << (long)C) + (1 << (long)T); - if (!left) { - memcpy(p->base, rt->base, endsite*sizeof(long)); - memcpy(p->numsteps, rt->numsteps, endsite*sizeof(long)); - q = rt; - } else if (!rt) { - memcpy(p->base, left->base, endsite*sizeof(long)); - memcpy(p->numsteps, left->numsteps, endsite*sizeof(long)); - q = left; - } else { - for (i = 0; i < endsite; i++) { - p->base[i] = left->base[i] & rt->base[i]; - p->numsteps[i] = left->numsteps[i] + rt->numsteps[i]; - if (p->base[i] == 0) { - p->base[i] = left->base[i] | rt->base[i]; - if (transvp) { - if (!((p->base[i] == purset) || (p->base[i] == pyrset))) - p->numsteps[i] += weight[i]; - } - else p->numsteps[i] += weight[i]; - } - } - q = rt; - } - if (left && rt) n = 2; - else n = 1; - for (i = 0; i < endsite; i++) - for (j = (long)A; j <= (long)O; j++) - p->numnuc[i][j] = 0; - for (k = 1; k <= n; k++) { - if (k == 2) q = left; - for (i = 0; i < endsite; i++) { - for (j = (long)A; j <= (long)O; j++) { - if (q->base[i] & (1 << j)) - p->numnuc[i][j]++; - } - } - } -} /* fillin */ - - -long getlargest(long *numnuc) -{ - /* find the largest in array numnuc */ - long i, largest; - - largest = 0; - for (i = (long)A; i <= (long)O; i++) - if (numnuc[i] > largest) - largest = numnuc[i]; - return largest; -} /* getlargest */ - - -void 
multifillin(node *p, node *q, long dnumdesc) -{ - /* sets up for each node in the tree the base sequence - at that point and counts the changes according to the - changes in q's base */ - long i, j, b, largest, descsteps, purset, pyrset; - - memcpy(p->oldbase, p->base, endsite*sizeof(long)); - memcpy(p->oldnumsteps, p->numsteps, endsite*sizeof(long)); - purset = (1 << (long)A) + (1 << (long)G); - pyrset = (1 << (long)C) + (1 << (long)T); - for (i = 0; i < endsite; i++) { - descsteps = 0; - for (j = (long)A; j <= (long)O; j++) { - b = 1 << j; - if ((descsteps == 0) && (p->base[i] & b)) - descsteps = p->numsteps[i] - - (p->numdesc - dnumdesc - p->numnuc[i][j]) * weight[i]; - } - if (dnumdesc == -1) - descsteps -= q->oldnumsteps[i]; - else if (dnumdesc == 0) - descsteps += (q->numsteps[i] - q->oldnumsteps[i]); - else - descsteps += q->numsteps[i]; - if (q->oldbase[i] != q->base[i]) { - for (j = (long)A; j <= (long)O; j++) { - b = 1 << j; - if (transvp) { - if (b & purset) b = purset; - if (b & pyrset) b = pyrset; - } - if ((q->oldbase[i] & b) && !(q->base[i] & b)) - p->numnuc[i][j]--; - else if (!(q->oldbase[i] & b) && (q->base[i] & b)) - p->numnuc[i][j]++; - } - } - largest = getlargest(p->numnuc[i]); - if (q->oldbase[i] != q->base[i]) { - p->base[i] = 0; - for (j = (long)A; j <= (long)O; j++) { - if (p->numnuc[i][j] == largest) - p->base[i] |= (1 << j); - } - } - p->numsteps[i] = (p->numdesc - largest) * weight[i] + descsteps; - } -} /* multifillin */ - - -void sumnsteps(node *p, node *left, node *rt, long a, long b) -{ - /* sets up for each node in the tree the base sequence - at that point and counts the changes. 
*/ - long i; - long ns, rs, ls, purset, pyrset; - - if (!left) { - memcpy(p->numsteps, rt->numsteps, endsite*sizeof(long)); - memcpy(p->base, rt->base, endsite*sizeof(long)); - } else if (!rt) { - memcpy(p->numsteps, left->numsteps, endsite*sizeof(long)); - memcpy(p->base, left->base, endsite*sizeof(long)); - } else { - purset = (1 << (long)A) + (1 << (long)G); - pyrset = (1 << (long)C) + (1 << (long)T); - for (i = a; i < b; i++) { - ls = left->base[i]; - rs = rt->base[i]; - ns = ls & rs; - p->numsteps[i] = left->numsteps[i] + rt->numsteps[i]; - if (ns == 0) { - ns = ls | rs; - if (transvp) { - if (!((ns == purset) || (ns == pyrset))) - p->numsteps[i] += weight[i]; - } - else p->numsteps[i] += weight[i]; - } - p->base[i] = ns; - } - } -} /* sumnsteps */ - - -void sumnsteps2(node *p,node *left,node *rt,long a,long b,long *threshwt) -{ - /* counts the changes at each node. */ - long i, steps; - long ns, rs, ls, purset, pyrset; - long term; - - if (a == 0) p->sumsteps = 0.0; - if (!left) - memcpy(p->numsteps, rt->numsteps, endsite*sizeof(long)); - else if (!rt) - memcpy(p->numsteps, left->numsteps, endsite*sizeof(long)); - else { - purset = (1 << (long)A) + (1 << (long)G); - pyrset = (1 << (long)C) + (1 << (long)T); - for (i = a; i < b; i++) { - ls = left->base[i]; - rs = rt->base[i]; - ns = ls & rs; - p->numsteps[i] = left->numsteps[i] + rt->numsteps[i]; - if (ns == 0) { - ns = ls | rs; - if (transvp) { - if (!((ns == purset) || (ns == pyrset))) - p->numsteps[i] += weight[i]; - } - else p->numsteps[i] += weight[i]; - } - } - } - for (i = a; i < b; i++) { - steps = p->numsteps[i]; - if ((long)steps <= threshwt[i]) - term = steps; - else - term = threshwt[i]; - p->sumsteps += (double)term; - } -} /* sumnsteps2 */ - - -void multisumnsteps(node *p, node *q, long a, long b, long *threshwt) -{ - /* computes the number of steps between p and q */ - long i, j, steps, largest, descsteps, purset, pyrset, b1; - long term; - - if (a == 0) p->sumsteps = 0.0; - purset = (1 << 
(long)A) + (1 << (long)G); - pyrset = (1 << (long)C) + (1 << (long)T); - for (i = a; i < b; i++) { - descsteps = 0; - for (j = (long)A; j <= (long)O; j++) { - if ((descsteps == 0) && (p->base[i] & (1 << j))) - descsteps = p->numsteps[i] - - (p->numdesc - 1 - p->numnuc[i][j]) * weight[i]; - } - descsteps += q->numsteps[i]; - largest = 0; - for (j = (long)A; j <= (long)O; j++) { - b1 = (1 << j); - if (transvp) { - if (b1 & purset) b1 = purset; - if (b1 & pyrset) b1 = pyrset; - } - if (q->base[i] & b1) - p->numnuc[i][j]++; - if (p->numnuc[i][j] > largest) - largest = p->numnuc[i][j]; - } - steps = (p->numdesc - largest) * weight[i] + descsteps; - if ((long)steps <= threshwt[i]) - term = steps; - else - term = threshwt[i]; - p->sumsteps += (double)term; - } -} /* multisumnsteps */ - - -void multisumnsteps2(node *p) -{ - /* counts the changes at each multi-way node. Sums up - steps of all descendants */ - long i, j, largest, purset, pyrset, b1; - node *q; - baseptr b; - - purset = (1 << (long)A) + (1 << (long)G); - pyrset = (1 << (long)C) + (1 << (long)T); - for (i = 0; i < endsite; i++) { - p->numsteps[i] = 0; - q = p->next; - while (q != p) { - if (q->back) { - p->numsteps[i] += q->back->numsteps[i]; - b = q->back->base; - for (j = (long)A; j <= (long)O; j++) { - b1 = (1 << j); - if (transvp) { - if (b1 & purset) b1 = purset; - if (b1 & pyrset) b1 = pyrset; - } - if (b[i] & b1) p->numnuc[i][j]++; - } - } - q = q->next; - } - largest = getlargest(p->numnuc[i]); - p->base[i] = 0; - for (j = (long)A; j <= (long)O; j++) { - if (p->numnuc[i][j] == largest) - p->base[i] |= (1 << j); - } - p->numsteps[i] += ((p->numdesc - largest) * weight[i]); - } -} /* multisumnsteps2 */ - -boolean alltips(node *forknode, node *p) -{ - /* returns true if all descendants of forknode except p are tips; - false otherwise. 
*/ - node *q, *r; - boolean tips; - - tips = true; - r = forknode; - q = forknode->next; - do { - if (q->back && q->back != p && !q->back->tip) - tips = false; - q = q->next; - } while (tips && q != r); - return tips; -} /* alltips */ - - -void gdispose(node *p, node **grbg, pointarray treenode) -{ - /* go through tree throwing away nodes */ - node *q, *r; - - p->back = NULL; - if (p->tip) - return; - treenode[p->index - 1] = NULL; - q = p->next; - while (q != p) { - gdispose(q->back, grbg, treenode); - q->back = NULL; - r = q; - q = q->next; - chucktreenode(grbg, r); - } - chucktreenode(grbg, q); -} /* gdispose */ - - -void preorder(node *p, node *r, node *root, node *removing, node *adding, - node *changing, long dnumdesc) -{ - /* recompute number of steps in preorder taking both ancestoral and - descendent steps into account. removing points to a node being - removed, if any */ - node *q, *p1, *p2; - - if (p && !p->tip && p != adding) { - q = p; - do { - if (p->back != r) { - if (p->numdesc > 2) { - if (changing) - multifillin (p, r, dnumdesc); - else - multifillin (p, r, 0); - } else { - p1 = p->next; - if (!removing) - while (!p1->back) - p1 = p1->next; - else - while (!p1->back || p1->back == removing) - p1 = p1->next; - p2 = p1->next; - if (!removing) - while (!p2->back) - p2 = p2->next; - else - while (!p2->back || p2->back == removing) - p2 = p2->next; - p1 = p1->back; - p2 = p2->back; - if (p->back == p1) p1 = NULL; - else if (p->back == p2) p2 = NULL; - memcpy(p->oldbase, p->base, endsite*sizeof(long)); - memcpy(p->oldnumsteps, p->numsteps, endsite*sizeof(long)); - fillin(p, p1, p2); - } - } - p = p->next; - } while (p != q); - q = p; - do { - preorder(p->next->back, p->next, root, removing, adding, NULL, 0); - p = p->next; - } while (p->next != q); - } -} /* preorder */ - - -void updatenumdesc(node *p, node *root, long n) -{ - /* set p's numdesc to n. If p is the root, numdesc of p's - descendants are set to n-1. 
*/ - node *q; - - q = p; - if (p == root && n > 0) { - p->numdesc = n; - n--; - q = q->next; - } - do { - q->numdesc = n; - q = q->next; - } while (q != p); -} /* updatenumdesc */ - - -void add(node *below,node *newtip,node *newfork,node **root, - boolean recompute,pointarray treenode,node **grbg,long *zeros) -{ - /* inserts the nodes newfork and its left descendant, newtip, - to the tree. below becomes newfork's right descendant. - if newfork is NULL, newtip is added as below's sibling */ - /* used in dnacomp & dnapars */ - node *p; - - if (below != treenode[below->index - 1]) - below = treenode[below->index - 1]; - if (newfork) { - if (below->back != NULL) - below->back->back = newfork; - newfork->back = below->back; - below->back = newfork->next->next; - newfork->next->next->back = below; - newfork->next->back = newtip; - newtip->back = newfork->next; - if (*root == below) - *root = newfork; - updatenumdesc(newfork, *root, 2); - } else { - gnutreenode(grbg, &p, below->index, endsite, zeros); - p->back = newtip; - newtip->back = p; - p->next = below->next; - below->next = p; - updatenumdesc(below, *root, below->numdesc + 1); - } - if (!newtip->tip) - updatenumdesc(newtip, *root, newtip->numdesc); - (*root)->back = NULL; - if (!recompute) - return; - if (!newfork) { - memcpy(newtip->back->base, below->base, endsite*sizeof(long)); - memcpy(newtip->back->numsteps, below->numsteps, endsite*sizeof(long)); - memcpy(newtip->back->numnuc, below->numnuc, endsite*sizeof(nucarray)); - if (below != *root) { - memcpy(below->back->oldbase, zeros, endsite*sizeof(long)); - memcpy(below->back->oldnumsteps, zeros, endsite*sizeof(long)); - multifillin(newtip->back, below->back, 1); - } - if (!newtip->tip) { - memcpy(newtip->back->oldbase, zeros, endsite*sizeof(long)); - memcpy(newtip->back->oldnumsteps, zeros, endsite*sizeof(long)); - preorder(newtip, newtip->back, *root, NULL, NULL, below, 1); - } - memcpy(newtip->oldbase, zeros, endsite*sizeof(long)); - 
memcpy(newtip->oldnumsteps, zeros, endsite*sizeof(long)); - preorder(below, newtip, *root, NULL, newtip, below, 1); - if (below != *root) - preorder(below->back, below, *root, NULL, NULL, NULL, 0); - } else { - fillin(newtip->back, newtip->back->next->back, - newtip->back->next->next->back); - if (!newtip->tip) { - memcpy(newtip->back->oldbase, zeros, endsite*sizeof(long)); - memcpy(newtip->back->oldnumsteps, zeros, endsite*sizeof(long)); - preorder(newtip, newtip->back, *root, NULL, NULL, newfork, 1); - } - if (newfork != *root) { - memcpy(below->back->base, newfork->back->base, endsite*sizeof(long)); - memcpy(below->back->numsteps, newfork->back->numsteps, endsite*sizeof(long)); - preorder(newfork, newtip, *root, NULL, newtip, NULL, 0); - } else { - fillin(below->back, newtip, NULL); - fillin(newfork, newtip, below); - memcpy(below->back->oldbase, zeros, endsite*sizeof(long)); - memcpy(below->back->oldnumsteps, zeros, endsite*sizeof(long)); - preorder(below, below->back, *root, NULL, NULL, newfork, 1); - } - if (newfork != *root) { - memcpy(newfork->oldbase, below->base, endsite*sizeof(long)); - memcpy(newfork->oldnumsteps, below->numsteps, endsite*sizeof(long)); - preorder(newfork->back, newfork, *root, NULL, NULL, NULL, 0); - } - } -} /* add */ - - -void findbelow(node **below, node *item, node *fork) -{ - /* decide which of fork's binary children is below */ - - if (fork->next->back == item) - *below = fork->next->next->back; - else - *below = fork->next->back; -} /* findbelow */ - - -void re_move(node *item, node **fork, node **root, boolean recompute, - pointarray treenode, node **grbg, long *zeros) -{ - /* removes nodes item and its ancestor, fork, from the tree. - the new descendant of fork's ancestor is made to be - fork's second descendant (other than item). Also - returns pointers to the deleted nodes, item and fork. 
- If item belongs to a node with more than 2 descendants, - fork will not be deleted */ - /* used in dnacomp & dnapars */ - node *p, *q, *other = NULL, *otherback = NULL; - - if (item->back == NULL) { - *fork = NULL; - return; - } - *fork = treenode[item->back->index - 1]; - if ((*fork)->numdesc == 2) { - updatenumdesc(*fork, *root, 0); - findbelow(&other, item, *fork); - otherback = other->back; - if (*root == *fork) { - *root = other; - if (!other->tip) - updatenumdesc(other, *root, other->numdesc); - } - p = item->back->next->back; - q = item->back->next->next->back; - if (p != NULL) - p->back = q; - if (q != NULL) - q->back = p; - (*fork)->back = NULL; - p = (*fork)->next; - while (p != *fork) { - p->back = NULL; - p = p->next; - } - } else { - updatenumdesc(*fork, *root, (*fork)->numdesc - 1); - p = *fork; - while (p->next != item->back) - p = p->next; - p->next = item->back->next; - } - if (!item->tip) { - updatenumdesc(item, item, item->numdesc); - if (recompute) { - memcpy(item->back->oldbase, item->back->base, endsite*sizeof(long)); - memcpy(item->back->oldnumsteps, item->back->numsteps, endsite*sizeof(long)); - memcpy(item->back->base, zeros, endsite*sizeof(long)); - memcpy(item->back->numsteps, zeros, endsite*sizeof(long)); - preorder(item, item->back, *root, item->back, NULL, item, -1); - } - } - if ((*fork)->numdesc >= 2) - chucktreenode(grbg, item->back); - item->back = NULL; - if (!recompute) - return; - if ((*fork)->numdesc == 0) { - memcpy(otherback->oldbase, otherback->base, endsite*sizeof(long)); - memcpy(otherback->oldnumsteps, otherback->numsteps, endsite*sizeof(long)); - if (other == *root) { - memcpy(otherback->base, zeros, endsite*sizeof(long)); - memcpy(otherback->numsteps, zeros, endsite*sizeof(long)); - } else { - memcpy(otherback->base, other->back->base, endsite*sizeof(long)); - memcpy(otherback->numsteps, other->back->numsteps, endsite*sizeof(long)); - } - p = other->back; - other->back = otherback; - if (other == *root) - 
preorder(other, otherback, *root, otherback, NULL, other, -1); - else - preorder(other, otherback, *root, NULL, NULL, NULL, 0); - other->back = p; - if (other != *root) { - memcpy(other->oldbase,(*fork)->base, endsite*sizeof(long)); - memcpy(other->oldnumsteps,(*fork)->numsteps, endsite*sizeof(long)); - preorder(other->back, other, *root, NULL, NULL, NULL, 0); - } - } else { - memcpy(item->oldbase, item->base, endsite*sizeof(long)); - memcpy(item->oldnumsteps, item->numsteps, endsite*sizeof(long)); - memcpy(item->base, zeros, endsite*sizeof(long)); - memcpy(item->numsteps, zeros, endsite*sizeof(long)); - preorder(*fork, item, *root, NULL, NULL, *fork, -1); - if (*fork != *root) - preorder((*fork)->back, *fork, *root, NULL, NULL, NULL, 0); - memcpy(item->base, item->oldbase, endsite*sizeof(long)); - memcpy(item->numsteps, item->oldnumsteps, endsite*sizeof(long)); - } -} /* remove */ - - -void postorder(node *p) -{ - /* traverses an n-ary tree, suming up steps at a node's descendants */ - /* used in dnacomp, dnapars, & dnapenny */ - node *q; - - if (p->tip) - return; - q = p->next; - while (q != p) { - postorder(q->back); - q = q->next; - } - zeronumnuc(p, endsite); - if (p->numdesc > 2) - multisumnsteps2(p); - else - fillin(p, p->next->back, p->next->next->back); -} /* postorder */ - - -void getnufork(node **nufork,node **grbg,pointarray treenode,long *zeros) -{ - /* find a fork not used currently */ - long i; - - i = spp; - while (treenode[i] && treenode[i]->numdesc > 0) i++; - if (!treenode[i]) - gnutreenode(grbg, &treenode[i], i, endsite, zeros); - *nufork = treenode[i]; -} /* getnufork */ - - -void reroot(node *outgroup, node *root) -{ - /* reorients tree, putting outgroup in desired position. used if - the root is binary. 
*/ - /* used in dnacomp & dnapars */ - node *p, *q; - - if (outgroup->back->index == root->index) - return; - p = root->next; - q = root->next->next; - p->back->back = q->back; - q->back->back = p->back; - p->back = outgroup; - q->back = outgroup->back; - outgroup->back->back = q; - outgroup->back = p; -} /* reroot */ - - -void reroot2(node *outgroup, node *root) -{ - /* reorients tree, putting outgroup in desired position. */ - /* used in dnacomp & dnapars */ - node *p; - - p = outgroup->back->next; - while (p->next != outgroup->back) - p = p->next; - root->next = outgroup->back; - p->next = root; -} /* reroot2 */ - - -void reroot3(node *outgroup, node *root, node *root2, node *lastdesc, - node **grbg) -{ - /* reorients tree, putting back outgroup in original position. */ - /* used in dnacomp & dnapars */ - node *p; - - p = root->next; - while (p->next != root) - p = p->next; - chucktreenode(grbg, root); - p->next = outgroup->back; - root2->next = lastdesc->next; - lastdesc->next = root2; -} /* reroot3 */ - - -void savetraverse(node *p) -{ - /* sets BOOLEANs that indicate which way is down */ - node *q; - - p->bottom = true; - if (p->tip) - return; - q = p->next; - while (q != p) { - q->bottom = false; - savetraverse(q->back); - q = q->next; - } -} /* savetraverse */ - - -void newindex(long i, node *p) -{ - /* assigns index i to node p */ - - while (p->index != i) { - p->index = i; - p = p->next; - } -} /* newindex */ - - -void flipindexes(long nextnode, pointarray treenode) -{ - /* flips index of nodes between nextnode and last node. 
*/ - long last; - node *temp; - - last = nonodes; - while (treenode[last - 1]->numdesc == 0) - last--; - if (last > nextnode) { - temp = treenode[nextnode - 1]; - treenode[nextnode - 1] = treenode[last - 1]; - treenode[last - 1] = temp; - newindex(nextnode, treenode[nextnode - 1]); - newindex(last, treenode[last - 1]); - } -} /* flipindexes */ - - -boolean parentinmulti(node *anode) -{ - /* sees if anode's parent has more than 2 children */ - node *p; - - while (!anode->bottom) anode = anode->next; - p = anode->back; - while (!p->bottom) - p = p->next; - return (p->numdesc > 2); -} /* parentinmulti */ - - -long sibsvisited(node *anode, long *place) -{ - /* computes the number of nodes which are visited earlier than anode among - its siblings */ - node *p; - long nvisited; - - while (!anode->bottom) anode = anode->next; - p = anode->back->next; - nvisited = 0; - do { - if (!p->bottom && place[p->back->index - 1] != 0) - nvisited++; - p = p->next; - } while (p != anode->back); - return nvisited; -} /* sibsvisited */ - - -long smallest(node *anode, long *place) -{ - /* finds the smallest index of sibling of anode */ - node *p; - long min; - - while (!anode->bottom) anode = anode->next; - p = anode->back->next; - if (p->bottom) p = p->next; - min = nonodes; - do { - if (p->back && place[p->back->index - 1] != 0) { - if (p->back->index <= spp) { - if (p->back->index < min) - min = p->back->index; - } else { - if (place[p->back->index - 1] < min) - min = place[p->back->index - 1]; - } - } - p = p->next; - if (p->bottom) p = p->next; - } while (p != anode->back); - return min; -} /* smallest */ - - -void bintomulti(node **root, node **binroot, node **grbg, long *zeros) -{ /* attaches root's left child to its right child and makes - the right child new root */ - node *left, *right, *newnode, *temp; - - right = (*root)->next->next->back; - left = (*root)->next->back; - if (right->tip) { - (*root)->next = right->back; - (*root)->next->next = left->back; - temp = left; - left 
= right; - right = temp; - right->back->next = *root; - } - gnutreenode(grbg, &newnode, right->index, endsite, zeros); - newnode->next = right->next; - newnode->back = left; - left->back = newnode; - right->next = newnode; - (*root)->next->back = (*root)->next->next->back = NULL; - *binroot = *root; - (*binroot)->numdesc = 0; - *root = right; - (*root)->numdesc++; - (*root)->back = NULL; -} /* bintomulti */ - - -void backtobinary(node **root, node *binroot, node **grbg) -{ /* restores binary root */ - node *p; - - binroot->next->back = (*root)->next->back; - (*root)->next->back->back = binroot->next; - p = (*root)->next; - (*root)->next = p->next; - binroot->next->next->back = *root; - (*root)->back = binroot->next->next; - chucktreenode(grbg, p); - (*root)->numdesc--; - *root = binroot; - (*root)->numdesc = 2; -} /* backtobinary */ - - -boolean outgrin(node *root, node *outgrnode) -{ /* checks if outgroup node is a child of root */ - node *p; - - p = root->next; - while (p != root) { - if (p->back == outgrnode) - return true; - p = p->next; - } - return false; -} /* outgrin */ - - -void flipnodes(node *nodea, node *nodeb) -{ /* flip nodes */ - node *backa, *backb; - - backa = nodea->back; - backb = nodeb->back; - backa->back = nodeb; - backb->back = nodea; - nodea->back = backb; - nodeb->back = backa; -} /* flipnodes */ - - -void moveleft(node *root, node *outgrnode, node **flipback) -{ /* makes outgroup node to leftmost child of root */ - node *p; - boolean done; - - p = root->next; - done = false; - while (p != root && !done) { - if (p->back == outgrnode) { - *flipback = p; - flipnodes(root->next->back, p->back); - done = true; - } - p = p->next; - } -} /* moveleft */ - - -void savetree(node *root, long *place, pointarray treenode, - node **grbg, long *zeros) -{ /* record in place where each species has to be - added to reconstruct this tree */ - /* used by dnacomp & dnapars */ - long i, j, nextnode, nvisited; - node *p, *q, *r = NULL, *root2, *lastdesc, - 
*outgrnode, *binroot, *flipback; - boolean done, newfork; - - binroot = NULL; - lastdesc = NULL; - root2 = NULL; - flipback = NULL; - outgrnode = treenode[outgrno - 1]; - if (root->numdesc == 2) - bintomulti(&root, &binroot, grbg, zeros); - if (outgrin(root, outgrnode)) { - if (outgrnode != root->next->back) - moveleft(root, outgrnode, &flipback); - } else { - root2 = root; - lastdesc = root->next; - while (lastdesc->next != root) - lastdesc = lastdesc->next; - lastdesc->next = root->next; - gnutreenode(grbg, &root, outgrnode->back->index, endsite, zeros); - root->numdesc = root2->numdesc; - reroot2(outgrnode, root); - } - savetraverse(root); - nextnode = spp + 1; - for (i = nextnode; i <= nonodes; i++) - if (treenode[i - 1]->numdesc == 0) - flipindexes(i, treenode); - for (i = 0; i < nonodes; i++) - place[i] = 0; - place[root->index - 1] = 1; - for (i = 1; i <= spp; i++) { - p = treenode[i - 1]; - while (place[p->index - 1] == 0) { - place[p->index - 1] = i; - while (!p->bottom) - p = p->next; - r = p; - p = p->back; - } - if (i > 1) { - q = treenode[i - 1]; - newfork = true; - nvisited = sibsvisited(q, place); - if (nvisited == 0) { - if (parentinmulti(r)) { - nvisited = sibsvisited(r, place); - if (nvisited == 0) - place[i - 1] = place[p->index - 1]; - else if (nvisited == 1) - place[i - 1] = smallest(r, place); - else { - place[i - 1] = -smallest(r, place); - newfork = false; - } - } else - place[i - 1] = place[p->index - 1]; - } else if (nvisited == 1) { - place[i - 1] = place[p->index - 1]; - } else { - place[i - 1] = -smallest(q, place); - newfork = false; - } - if (newfork) { - j = place[p->index - 1]; - done = false; - while (!done) { - place[p->index - 1] = nextnode; - while (!p->bottom) - p = p->next; - p = p->back; - done = (p == NULL); - if (!done) - done = (place[p->index - 1] != j); - if (done) { - nextnode++; - } - } - } - } - } - if (flipback) - flipnodes(outgrnode, flipback->back); - else { - if (root2) { - reroot3(outgrnode, root, root2, 
lastdesc, grbg); - root = root2; - } - } - if (binroot) - backtobinary(&root, binroot, grbg); -} /* savetree */ - - -void addnsave(node *p, node *item, node *nufork, node **root, node **grbg, - boolean multf, pointarray treenode, long *place, long *zeros) -{ /* adds item to tree and save it. Then removes item. */ - node *dummy; - - if (!multf) - add(p, item, nufork, root, false, treenode, grbg, zeros); - else - add(p, item, NULL, root, false, treenode, grbg, zeros); - savetree(*root, place, treenode, grbg, zeros); - if (!multf) - re_move(item, &nufork, root, false, treenode, grbg, zeros); - else - re_move(item, &dummy, root, false, treenode, grbg, zeros); -} /* addnsave */ - - -void addbestever(long *pos, long *nextree, long maxtrees, boolean collapse, - long *place, bestelm *bestrees) -{ /* adds first best tree */ - - *pos = 1; - *nextree = 1; - initbestrees(bestrees, maxtrees, true); - initbestrees(bestrees, maxtrees, false); - addtree(*pos, nextree, collapse, place, bestrees); -} /* addbestever */ - - -void addtiedtree(long pos, long *nextree, long maxtrees, boolean collapse, - long *place, bestelm *bestrees) -{ /* add tied tree */ - - if (*nextree <= maxtrees) - addtree(pos, nextree, collapse, place, bestrees); -} /* addtiedtree */ - - -void clearcollapse(pointarray treenode) -{ - /* clears collapse status at a node */ - long i; - node *p; - - for (i = 0; i < nonodes; i++) { - treenode[i]->collapse = undefined; - if (!treenode[i]->tip) { - p = treenode[i]->next; - while (p != treenode[i]) { - p->collapse = undefined; - p = p->next; - } - } - } -} /* clearcollapse */ - - -void clearbottom(pointarray treenode) -{ - /* clears boolean bottom at a node */ - long i; - node *p; - - for (i = 0; i < nonodes; i++) { - treenode[i]->bottom = false; - if (!treenode[i]->tip) { - p = treenode[i]->next; - while (p != treenode[i]) { - p->bottom = false; - p = p->next; - } - } - } -} /* clearbottom */ - - -void collabranch(node *collapfrom, node *tempfrom, node *tempto) -{ /* 
collapse branch from collapfrom */ - long i, j, b, largest, descsteps; - boolean done; - - for (i = 0; i < endsite; i++) { - descsteps = 0; - for (j = (long)A; j <= (long)O; j++) { - b = 1 << j; - if ((descsteps == 0) && (collapfrom->base[i] & b)) - descsteps = tempfrom->oldnumsteps[i] - - (collapfrom->numdesc - collapfrom->numnuc[i][j]) - * weight[i]; - } - done = false; - for (j = (long)A; j <= (long)O; j++) { - b = 1 << j; - if (!done && (tempto->base[i] & b)) { - descsteps += (tempto->numsteps[i] - - (tempto->numdesc - collapfrom->numdesc - - tempto->numnuc[i][j]) * weight[i]); - done = true; - } - } - for (j = (long)A; j <= (long)O; j++) - tempto->numnuc[i][j] += collapfrom->numnuc[i][j]; - largest = getlargest(tempto->numnuc[i]); - tempto->base[i] = 0; - for (j = (long)A; j <= (long)O; j++) { - if (tempto->numnuc[i][j] == largest) - tempto->base[i] |= (1 << j); - } - tempto->numsteps[i] = (tempto->numdesc - largest) * weight[i] + descsteps; - } -} /* collabranch */ - - -boolean allcommonbases(node *a, node *b, boolean *allsame) -{ /* see if bases are common at all sites for nodes a and b */ - long i; - boolean allcommon; - - allcommon = true; - *allsame = true; - for (i = 0; i < endsite; i++) { - if ((a->base[i] & b->base[i]) == 0) - allcommon = false; - else if (a->base[i] != b->base[i]) - *allsame = false; - } - return allcommon; -} /* allcommonbases */ - - -void findbottom(node *p, node **bottom) -{ /* find a node with field bottom set at node p */ - node *q; - - if (p->bottom) - *bottom = p; - else { - q = p->next; - while(!q->bottom && q != p) - q = q->next; - *bottom = q; - } -} /* findbottom */ - - -boolean moresteps(node *a, node *b) -{ /* see if numsteps of node a exceeds those of node b */ - long i; - - for (i = 0; i < endsite; i++) - if (a->numsteps[i] > b->numsteps[i]) - return true; - return false; -} /* moresteps */ - - -boolean passdown(node *desc, node *parent, node *start, node *below, - node *item, node *added, node *total, node *tempdsc, - 
node *tempprt, boolean multf) -{ /* track down to node start to see if an ancestor branch can be collapsed */ - node *temp; - boolean done, allsame; - - done = (parent == start); - while (!done) { - desc = parent; - findbottom(parent->back, &parent); - if (multf && start == below && parent == below) - parent = added; - memcpy(tempdsc->base, tempprt->base, endsite*sizeof(long)); - memcpy(tempdsc->numsteps, tempprt->numsteps, endsite*sizeof(long)); - memcpy(tempdsc->oldbase, desc->base, endsite*sizeof(long)); - memcpy(tempdsc->oldnumsteps, desc->numsteps, endsite*sizeof(long)); - memcpy(tempprt->base, parent->base, endsite*sizeof(long)); - memcpy(tempprt->numsteps, parent->numsteps, endsite*sizeof(long)); - memcpy(tempprt->numnuc, parent->numnuc, endsite*sizeof(nucarray)); - tempprt->numdesc = parent->numdesc; - multifillin(tempprt, tempdsc, 0); - if (!allcommonbases(tempprt, parent, &allsame)) - return false; - else if (moresteps(tempprt, parent)) - return false; - else if (allsame) - return true; - if (parent == added) - parent = below; - done = (parent == start); - if (done && ((start == item) || (!multf && start == below))) { - memcpy(tempdsc->base, tempprt->base, endsite*sizeof(long)); - memcpy(tempdsc->numsteps, tempprt->numsteps, endsite*sizeof(long)); - memcpy(tempdsc->oldbase, start->base, endsite*sizeof(long)); - memcpy(tempdsc->oldnumsteps, start->numsteps, endsite*sizeof(long)); - multifillin(added, tempdsc, 0); - tempprt = added; - } - } - temp = tempdsc; - if (start == below || start == item) - fillin(temp, tempprt, below->back); - else - fillin(temp, tempprt, added); - return !moresteps(temp, total); -} /* passdown */ - - -boolean trycollapdesc(node *desc, node *parent, node *start, - node *below, node *item, node *added, node *total, - node *tempdsc, node *tempprt, boolean multf, long *zeros) - { /* see if branch between nodes desc and parent can be collapsed */ - boolean allsame; - - if (desc->numdesc == 1) - return true; - if (multf && start == 
below && parent == below) - parent = added; - memcpy(tempdsc->base, zeros, endsite*sizeof(long)); - memcpy(tempdsc->numsteps, zeros, endsite*sizeof(long)); - memcpy(tempdsc->oldbase, desc->base, endsite*sizeof(long)); - memcpy(tempdsc->oldnumsteps, desc->numsteps, endsite*sizeof(long)); - memcpy(tempprt->base, parent->base, endsite*sizeof(long)); - memcpy(tempprt->numsteps, parent->numsteps, endsite*sizeof(long)); - memcpy(tempprt->numnuc, parent->numnuc, endsite*sizeof(nucarray)); - tempprt->numdesc = parent->numdesc - 1; - multifillin(tempprt, tempdsc, -1); - tempprt->numdesc += desc->numdesc; - collabranch(desc, tempdsc, tempprt); - if (!allcommonbases(tempprt, parent, &allsame) || - moresteps(tempprt, parent)) { - if (parent != added) { - desc->collapse = nocollap; - parent->collapse = nocollap; - } - return false; - } else if (allsame) { - if (parent != added) { - desc->collapse = tocollap; - parent->collapse = tocollap; - } - return true; - } - if (parent == added) - parent = below; - if ((start == item && parent == item) || - (!multf && start == below && parent == below)) { - memcpy(tempdsc->base, tempprt->base, endsite*sizeof(long)); - memcpy(tempdsc->numsteps, tempprt->numsteps, endsite*sizeof(long)); - memcpy(tempdsc->oldbase, start->base, endsite*sizeof(long)); - memcpy(tempdsc->oldnumsteps, start->numsteps, endsite*sizeof(long)); - memcpy(tempprt->base, added->base, endsite*sizeof(long)); - memcpy(tempprt->numsteps, added->numsteps, endsite*sizeof(long)); - memcpy(tempprt->numnuc, added->numnuc, endsite*sizeof(nucarray)); - tempprt->numdesc = added->numdesc; - multifillin(tempprt, tempdsc, 0); - if (!allcommonbases(tempprt, added, &allsame)) - return false; - else if (moresteps(tempprt, added)) - return false; - else if (allsame) - return true; - } - return passdown(desc, parent, start, below, item, added, total, tempdsc, - tempprt, multf); -} /* trycollapdesc */ - - -void setbottom(node *p) -{ /* set field bottom at node p */ - node *q; - - p->bottom = 
true; - q = p->next; - do { - q->bottom = false; - q = q->next; - } while (q != p); -} /* setbottom */ - -boolean zeroinsubtree(node *subtree, node *start, node *below, node *item, - node *added, node *total, node *tempdsc, node *tempprt, - boolean multf, node* root, long *zeros) -{ /* sees if subtree contains a zero length branch */ - node *p; - - if (!subtree->tip) { - setbottom(subtree); - p = subtree->next; - do { - if (p->back && !p->back->tip && - !((p->back->collapse == nocollap) && (subtree->collapse == nocollap)) - && (subtree->numdesc != 1)) { - if ((p->back->collapse == tocollap) && (subtree->collapse == tocollap) - && multf && (subtree != below)) - return true; - /* when root->numdesc == 2 - * there is no mandatory step at the root, - * instead of checking at the root we check around it - * we only need to check p because the first if - * statement already gets rid of it for the subtree */ - else if ((p->back->index != root->index || root->numdesc > 2) && - trycollapdesc(p->back, subtree, start, below, item, added, total, - tempdsc, tempprt, multf, zeros)) - return true; - else if ((p->back->index == root->index && root->numdesc == 2) && - !(root->next->back->tip) && !(root->next->next->back->tip) && - trycollapdesc(root->next->back, root->next->next->back, start, - below, item,added, total, tempdsc, tempprt, multf, zeros)) - return true; - } - p = p->next; - } while (p != subtree); - p = subtree->next; - do { - if (p->back && !p->back->tip) { - if (zeroinsubtree(p->back, start, below, item, added, total, - tempdsc, tempprt, multf, root, zeros)) - return true; - } - p = p->next; - } while (p != subtree); - } - return false; -} /* zeroinsubtree */ - - -boolean collapsible(node *item, node *below, node *temp, node *temp1, - node *tempdsc, node *tempprt, node *added, node *total, - boolean multf, node *root, long *zeros, pointarray treenode) -{ - /* sees if any branch can be collapsed */ - node *belowbk; - boolean allsame; - - if (multf) { - 
memcpy(tempdsc->base, item->base, endsite*sizeof(long)); - memcpy(tempdsc->numsteps, item->numsteps, endsite*sizeof(long)); - memcpy(tempdsc->oldbase, zeros, endsite*sizeof(long)); - memcpy(tempdsc->oldnumsteps, zeros, endsite*sizeof(long)); - memcpy(added->base, below->base, endsite*sizeof(long)); - memcpy(added->numsteps, below->numsteps, endsite*sizeof(long)); - memcpy(added->numnuc, below->numnuc, endsite*sizeof(nucarray)); - added->numdesc = below->numdesc + 1; - multifillin(added, tempdsc, 1); - } else { - fillin(added, item, below); - added->numdesc = 2; - } - fillin(total, added, below->back); - clearbottom(treenode); - if (below->back) { - if (zeroinsubtree(below->back, below->back, below, item, added, total, - tempdsc, tempprt, multf, root, zeros)) - return true; - } - if (multf) { - if (zeroinsubtree(below, below, below, item, added, total, - tempdsc, tempprt, multf, root, zeros)) - return true; - } else if (!below->tip) { - if (zeroinsubtree(below, below, below, item, added, total, - tempdsc, tempprt, multf, root, zeros)) - return true; - } - if (!item->tip) { - if (zeroinsubtree(item, item, below, item, added, total, - tempdsc, tempprt, multf, root, zeros)) - return true; - } - if (multf && below->back && !below->back->tip) { - memcpy(tempdsc->base, zeros, endsite*sizeof(long)); - memcpy(tempdsc->numsteps, zeros, endsite*sizeof(long)); - memcpy(tempdsc->oldbase, added->base, endsite*sizeof(long)); - memcpy(tempdsc->oldnumsteps, added->numsteps, endsite*sizeof(long)); - if (below->back == treenode[below->back->index - 1]) - belowbk = below->back->next; - else - belowbk = treenode[below->back->index - 1]; - memcpy(tempprt->base, belowbk->base, endsite*sizeof(long)); - memcpy(tempprt->numsteps, belowbk->numsteps, endsite*sizeof(long)); - memcpy(tempprt->numnuc, belowbk->numnuc, endsite*sizeof(nucarray)); - tempprt->numdesc = belowbk->numdesc - 1; - multifillin(tempprt, tempdsc, -1); - tempprt->numdesc += added->numdesc; - collabranch(added, tempdsc, 
tempprt); - if (!allcommonbases(tempprt, belowbk, &allsame)) - return false; - else if (allsame && !moresteps(tempprt, belowbk)) - return true; - else if (belowbk->back) { - fillin(temp, tempprt, belowbk->back); - fillin(temp1, belowbk, belowbk->back); - return !moresteps(temp, temp1); - } - } - return false; -} /* collapsible */ - - -void replaceback(node **oldback, node *item, node *forknode, - node **grbg, long *zeros) -{ /* replaces back node of item with another */ - node *p; - - p = forknode; - while (p->next->back != item) - p = p->next; - *oldback = p->next; - gnutreenode(grbg, &p->next, forknode->index, endsite, zeros); - p->next->next = (*oldback)->next; - p->next->back = (*oldback)->back; - p->next->back->back = p->next; - (*oldback)->next = (*oldback)->back = NULL; -} /* replaceback */ - - -void putback(node *oldback, node *item, node *forknode, node **grbg) -{ /* restores node to back of item */ - node *p, *q; - - p = forknode; - while (p->next != item->back) - p = p->next; - q = p->next; - oldback->next = p->next->next; - p->next = oldback; - oldback->back = item; - item->back = oldback; - oldback->index = forknode->index; - chucktreenode(grbg, q); -} /* putback */ - - -void savelocrearr(node *item, node *forknode, node *below, node *tmp, - node *tmp1, node *tmp2, node *tmp3, node *tmprm, node *tmpadd, - node **root, long maxtrees, long *nextree, boolean multf, - boolean bestever, boolean *saved, long *place, - bestelm *bestrees, pointarray treenode, node **grbg, - long *zeros) -{ /* saves tied or better trees during local rearrangements by removing - item from forknode and adding to below */ - node *other, *otherback = NULL, *oldfork, *nufork, *oldback; - long pos; - boolean found, collapse; - - if (forknode->numdesc == 2) { - findbelow(&other, item, forknode); - otherback = other->back; - oldback = NULL; - } else { - other = NULL; - replaceback(&oldback, item, forknode, grbg, zeros); - } - re_move(item, &oldfork, root, false, treenode, grbg, zeros); 
- if (!multf) - getnufork(&nufork, grbg, treenode, zeros); - else - nufork = NULL; - addnsave(below, item, nufork, root, grbg, multf, treenode, place, zeros); - pos = 0; - findtree(&found, &pos, *nextree, place, bestrees); - if (other) { - add(other, item, oldfork, root, false, treenode, grbg, zeros); - if (otherback->back != other) - flipnodes(item, other); - } else - add(forknode, item, NULL, root, false, treenode, grbg, zeros); - *saved = false; - if (found) { - if (oldback) - putback(oldback, item, forknode, grbg); - } else { - if (oldback) - chucktreenode(grbg, oldback); - re_move(item, &oldfork, root, true, treenode, grbg, zeros); - collapse = collapsible(item, below, tmp, tmp1, tmp2, tmp3, tmprm, - tmpadd, multf, *root, zeros, treenode); - if (!collapse) { - if (bestever) - addbestever(&pos, nextree, maxtrees, collapse, place, bestrees); - else - addtiedtree(pos, nextree, maxtrees, collapse, place, bestrees); - } - if (other) - add(other, item, oldfork, root, true, treenode, grbg, zeros); - else - add(forknode, item, NULL, root, true, treenode, grbg, zeros); - *saved = !collapse; - } -} /* savelocrearr */ - - -void clearvisited(pointarray treenode) -{ - /* clears boolean visited at a node */ - long i; - node *p; - - for (i = 0; i < nonodes; i++) { - treenode[i]->visited = false; - if (!treenode[i]->tip) { - p = treenode[i]->next; - while (p != treenode[i]) { - p->visited = false; - p = p->next; - } - } - } -} /* clearvisited */ - - -void hyprint(long b1, long b2, struct LOC_hyptrav *htrav, - pointarray treenode, Char *basechar) -{ - /* print out states in sites b1 through b2 at node */ - long i, j, k, n; - boolean dot; - bases b; - - if (htrav->bottom) { - if (!outgropt) - fprintf(outfile, " "); - else - fprintf(outfile, "root "); - } else - fprintf(outfile, "%4ld ", htrav->r->back->index - spp); - if (htrav->r->tip) { - for (i = 0; i < nmlngth; i++) - putc(nayme[htrav->r->index - 1][i], outfile); - } else - fprintf(outfile, "%4ld ", htrav->r->index - spp); 
- if (htrav->bottom) - fprintf(outfile, " "); - else if (htrav->nonzero) - fprintf(outfile, " yes "); - else if (htrav->maybe) - fprintf(outfile, " maybe "); - else - fprintf(outfile, " no "); - for (i = b1; i <= b2; i++) { - j = location[ally[i - 1] - 1]; - htrav->tempset = htrav->r->base[j - 1]; - htrav->anc = htrav->hypset[j - 1]; - if (!htrav->bottom) - htrav->anc = treenode[htrav->r->back->index - 1]->base[j - 1]; - dot = dotdiff && (htrav->tempset == htrav->anc && !htrav->bottom); - if (dot) - putc('.', outfile); - else if (htrav->tempset == (1 << A)) - putc('A', outfile); - else if (htrav->tempset == (1 << C)) - putc('C', outfile); - else if (htrav->tempset == (1 << G)) - putc('G', outfile); - else if (htrav->tempset == (1 << T)) - putc('T', outfile); - else if (htrav->tempset == (1 << O)) - putc('-', outfile); - else { - k = 1; - n = 0; - for (b = A; b <= O; b = b + 1) { - if (((1 << b) & htrav->tempset) != 0) - n += k; - k += k; - } - putc(basechar[n - 1], outfile); - } - if (i % 10 == 0) - putc(' ', outfile); - } - putc('\n', outfile); -} /* hyprint */ - - -void gnubase(gbases **p, gbases **garbage, long endsite) -{ - /* this and the following are do-it-yourself garbage collectors. 
- Make a new node or pull one off the garbage list */ - if (*garbage != NULL) { - *p = *garbage; - *garbage = (*garbage)->next; - } else { - *p = (gbases *)Malloc(sizeof(gbases)); - (*p)->base = (baseptr)Malloc(endsite*sizeof(long)); - } - (*p)->next = NULL; -} /* gnubase */ - - -void chuckbase(gbases *p, gbases **garbage) -{ - /* collect garbage on p -- put it on front of garbage list */ - p->next = *garbage; - *garbage = p; -} /* chuckbase */ - - -void hyptrav(node *r_, long *hypset_, long b1, long b2, boolean bottom_, - pointarray treenode, gbases **garbage, Char *basechar) -{ - /* compute, print out states at one interior node */ - struct LOC_hyptrav Vars; - long i, j, k; - long largest; - gbases *ancset; - nucarray *tempnuc; - node *p, *q; - - Vars.bottom = bottom_; - Vars.r = r_; - Vars.hypset = hypset_; - gnubase(&ancset, garbage, endsite); - tempnuc = (nucarray *)Malloc(endsite*sizeof(nucarray)); - Vars.maybe = false; - Vars.nonzero = false; - if (!Vars.r->tip) - zeronumnuc(Vars.r, endsite); - for (i = b1 - 1; i < b2; i++) { - j = location[ally[i] - 1]; - Vars.anc = Vars.hypset[j - 1]; - if (!Vars.r->tip) { - p = Vars.r->next; - for (k = (long)A; k <= (long)O; k++) - if (Vars.anc & (1 << k)) - Vars.r->numnuc[j - 1][k]++; - do { - for (k = (long)A; k <= (long)O; k++) - if (p->back->base[j - 1] & (1 << k)) - Vars.r->numnuc[j - 1][k]++; - p = p->next; - } while (p != Vars.r); - largest = getlargest(Vars.r->numnuc[j - 1]); - Vars.tempset = 0; - for (k = (long)A; k <= (long)O; k++) { - if (Vars.r->numnuc[j - 1][k] == largest) - Vars.tempset |= (1 << k); - } - Vars.r->base[j - 1] = Vars.tempset; - } - if (!Vars.bottom) - Vars.anc = treenode[Vars.r->back->index - 1]->base[j - 1]; - Vars.nonzero = (Vars.nonzero || (Vars.r->base[j - 1] & Vars.anc) == 0); - Vars.maybe = (Vars.maybe || Vars.r->base[j - 1] != Vars.anc); - } - hyprint(b1, b2, &Vars, treenode, basechar); - Vars.bottom = false; - if (!Vars.r->tip) { - memcpy(tempnuc, Vars.r->numnuc, 
endsite*sizeof(nucarray)); - q = Vars.r->next; - do { - memcpy(Vars.r->numnuc, tempnuc, endsite*sizeof(nucarray)); - for (i = b1 - 1; i < b2; i++) { - j = location[ally[i] - 1]; - for (k = (long)A; k <= (long)O; k++) - if (q->back->base[j - 1] & (1 << k)) - Vars.r->numnuc[j - 1][k]--; - largest = getlargest(Vars.r->numnuc[j - 1]); - ancset->base[j - 1] = 0; - for (k = (long)A; k <= (long)O; k++) - if (Vars.r->numnuc[j - 1][k] == largest) - ancset->base[j - 1] |= (1 << k); - if (!Vars.bottom) - Vars.anc = ancset->base[j - 1]; - } - hyptrav(q->back, ancset->base, b1, b2, Vars.bottom, - treenode, garbage, basechar); - q = q->next; - } while (q != Vars.r); - } - chuckbase(ancset, garbage); -} /* hyptrav */ - - -void hypstates(long chars, node *root, pointarray treenode, - gbases **garbage, Char *basechar) -{ - /* fill in and describe states at interior nodes */ - /* used in dnacomp, dnapars, & dnapenny */ - long i, n; - baseptr nothing; - - fprintf(outfile, "\nFrom To Any Steps? State at upper node\n"); - fprintf(outfile, " "); - if (dotdiff) - fprintf(outfile, " ( . 
means same as in the node below it on tree)\n"); - nothing = (baseptr)Malloc(endsite*sizeof(long)); - for (i = 0; i < endsite; i++) - nothing[i] = 0; - for (i = 1; i <= ((chars - 1) / 40 + 1); i++) { - putc('\n', outfile); - n = i * 40; - if (n > chars) - n = chars; - hyptrav(root, nothing, i * 40 - 39, n, true, treenode, garbage, basechar); - } - free(nothing); -} /* hypstates */ - - -void initbranchlen(node *p) -{ - node *q; - - p->v = 0.0; - if (p->back) - p->back->v = 0.0; - if (p->tip) - return; - q = p->next; - while (q != p) { - initbranchlen(q->back); - q = q->next; - } - q = p->next; - while (q != p) { - q->v = 0.0; - q = q->next; - } -} /* initbranchlen */ - - -void initmin(node *p, long sitei, boolean internal) -{ - long i; - - if (internal) { - for (i = (long)A; i <= (long)O; i++) { - p->cumlengths[i] = 0; - p->numreconst[i] = 1; - } - } else { - for (i = (long)A; i <= (long)O; i++) { - if (p->base[sitei - 1] & (1 << i)) { - p->cumlengths[i] = 0; - p->numreconst[i] = 1; - } else { - p->cumlengths[i] = -1; - p->numreconst[i] = 0; - } - } - } -} /* initmin */ - - -void initbase(node *p, long sitei) -{ - /* traverse tree to initialize base at internal nodes */ - node *q; - long i, largest; - - if (p->tip) - return; - q = p->next; - while (q != p) { - if (q->back) { - memcpy(q->numnuc, p->numnuc, endsite*sizeof(nucarray)); - for (i = (long)A; i <= (long)O; i++) { - if (q->back->base[sitei - 1] & (1 << i)) - q->numnuc[sitei - 1][i]--; - } - if (p->back) { - for (i = (long)A; i <= (long)O; i++) { - if (p->back->base[sitei - 1] & (1 << i)) - q->numnuc[sitei - 1][i]++; - } - } - largest = getlargest(q->numnuc[sitei - 1]); - q->base[sitei - 1] = 0; - for (i = (long)A; i <= (long)O; i++) { - if (q->numnuc[sitei - 1][i] == largest) - q->base[sitei - 1] |= (1 << i); - } - } - q = q->next; - } - q = p->next; - while (q != p) { - initbase(q->back, sitei); - q = q->next; - } -} /* initbase */ - - -void inittreetrav(node *p, long sitei) -{ - /* traverse tree to clear 
boolean initialized and set up base */ - node *q; - - if (p->tip) { - initmin(p, sitei, false); - p->initialized = true; - return; - } - q = p->next; - while (q != p) { - inittreetrav(q->back, sitei); - q = q->next; - } - initmin(p, sitei, true); - p->initialized = false; - q = p->next; - while (q != p) { - initmin(q, sitei, true); - q->initialized = false; - q = q->next; - } -} /* inittreetrav */ - - -void compmin(node *p, node *desc) -{ - /* computes minimum lengths up to p */ - long i, j, minn, cost, desclen, descrecon=0, maxx; - - maxx = 10 * spp; - for (i = (long)A; i <= (long)O; i++) { - minn = maxx; - for (j = (long)A; j <= (long)O; j++) { - if (transvp) { - if ( - ( - ((i == (long)A) || (i == (long)G)) - && ((j == (long)A) || (j == (long)G)) - ) - || ( - ((j == (long)C) || (j == (long)T)) - && ((i == (long)C) || (i == (long)T)) - ) - ) - cost = 0; - else - cost = 1; - } else { - if (i == j) - cost = 0; - else - cost = 1; - } - if (desc->cumlengths[j] == -1) { - desclen = maxx; - } else { - desclen = desc->cumlengths[j]; - } - if (minn > cost + desclen) { - minn = cost + desclen; - descrecon = 0; - } - if (minn == cost + desclen) { - descrecon += desc->numreconst[j]; - } - } - p->cumlengths[i] += minn; - p->numreconst[i] *= descrecon; - } - p->initialized = true; -} /* compmin */ - - -void minpostorder(node *p, pointarray treenode) -{ - /* traverses an n-ary tree, computing minimum steps at each node */ - node *q; - - if (p->tip) { - return; - } - q = p->next; - while (q != p) { - if (q->back) - minpostorder(q->back, treenode); - q = q->next; - } - if (!p->initialized) { - q = p->next; - while (q != p) { - if (q->back) - compmin(p, q->back); - q = q->next; - } - } -} /* minpostorder */ - - -void branchlength(node *subtr1, node *subtr2, double *brlen, - pointarray treenode) -{ - /* computes a branch length between two subtrees for a given site */ - long i, j, minn, cost, nom, denom; - node *temp; - - if (subtr1->tip) { - temp = subtr1; - subtr1 = subtr2; - 
subtr2 = temp; - } - if (subtr1->index == outgrno) { - temp = subtr1; - subtr1 = subtr2; - subtr2 = temp; - } - minpostorder(subtr1, treenode); - minpostorder(subtr2, treenode); - minn = 10 * spp; - nom = 0; - denom = 0; - for (i = (long)A; i <= (long)O; i++) { - for (j = (long)A; j <= (long)O; j++) { - if (transvp) { - if ( - ( - ((i == (long)A) || (i == (long)G)) - && ((j == (long)A) || (j == (long)G)) - ) - || ( - ((j == (long)C) || (j == (long)T)) - && ((i == (long)C) || (i == (long)T)) - ) - ) - cost = 0; - else - cost = 1; - } else { - if (i == j) - cost = 0; - else - cost = 1; - } - if (subtr1->cumlengths[i] != -1 && (subtr2->cumlengths[j] != -1)) { - if (subtr1->cumlengths[i] + cost + subtr2->cumlengths[j] < minn) { - minn = subtr1->cumlengths[i] + cost + subtr2->cumlengths[j]; - nom = 0; - denom = 0; - } - if (subtr1->cumlengths[i] + cost + subtr2->cumlengths[j] == minn) { - nom += subtr1->numreconst[i] * subtr2->numreconst[j] * cost; - denom += subtr1->numreconst[i] * subtr2->numreconst[j]; - } - } - } - } - *brlen = (double)nom/(double)denom; -} /* branchlength */ - - -void printbranchlengths(node *p) -{ - node *q; - long i; - - if (p->tip) - return; - q = p->next; - do { - fprintf(outfile, "%6ld ",q->index - spp); - if (q->back->tip) { - for (i = 0; i < nmlngth; i++) - putc(nayme[q->back->index - 1][i], outfile); - } else - fprintf(outfile, "%6ld ", q->back->index - spp); - fprintf(outfile, " %f\n",q->v); - if (q->back) - printbranchlengths(q->back); - q = q->next; - } while (q != p); -} /* printbranchlengths */ - - -void branchlentrav(node *p, node *root, long sitei, long chars, - double *brlen, pointarray treenode) - { - /* traverses the tree computing tree length at each branch */ - node *q; - - if (p->tip) - return; - if (p->index == outgrno) - p = p->back; - q = p->next; - do { - if (q->back) { - branchlength(q, q->back, brlen, treenode); - q->v += ((weight[sitei - 1] / 10.0) * (*brlen)/chars); - q->back->v += ((weight[sitei - 1] / 10.0) * 
(*brlen)/chars); - if (!q->back->tip) - branchlentrav(q->back, root, sitei, chars, brlen, treenode); - } - q = q->next; - } while (q != p); -} /* branchlentrav */ - - -void treelength(node *root, long chars, pointarray treenode) - { - /* calls branchlentrav at each site */ - long sitei; - double trlen; - - initbranchlen(root); - for (sitei = 1; sitei <= endsite; sitei++) { - trlen = 0.0; - initbase(root, sitei); - inittreetrav(root, sitei); - branchlentrav(root, root, sitei, chars, &trlen, treenode); - } -} /* treelength */ - - -void coordinates(node *p, long *tipy, double f, long *fartemp) -{ - /* establishes coordinates of nodes for display without lengths */ - node *q, *first, *last; - node *mid1 = NULL, *mid2 = NULL; - long numbranches, numb2; - - if (p->tip) { - p->xcoord = 0; - p->ycoord = *tipy; - p->ymin = *tipy; - p->ymax = *tipy; - (*tipy) += down; - return; - } - numbranches = 0; - q = p->next; - do { - coordinates(q->back, tipy, f, fartemp); - numbranches += 1; - q = q->next; - } while (p != q); - first = p->next->back; - q = p->next; - while (q->next != p) - q = q->next; - last = q->back; - numb2 = 1; - q = p->next; - while (q != p) { - if (numb2 == (long)(numbranches + 1)/2) - mid1 = q->back; - if (numb2 == (long)(numbranches/2 + 1)) - mid2 = q->back; - numb2 += 1; - q = q->next; - } - p->xcoord = (long)((double)(last->ymax - first->ymin) * f); - p->ycoord = (long)((mid1->ycoord + mid2->ycoord) / 2); - p->ymin = first->ymin; - p->ymax = last->ymax; - if (p->xcoord > *fartemp) - *fartemp = p->xcoord; -} /* coordinates */ - - -void drawline(long i, double scale, node *root) -{ - /* draws one row of the tree diagram by moving up tree */ - node *p, *q, *r, *first =NULL, *last =NULL; - long n, j; - boolean extra, done, noplus; - - p = root; - q = root; - extra = false; - noplus = false; - if (i == (long)p->ycoord && p == root) { - if (p->index - spp >= 10) - fprintf(outfile, " %2ld", p->index - spp); - else - fprintf(outfile, " %ld", p->index - spp); - 
extra = true; - noplus = true; - } else - fprintf(outfile, " "); - do { - if (!p->tip) { - r = p->next; - done = false; - do { - if (i >= r->back->ymin && i <= r->back->ymax) { - q = r->back; - done = true; - } - r = r->next; - } while (!(done || r == p)); - first = p->next->back; - r = p->next; - while (r->next != p) - r = r->next; - last = r->back; - } - done = (p == q); - n = (long)(scale * (p->xcoord - q->xcoord) + 0.5); - if (n < 3 && !q->tip) - n = 3; - if (extra) { - n--; - extra = false; - } - if ((long)q->ycoord == i && !done) { - if (noplus) { - putc('-', outfile); - noplus = false; - } - else - putc('+', outfile); - if (!q->tip) { - for (j = 1; j <= n - 2; j++) - putc('-', outfile); - if (q->index - spp >= 10) - fprintf(outfile, "%2ld", q->index - spp); - else - fprintf(outfile, "-%ld", q->index - spp); - extra = true; - noplus = true; - } else { - for (j = 1; j < n; j++) - putc('-', outfile); - } - } else if (!p->tip) { - if ((long)last->ycoord > i && (long)first->ycoord < i - && i != (long)p->ycoord) { - putc('!', outfile); - for (j = 1; j < n; j++) - putc(' ', outfile); - } else { - for (j = 1; j <= n; j++) - putc(' ', outfile); - } - noplus = false; - } else { - for (j = 1; j <= n; j++) - putc(' ', outfile); - noplus = false; - } - if (p != q) - p = q; - } while (!done); - if ((long)p->ycoord == i && p->tip) { - for (j = 0; j < nmlngth; j++) - putc(nayme[p->index - 1][j], outfile); - } - putc('\n', outfile); -} /* drawline */ - - -void printree(node *root, double f) -{ - /* prints out diagram of the tree */ - /* used in dnacomp, dnapars, & dnapenny */ - long i, tipy, dummy; - double scale; - - putc('\n', outfile); - if (!treeprint) - return; - putc('\n', outfile); - tipy = 1; - dummy = 0; - coordinates(root, &tipy, f, &dummy); - scale = 1.5; - putc('\n', outfile); - for (i = 1; i <= (tipy - down); i++) - drawline(i, scale, root); - fprintf(outfile, "\n remember:"); - if (outgropt) - fprintf(outfile, " (although rooted by outgroup)"); - 
fprintf(outfile, " this is an unrooted tree!\n\n"); -} /* printree */ - - -void writesteps(long chars, boolean weights, steptr oldweight, node *root) -{ - /* used in dnacomp, dnapars, & dnapenny */ - long i, j, k, l; - - putc('\n', outfile); - if (weights) - fprintf(outfile, "weighted "); - fprintf(outfile, "steps in each site:\n"); - fprintf(outfile, " "); - for (i = 0; i <= 9; i++) - fprintf(outfile, "%4ld", i); - fprintf(outfile, "\n *------------------------------------"); - fprintf(outfile, "-----\n"); - for (i = 0; i <= (chars / 10); i++) { - fprintf(outfile, "%5ld", i * 10); - putc('|', outfile); - for (j = 0; j <= 9; j++) { - k = i * 10 + j; - if (k == 0 || k > chars) - fprintf(outfile, " "); - else { - l = location[ally[k - 1] - 1]; - if (oldweight[k - 1] > 0) - fprintf(outfile, "%4ld", - oldweight[k - 1] * - (root->numsteps[l - 1] / weight[l - 1])); - else - fprintf(outfile, " 0"); - } - } - putc('\n', outfile); - } -} /* writesteps */ - - -void treeout(node *p, long nextree, long *col, node *root) -{ - /* write out file with representation of final tree */ - /* used in dnacomp, dnamove, dnapars, & dnapenny */ - node *q; - long i, n; - Char c; - - if (p->tip) { - n = 0; - for (i = 1; i <= nmlngth; i++) { - if (nayme[p->index - 1][i - 1] != ' ') - n = i; - } - for (i = 0; i < n; i++) { - c = nayme[p->index - 1][i]; - if (c == ' ') - c = '_'; - putc(c, outtree); - } - *col += n; - } else { - putc('(', outtree); - (*col)++; - q = p->next; - while (q != p) { - treeout(q->back, nextree, col, root); - q = q->next; - if (q == p) - break; - putc(',', outtree); - (*col)++; - if (*col > 60) { - putc('\n', outtree); - *col = 0; - } - } - putc(')', outtree); - (*col)++; - } - if (p != root) - return; - if (nextree > 2) - fprintf(outtree, "[%6.4f];\n", 1.0 / (nextree - 1)); - else - fprintf(outtree, ";\n"); -} /* treeout */ - - -void treeout3(node *p, long nextree, long *col, node *root) -{ - /* write out file with representation of final tree */ - /* used in dnapars 
-- writes branch lengths */ - node *q; - long i, n, w; - double x; - Char c; - - if (p->tip) { - n = 0; - for (i = 1; i <= nmlngth; i++) { - if (nayme[p->index - 1][i - 1] != ' ') - n = i; - } - for (i = 0; i < n; i++) { - c = nayme[p->index - 1][i]; - if (c == ' ') - c = '_'; - putc(c, outtree); - } - *col += n; - } else { - putc('(', outtree); - (*col)++; - q = p->next; - while (q != p) { - treeout3(q->back, nextree, col, root); - q = q->next; - if (q == p) - break; - putc(',', outtree); - (*col)++; - if (*col > 60) { - putc('\n', outtree); - *col = 0; - } - } - putc(')', outtree); - (*col)++; - } - x = p->v; - if (x > 0.0) - w = (long)(0.43429448222 * log(x)); - else if (x == 0.0) - w = 0; - else - w = (long)(0.43429448222 * log(-x)) + 1; - if (w < 0) - w = 0; - if (p != root) { - fprintf(outtree, ":%*.5f", (int)(w + 7), x); - *col += w + 8; - } - if (p != root) - return; - if (nextree > 2) - fprintf(outtree, "[%6.4f];\n", 1.0 / (nextree - 1)); - else - fprintf(outtree, ";\n"); -} /* treeout3 */ - - -void drawline2(long i, double scale, tree curtree) -{ - /* draws one row of the tree diagram by moving up tree */ - /* used in dnaml, proml, & restml */ - node *p, *q; - long n, j; - boolean extra; - node *r, *first =NULL, *last =NULL; - boolean done; - - p = curtree.start; - q = curtree.start; - extra = false; - if (i == (long)p->ycoord && p == curtree.start) { - if (p->index - spp >= 10) - fprintf(outfile, " %2ld", p->index - spp); - else - fprintf(outfile, " %ld", p->index - spp); - extra = true; - } else - fprintf(outfile, " "); - do { - if (!p->tip) { - r = p->next; - done = false; - do { - if (i >= r->back->ymin && i <= r->back->ymax) { - q = r->back; - done = true; - } - r = r->next; - } while (!(done || (p != curtree.start && r == p) || - (p == curtree.start && r == p->next))); - first = p->next->back; - r = p; - while (r->next != p) - r = r->next; - last = r->back; - if (p == curtree.start) - last = p->back; - } - done = (p->tip || p == q); - n = 
(long)(scale * (q->xcoord - p->xcoord) + 0.5); - if (n < 3 && !q->tip) - n = 3; - if (extra) { - n--; - extra = false; - } - if ((long)q->ycoord == i && !done) { - if ((long)p->ycoord != (long)q->ycoord) - putc('+', outfile); - else - putc('-', outfile); - if (!q->tip) { - for (j = 1; j <= n - 2; j++) - putc('-', outfile); - if (q->index - spp >= 10) - fprintf(outfile, "%2ld", q->index - spp); - else - fprintf(outfile, "-%ld", q->index - spp); - extra = true; - } else { - for (j = 1; j < n; j++) - putc('-', outfile); - } - } else if (!p->tip) { - if ((long)last->ycoord > i && (long)first->ycoord < i && - (i != (long)p->ycoord || p == curtree.start)) { - putc('|', outfile); - for (j = 1; j < n; j++) - putc(' ', outfile); - } else { - for (j = 1; j <= n; j++) - putc(' ', outfile); - } - } else { - for (j = 1; j <= n; j++) - putc(' ', outfile); - } - if (q != p) - p = q; - } while (!done); - if ((long)p->ycoord == i && p->tip) { - for (j = 0; j < nmlngth; j++) - putc(nayme[p->index-1][j], outfile); - } - putc('\n', outfile); -} /* drawline2 */ - - -void drawline3(long i, double scale, node *start) -{ - /* draws one row of the tree diagram by moving up tree */ - /* used in dnapars */ - node *p, *q; - long n, j; - boolean extra; - node *r, *first =NULL, *last =NULL; - boolean done; - - p = start; - q = start; - extra = false; - if (i == (long)p->ycoord) { - if (p->index - spp >= 10) - fprintf(outfile, " %2ld", p->index - spp); - else - fprintf(outfile, " %ld", p->index - spp); - extra = true; - } else - fprintf(outfile, " "); - do { - if (!p->tip) { - r = p->next; - done = false; - do { - if (i >= r->back->ymin && i <= r->back->ymax) { - q = r->back; - done = true; - } - r = r->next; - } while (!(done || (r == p))); - first = p->next->back; - r = p; - while (r->next != p) - r = r->next; - last = r->back; - } - done = (p->tip || p == q); - n = (long)(scale * (q->xcoord - p->xcoord) + 0.5); - if (n < 3 && !q->tip) - n = 3; - if (extra) { - n--; - extra = false; - } - if 
((long)q->ycoord == i && !done) { - if ((long)p->ycoord != (long)q->ycoord) - putc('+', outfile); - else - putc('-', outfile); - if (!q->tip) { - for (j = 1; j <= n - 2; j++) - putc('-', outfile); - if (q->index - spp >= 10) - fprintf(outfile, "%2ld", q->index - spp); - else - fprintf(outfile, "-%ld", q->index - spp); - extra = true; - } else { - for (j = 1; j < n; j++) - putc('-', outfile); - } - } else if (!p->tip) { - if ((long)last->ycoord > i && (long)first->ycoord < i && - (i != (long)p->ycoord || p == start)) { - putc('|', outfile); - for (j = 1; j < n; j++) - putc(' ', outfile); - } else { - for (j = 1; j <= n; j++) - putc(' ', outfile); - } - } else { - for (j = 1; j <= n; j++) - putc(' ', outfile); - } - if (q != p) - p = q; - } while (!done); - if ((long)p->ycoord == i && p->tip) { - for (j = 0; j < nmlngth; j++) - putc(nayme[p->index-1][j], outfile); - } - putc('\n', outfile); -} /* drawline3 */ - - -void copynode(node *c, node *d, long categs) -{ - long i, j; - - for (i = 0; i < endsite; i++) - for (j = 0; j < categs; j++) - memcpy(d->x[i][j], c->x[i][j], sizeof(sitelike)); - memcpy(d->underflows,c->underflows,sizeof(double) * endsite); - d->tyme = c->tyme; - d->v = c->v; - d->xcoord = c->xcoord; - d->ycoord = c->ycoord; - d->ymin = c->ymin; - d->ymax = c->ymax; - d->iter = c->iter; /* iter used in dnaml only */ - d->haslength = c->haslength; /* haslength used in dnamlk only */ - d->initialized = c->initialized; /* initialized used in dnamlk only */ -} /* copynode */ - - -void prot_copynode(node *c, node *d, long categs) -{ - /* a version of copynode for proml */ - long i, j; - - for (i = 0; i < endsite; i++) - for (j = 0; j < categs; j++) - memcpy(d->protx[i][j], c->protx[i][j], sizeof(psitelike)); - memcpy(d->underflows,c->underflows,sizeof(double) * endsite); - d->tyme = c->tyme; - d->v = c->v; - d->xcoord = c->xcoord; - d->ycoord = c->ycoord; - d->ymin = c->ymin; - d->ymax = c->ymax; - d->iter = c->iter; /* iter used in dnaml only */ - d->haslength 
= c->haslength; /* haslength used in dnamlk only */ - d->initialized = c->initialized; /* initialized used in dnamlk only */ -} /* prot_copynode */ - - -void copy_(tree *a, tree *b, long nonodes, long categs) -{ - /* used in dnamlk */ - long i; - node *p, *q, *r, *s, *t; - - for (i = 0; i < spp; i++) { - copynode(a->nodep[i], b->nodep[i], categs); - if (a->nodep[i]->back) { - if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) - b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; - else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next) - b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; - else - b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; - } - else b->nodep[i]->back = NULL; - } - for (i = spp; i < nonodes; i++) { - if (a->nodep[i]) { - p = a->nodep[i]; - q = b->nodep[i]; - r = p; - do { - copynode(p, q, categs); - if (p->back) { - s = a->nodep[p->back->index - 1]; - t = b->nodep[p->back->index - 1]; - if (s->tip) { - if(p->back == s) - q->back = t; - } else { - do { - if (p->back == s) - q->back = t; - s = s->next; - t = t->next; - } while (s != a->nodep[p->back->index - 1]); - } - } - else - q->back = NULL; - p = p->next; - q = q->next; - } while (p != r); - } - } - b->likelihood = a->likelihood; - b->start = a->start; /* start used in dnaml only */ - b->root = a->root; /* root used in dnamlk only */ -} /* copy_ */ - - -void prot_copy_(tree *a, tree *b, long nonodes, long categs) -{ - /* used in promlk */ - /* identical to copy_() except for calls to prot_copynode rather */ - /* than copynode. 
*/ - long i; - node *p, *q, *r, *s, *t; - - for (i = 0; i < spp; i++) { - prot_copynode(a->nodep[i], b->nodep[i], categs); - if (a->nodep[i]->back) { - if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) - b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; - else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next -) - b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; - else - b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; - } - else b->nodep[i]->back = NULL; - } - for (i = spp; i < nonodes; i++) { - if (a->nodep[i]) { - p = a->nodep[i]; - q = b->nodep[i]; - r = p; - do { - prot_copynode(p, q, categs); - if (p->back) { - s = a->nodep[p->back->index - 1]; - t = b->nodep[p->back->index - 1]; - if (s->tip) - { - if(p->back == s) - q->back = t; - } else { - do { - if (p->back == s) - q->back = t; - s = s->next; - t = t->next; - } while (s != a->nodep[p->back->index - 1]); - } - } - else - q->back = NULL; - p = p->next; - q = q->next; - } while (p != r); - } - } - b->likelihood = a->likelihood; - b->start = a->start; /* start used in dnaml only */ - b->root = a->root; /* root used in dnamlk only */ -} /* prot_copy_ */ - - -void standev(long chars, long numtrees, long minwhich, double minsteps, - double *nsteps, long **fsteps, longer seed) -{ /* do paired sites test (KHT or SH test) on user-defined trees */ - /* used in dnapars & protpars */ - long i, j, k; - double wt, sumw, sum, sum2, sd; - double temp; - double **covar, *P, *f, *r; - -#define SAMPLES 1000 - if (numtrees == 2) { - fprintf(outfile, "Kishino-Hasegawa-Templeton test\n\n"); - fprintf(outfile, "Tree Steps Diff Steps Its S.D."); - fprintf(outfile, " Significantly worse?\n\n"); - which = 1; - while (which <= numtrees) { - fprintf(outfile, "%3ld%10.1f", which, nsteps[which - 1] / 10); - if (minwhich == which) - fprintf(outfile, " <------ best\n"); - else { - sumw = 0.0; - sum = 0.0; - sum2 = 0.0; - for (i = 0; i < endsite; i++) { 
- if (weight[i] > 0) { - wt = weight[i] / 10.0; - sumw += wt; - temp = (fsteps[which - 1][i] - fsteps[minwhich - 1][i]) / 10.0; - sum += wt * temp; - sum2 += wt * temp * temp; - } - } - sd = sqrt(sumw / (sumw - 1.0) * (sum2 - sum * sum / sumw)); - fprintf(outfile, "%10.1f%12.4f", - (nsteps[which - 1] - minsteps) / 10, sd); - if ((sum > 0.0) && (sum > 1.95996 * sd)) - fprintf(outfile, " Yes\n"); - else - fprintf(outfile, " No\n"); - } - which++; - } - fprintf(outfile, "\n\n"); - } else { /* Shimodaira-Hasegawa test using normal approximation */ - if(numtrees > MAXSHIMOTREES){ - fprintf(outfile, "Shimodaira-Hasegawa test on first %d of %ld trees\n\n" - , MAXSHIMOTREES, numtrees); - numtrees = MAXSHIMOTREES; - } else { - fprintf(outfile, "Shimodaira-Hasegawa test\n\n"); - } - covar = (double **)Malloc(numtrees*sizeof(double *)); - sumw = 0.0; - for (i = 0; i < endsite; i++) - sumw += weight[i] / 10.0; - for (i = 0; i < numtrees; i++) - covar[i] = (double *)Malloc(numtrees*sizeof(double)); - for (i = 0; i < numtrees; i++) { /* compute covariances of trees */ - sum = nsteps[i]/(10.0*sumw); - for (j = 0; j <=i; j++) { - sum2 = nsteps[j]/(10.0*sumw); - temp = 0.0; - for (k = 0; k < endsite; k++) { - if (weight[k] > 0) { - wt = weight[k]/10.0; - temp = temp + wt*(fsteps[i][k]/10.0-sum) - *(fsteps[j][k]/10.0-sum2); - } - } - covar[i][j] = temp; - if (i != j) - covar[j][i] = temp; - } - } - for (i = 0; i < numtrees; i++) { /* in-place Cholesky decomposition - of trees x trees covariance matrix */ - sum = 0.0; - for (j = 0; j <= i-1; j++) - sum = sum + covar[i][j] * covar[i][j]; - if (covar[i][i] <= sum) - temp = 0.0; - else - temp = sqrt(covar[i][i] - sum); - covar[i][i] = temp; - for (j = i+1; j < numtrees; j++) { - sum = 0.0; - for (k = 0; k < i; k++) - sum = sum + covar[i][k] * covar[j][k]; - if (fabs(temp) < 1.0E-12) - covar[j][i] = 0.0; - else - covar[j][i] = (covar[j][i] - sum)/temp; - } - } - f = (double *)Malloc(numtrees*sizeof(double)); /* resampled sums */ - P = 
(double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ - r = (double *)Malloc(numtrees*sizeof(double)); /* store Normal variates */ - for (i = 0; i < numtrees; i++) - P[i] = 0.0; - sum2 = nsteps[0]/10.0; /* sum2 will be smallest # of steps */ - for (i = 1; i < numtrees; i++) - if (sum2 > nsteps[i]/10.0) - sum2 = nsteps[i]/10.0; - for (i = 1; i <= SAMPLES; i++) { /* loop over resampled trees */ - for (j = 0; j < numtrees; j++) /* draw Normal variates */ - r[j] = normrand(seed); - for (j = 0; j < numtrees; j++) { /* compute vectors */ - sum = 0.0; - for (k = 0; k <= j; k++) - sum += covar[j][k]*r[k]; - f[j] = sum; - } - sum = f[1]; - for (j = 1; j < numtrees; j++) /* get min of vector */ - if (f[j] < sum) - sum = f[j]; - for (j = 0; j < numtrees; j++) /* accumulate P's */ - if (nsteps[j]/10.0-sum2 <= f[j] - sum) - P[j] += 1.0/SAMPLES; - } - fprintf(outfile, "Tree Steps Diff Steps P value"); - fprintf(outfile, " Significantly worse?\n\n"); - for (i = 0; i < numtrees; i++) { - fprintf(outfile, "%3ld%10.1f", i+1, nsteps[i]/10); - if ((minwhich-1) == i) - fprintf(outfile, " <------ best\n"); - else { - fprintf(outfile, " %9.1f %10.3f", nsteps[i]/10.0-sum2, P[i]); - if (P[i] < 0.05) - fprintf(outfile, " Yes\n"); - else - fprintf(outfile, " No\n"); - } - } - fprintf(outfile, "\n"); - free(P); /* free the variables we Malloc'ed */ - free(f); - free(r); - for (i = 0; i < numtrees; i++) - free(covar[i]); - free(covar); - } -} /* standev */ - - -void standev2(long numtrees, long maxwhich, long a, long b, double maxlogl, - double *l0gl, double **l0gf, steptr aliasweight, longer seed) -{ /* do paired sites test (KHT or SH) for user-defined trees */ - /* used in dnaml, dnamlk, proml, promlk, and restml */ - double **covar, *P, *f, *r; - long i, j, k; - double wt, sumw, sum, sum2, sd; - double temp; - -#define SAMPLES 1000 - if (numtrees == 2) { - fprintf(outfile, "Kishino-Hasegawa-Templeton test\n\n"); - fprintf(outfile, "Tree logL Diff logL Its S.D."); - 
fprintf(outfile, " Significantly worse?\n\n"); - which = 1; - while (which <= numtrees) { - fprintf(outfile, "%3ld %9.1f", which, l0gl[which - 1]); - if (maxwhich == which) - fprintf(outfile, " <------ best\n"); - else { - sumw = 0.0; - sum = 0.0; - sum2 = 0.0; - for (i = a; i <= b; i++) { - if (aliasweight[i] > 0) { - wt = aliasweight[i]; - sumw += wt; - temp = l0gf[which - 1][i] - l0gf[maxwhich - 1][i]; - sum += temp * wt; - sum2 += wt * temp * temp; - } - } - temp = sum / sumw; - sd = sqrt(sumw / (sumw - 1.0) * (sum2 - sum * sum / sumw )); - fprintf(outfile, "%10.1f %11.4f", (l0gl[which - 1])-maxlogl, sd); - if ((sum < 0.0) && ((-sum) > 1.95996 * sd)) - fprintf(outfile, " Yes\n"); - else - fprintf(outfile, " No\n"); - } - which++; - } - fprintf(outfile, "\n\n"); - } else { /* Shimodaira-Hasegawa test using normal approximation */ - if(numtrees > MAXSHIMOTREES){ - fprintf(outfile, "Shimodaira-Hasegawa test on first %d of %ld trees\n\n" - , MAXSHIMOTREES, numtrees); - numtrees = MAXSHIMOTREES; - } else { - fprintf(outfile, "Shimodaira-Hasegawa test\n\n"); - } - covar = (double **)Malloc(numtrees*sizeof(double *)); - sumw = 0.0; - for (i = a; i <= b; i++) - sumw += aliasweight[i]; - for (i = 0; i < numtrees; i++) - covar[i] = (double *)Malloc(numtrees*sizeof(double)); - for (i = 0; i < numtrees; i++) { /* compute covariances of trees */ - sum = l0gl[i]/sumw; - for (j = 0; j <=i; j++) { - sum2 = l0gl[j]/sumw; - temp = 0.0; - for (k = a; k <= b ; k++) { - if (aliasweight[k] > 0) { - wt = aliasweight[k]; - temp = temp + wt*(l0gf[i][k]-sum) - *(l0gf[j][k]-sum2); - } - } - covar[i][j] = temp; - if (i != j) - covar[j][i] = temp; - } - } - for (i = 0; i < numtrees; i++) { /* in-place Cholesky decomposition - of trees x trees covariance matrix */ - sum = 0.0; - for (j = 0; j <= i-1; j++) - sum = sum + covar[i][j] * covar[i][j]; - if (covar[i][i] <= sum) - temp = 0.0; - else - temp = sqrt(covar[i][i] - sum); - covar[i][i] = temp; - for (j = i+1; j < numtrees; j++) { - sum = 
0.0; - for (k = 0; k < i; k++) - sum = sum + covar[i][k] * covar[j][k]; - if (fabs(temp) < 1.0E-12) - covar[j][i] = 0.0; - else - covar[j][i] = (covar[j][i] - sum)/temp; - } - } - f = (double *)Malloc(numtrees*sizeof(double)); /* resampled likelihoods */ - P = (double *)Malloc(numtrees*sizeof(double)); /* vector of P's of trees */ - r = (double *)Malloc(numtrees*sizeof(double)); /* store Normal variates */ - for (i = 0; i < numtrees; i++) - P[i] = 0.0; - for (i = 1; i <= SAMPLES; i++) { /* loop over resampled trees */ - for (j = 0; j < numtrees; j++) /* draw Normal variates */ - r[j] = normrand(seed); - for (j = 0; j < numtrees; j++) { /* compute vectors */ - sum = 0.0; - for (k = 0; k <= j; k++) - sum += covar[j][k]*r[k]; - f[j] = sum; - } - sum = f[1]; - for (j = 1; j < numtrees; j++) /* get max of vector */ - if (f[j] > sum) - sum = f[j]; - for (j = 0; j < numtrees; j++) /* accumulate P's */ - if (maxlogl-l0gl[j] <= sum-f[j]) - P[j] += 1.0/SAMPLES; - } - fprintf(outfile, "Tree logL Diff logL P value"); - fprintf(outfile, " Significantly worse?\n\n"); - for (i = 0; i < numtrees; i++) { - fprintf(outfile, "%3ld%10.1f", i+1, l0gl[i]); - if ((maxwhich-1) == i) - fprintf(outfile, " <------ best\n"); - else { - fprintf(outfile, " %9.1f %10.3f", l0gl[i]-maxlogl, P[i]); - if (P[i] < 0.05) - fprintf(outfile, " Yes\n"); - else - fprintf(outfile, " No\n"); - } - } - fprintf(outfile, "\n"); - free(P); /* free the variables we Malloc'ed */ - free(f); - free(r); - for (i = 0; i < numtrees; i++) - free(covar[i]); - free(covar); - } -} /* standev */ - - -void freetip(node *anode) -{ - /* used in dnacomp, dnapars, & dnapenny */ - - free(anode->numsteps); - free(anode->oldnumsteps); - free(anode->base); - free(anode->oldbase); -} /* freetip */ - - -void freenontip(node *anode) -{ - /* used in dnacomp, dnapars, & dnapenny */ - - free(anode->numsteps); - free(anode->oldnumsteps); - free(anode->base); - free(anode->oldbase); - free(anode->numnuc); -} /* freenontip */ - - -void 
freenodes(long nonodes, pointarray treenode) -{ - /* used in dnacomp, dnapars, & dnapenny */ - long i; - node *p; - - for (i = 0; i < spp; i++) - freetip(treenode[i]); - for (i = spp; i < nonodes; i++) { - if (treenode[i] != NULL) { - p = treenode[i]->next; - do { - freenontip(p); - p = p->next; - } while (p != treenode[i]); - freenontip(p); - } - } -} /* freenodes */ - - -void freenode(node **anode) -{ - /* used in dnacomp, dnapars, & dnapenny */ - - freenontip(*anode); - free(*anode); -} /* freenode */ - - -void freetree(long nonodes, pointarray treenode) -{ - /* used in dnacomp, dnapars, & dnapenny */ - long i; - node *p, *q; - - for (i = 0; i < spp; i++) - free(treenode[i]); - for (i = spp; i < nonodes; i++) { - if (treenode[i] != NULL) { - p = treenode[i]->next; - do { - q = p->next; - free(p); - p = q; - } while (p != treenode[i]); - free(p); - } - } - free(treenode); -} /* freetree */ - - -void prot_freex_notip(long nonodes, pointarray treenode) -{ - /* used in proml */ - long i, j; - node *p; - - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - if ( p == NULL ) continue; - do { - for (j = 0; j < endsite; j++){ - free(p->protx[j]); - p->protx[j] = NULL; - } - free(p->protx); - p->protx = NULL; - p = p->next; - } while (p != treenode[i]); - } -} /* prot_freex_notip */ - - -void prot_freex(long nonodes, pointarray treenode) -{ - /* used in proml */ - long i, j; - node *p; - - for (i = 0; i < spp; i++) { - for (j = 0; j < endsite; j++) - free(treenode[i]->protx[j]); - free(treenode[i]->protx); - } - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - do { - for (j = 0; j < endsite; j++) - free(p->protx[j]); - free(p->protx); - p = p->next; - } while (p != treenode[i]); - } -} /* prot_freex */ - - -void freex_notip(long nonodes, pointarray treenode) -{ - /* used in dnaml & dnamlk */ - long i, j; - node *p; - - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - if ( p == NULL ) continue; - do { - for (j = 0; j < endsite; j++) - free(p->x[j]); - 
free(p->x); - p = p->next; - } while (p != treenode[i]); - } -} /* freex_notip */ - - -void freex(long nonodes, pointarray treenode) -{ - /* used in dnaml & dnamlk */ - long i, j; - node *p; - - for (i = 0; i < spp; i++) { - for (j = 0; j < endsite; j++) - free(treenode[i]->x[j]); - free(treenode[i]->x); - } - for (i = spp; i < nonodes; i++) { - if(treenode[i]){ - p = treenode[i]; - do { - for (j = 0; j < endsite; j++) - free(p->x[j]); - free(p->x); - p = p->next; - } while (p != treenode[i]); - } - } -} /* freex */ - - -void freex2(long nonodes, pointarray treenode) -{ - /* used in restml */ - long i, j; - node *p; - - for (i = 0; i < spp; i++) - free(treenode[i]->x2); - for (i = spp; i < nonodes; i++) { - p = treenode[i]; - for (j = 1; j <= 3; j++) { - free(p->x2); - p = p->next; - } - } -} /* freex2 */ - - -void freegarbage(gbases **garbage) -{ - /* used in dnacomp, dnapars, & dnapenny */ - gbases *p; - - while (*garbage) { - p = *garbage; - *garbage = (*garbage)->next; - free(p->base); - free(p); - } -} /*freegarbage */ - - -void freegrbg(node **grbg) -{ - /* used in dnacomp, dnapars, & dnapenny */ - node *p; - - while (*grbg) { - p = *grbg; - *grbg = (*grbg)->next; - freenontip(p); - free(p); - } -} /*freegrbg */ - - -void collapsetree(node *p, node *root, node **grbg, pointarray treenode, - long *zeros) -{ - /* Recurse through tree searching for zero length brances between */ - /* nodes (not to tips). If one exists, collapse the nodes together, */ - /* removing the branch. */ - node *q, *x1, *y1, *x2, *y2; - long i, j, index, index2, numd; - if (p->tip) - return; - q = p->next; - do { - if (!q->back->tip && q->v == 0.000000) { - /* merge the two nodes. 
*/ - x1 = y2 = q->next; - x2 = y1 = q->back->next; - while(x1->next != q) - x1 = x1-> next; - while(y1->next != q->back) - y1 = y1-> next; - x1->next = x2; - y1->next = y2; - - index = q->index; - index2 = q->back->index; - numd = treenode[index-1]->numdesc + q->back->numdesc -1; - chucktreenode(grbg, q->back); - chucktreenode(grbg, q); - q = x2; - - /* update the indicies around the node circle */ - do{ - if(q->index != index){ - q->index = index; - } - q = q-> next; - }while(x2 != q); - updatenumdesc(treenode[index-1], root, numd); - - /* Alter treenode to point to real nodes, and update indicies */ - /* acordingly. */ - j = 0; i=0; - for(i = (index2-1); i < nonodes-1 && treenode[i+1]; i++){ - treenode[i]=treenode[i+1]; - treenode[i+1] = NULL; - x1=x2=treenode[i]; - do{ - x1->index = i+1; - x1 = x1 -> next; - } while(x1 != x2); - } - - /* Create a new empty fork in the blank spot of treenode */ - x1=NULL; - for(i=1; i <=3 ; i++){ - gnutreenode(grbg, &x2, index2, endsite, zeros); - x2->next = x1; - x1 = x2; - } - x2->next->next->next = x2; - treenode[nonodes-1]=x2; - if (q->back) - collapsetree(q->back, root, grbg, treenode, zeros); - } else { - if (q->back) - collapsetree(q->back, root, grbg, treenode, zeros); - q = q->next; - } - } while (q != p); -} /* collapsetree */ - - -void collapsebestrees(node **root, node **grbg, pointarray treenode, - bestelm *bestrees, long *place, long *zeros, - long chars, boolean recompute, boolean progress) - -{ - /* Goes through all best trees, collapsing trees where possible, and */ - /* deleting trees that are not unique. 
*/ - long i,j, k, pos, nextnode, oldnextree; - boolean found; - node *dummy; - - oldnextree = nextree; - for(i = 0 ; i < (oldnextree - 1) ; i++){ - bestrees[i].collapse = true; - } - - if(progress) - printf("Collapsing best trees\n "); - k = 0; - for(i = 0 ; i < (oldnextree - 1) ; i++){ - if(progress){ - if(i % (((oldnextree-1) / 72) + 1) == 0) - putchar('.'); - fflush(stdout); - } - while(!bestrees[k].collapse) - k++; - /* Reconstruct tree. */ - *root = treenode[0]; - add(treenode[0], treenode[1], treenode[spp], root, recompute, - treenode, grbg, zeros); - nextnode = spp + 2; - for (j = 3; j <= spp; j++) { - if (bestrees[k].btree[j - 1] > 0) - add(treenode[bestrees[k].btree[j - 1] - 1], treenode[j - 1], - treenode[nextnode++ - 1], root, recompute, treenode, grbg, - zeros); - else - add(treenode[treenode[-bestrees[k].btree[j - 1]-1]->back->index-1], - treenode[j - 1], NULL, root, recompute, treenode, grbg, zeros); - } - reroot(treenode[outgrno - 1], *root); - - treelength(*root, chars, treenode); - collapsetree(*root, *root, grbg, treenode, zeros); - savetree(*root, place, treenode, grbg, zeros); - /* move everything down in the bestree list */ - for(j = k ; j < (nextree - 2) ; j++){ - memcpy(bestrees[j].btree, bestrees[j + 1].btree, spp * sizeof(long)); - bestrees[j].gloreange = bestrees[j + 1].gloreange; - bestrees[j + 1].gloreange = false; - bestrees[j].locreange = bestrees[j + 1].locreange; - bestrees[j + 1].locreange = false; - bestrees[j].collapse = bestrees[j + 1].collapse; - } - pos=0; - findtree(&found, &pos, nextree-1, place, bestrees); - - /* put the new tree at the end of the list if it wasn't found */ - nextree--; - if(!found) - addtree(pos, &nextree, false, place, bestrees); - - /* Deconstruct the tree */ - for (j = 1; j < spp; j++){ - re_move(treenode[j], &dummy, root, recompute, treenode, - grbg, zeros); - } - } - if (progress) { - putchar('\n'); -#ifdef WIN32 - phyFillScreenColor(); -#endif - } -} diff --git 
a/forester/archive/RIO/others/phylip_mod/src/seq.h b/forester/archive/RIO/others/phylip_mod/src/seq.h deleted file mode 100644 index e70a07a..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/seq.h +++ /dev/null @@ -1,216 +0,0 @@ -/*Modified by Christian Zmasek. Use at your own risk.*/ - -/* version 3.6. (c) Copyright 1993-2000 by the University of Washington. - Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. - Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. */ - -/* - seq.h: included in dnacomp, dnadist, dnainvar, dnaml, dnamlk, dnamove, - dnapars, dnapenny, protdist, protpars, & restml -*/ - -/* move */ -/* All the below moved here in the Great TreeRead Migration of '96 */ - -#define ebcdic EBCDIC -#define MAXNCH 26 /*changed from to 20 to 26 by CZ 2006-07-28 */ - -/* All of this came over from cons.h -plc*/ -#define OVER 7 -#define ADJACENT_PAIRS 1 -#define CORR_IN_1_AND_2 2 -#define ALL_IN_1_AND_2 3 -#define NO_PAIRING 4 -#define ALL_IN_FIRST 5 -#define TREE1 8 -#define TREE2 9 - -#define FULL_MATRIX 11 -#define VERBOSE 22 -#define SPARSE 33 - -/* Used in proml, promlk, dnaml, dnamlk for undefined bestyet*/ -#define UNDEFINED 1.0 - - -/* Number of columns per block in a matrix output */ -#define COLUMNS_PER_BLOCK 10 - - -/*end move*/ - - -typedef struct gbases { - baseptr base; - struct gbases *next; -} gbases; - -typedef struct nuview_data { - /* A big 'ol collection of pointers used in nuview */ - double *yy, *wwzz, *vvzz, *vzsumr, *vzsumy, *sum, *sumr, *sumy; - sitelike *xx; -} nuview_data; - -struct LOC_hyptrav { - boolean bottom; - node *r; - long *hypset; - boolean maybe, nonzero; - long tempset, anc; -} ; - - -extern long nonodes, endsite, outgrno, nextree, which; - -extern boolean interleaved, printdata, outgropt, treeprint, dotdiff, transvp; -extern steptr weight, category, alias, location, ally; -extern sequence y; - 
-#ifndef OLDC -/* function prototypes */ -void alloctemp(node **, long *, long); -void freetemp(node **); -void freetree2 (pointarray, long); -void inputdata(long); -void alloctree(pointarray *, long, boolean); -void allocx(long, long, pointarray, boolean); - -void prot_allocx(long, long, pointarray, boolean); -void allocx2(long, long, long, pointarray, boolean); -void setuptree(pointarray, long, boolean); -void setuptree2(tree); -void alloctip(node *, long *); -void freetrans(transptr *, long ,long ); -void getbasefreqs(double, double, double, double, double *, double *, - double *, double *, double *, double *, double *, - double *xi, double *, double *, boolean, boolean); -void empiricalfreqs(double *,double *,double *,double *,steptr,pointarray); -void sitesort(long, steptr); -void sitecombine(long); - -void sitescrunch(long); -void sitesort2(long, steptr); -void sitecombine2(long, steptr); -void sitescrunch2(long, long, long, steptr); -void makevalues(pointarray, long *, boolean); -void makevalues2(long, pointarray, long, long, sequence, steptr); -void fillin(node *, node *, node *); -long getlargest(long *); -void multifillin(node *, node *, long); -void sumnsteps(node *, node *, node *, long, long); - -void sumnsteps2(node *, node *, node *, long, long, long *); -void multisumnsteps(node *, node *, long, long, long *); -void multisumnsteps2(node *); -boolean alltips(node *, node *); -void gdispose(node *, node **, pointarray); -void preorder(node *, node *, node *, node *, node *, node *, long); -void updatenumdesc(node *, node *, long); -void add(node *,node *,node *,node **,boolean,pointarray,node **,long *); -void findbelow(node **below, node *item, node *fork); - -void re_move(node *item, node **fork, node **root, boolean recompute, - pointarray, node **, long *); -void postorder(node *p); -void getnufork(node **, node **, pointarray, long *); -void reroot(node *, node *); -void reroot2(node *, node *); -void reroot3(node *, node *, node *, node *, node 
**); -void savetraverse(node *); -void newindex(long, node *); -void flipindexes(long, pointarray); -boolean parentinmulti(node *); - -long sibsvisited(node *, long *); -long smallest(node *, long *); -void bintomulti(node **, node **, node **, long *); -void backtobinary(node **, node *, node **); -boolean outgrin(node *, node *); -void flipnodes(node *, node *); -void moveleft(node *, node *, node **); -void savetree(node *, long *, pointarray, node **, long *); -void addnsave(node *, node *, node *, node **, node **,boolean, - pointarray, long *, long *); -void addbestever(long *, long *, long, boolean, long *, bestelm *); - -void addtiedtree(long, long *, long, boolean,long *, bestelm *); -void clearcollapse(pointarray); -void clearbottom(pointarray); -void collabranch(node *, node *, node *); -boolean allcommonbases(node *, node *, boolean *); -void findbottom(node *, node **); -boolean moresteps(node *, node *); -boolean passdown(node *, node *, node *, node *, node *, node *, - node *, node *, node *, boolean); -boolean trycollapdesc(node *, node *, node *, node *, node *, - node *, node *, node *, node *, boolean , long *); -void setbottom(node *); - -boolean zeroinsubtree(node *, node *, node *, node *, node *, - node *, node *, node *, boolean, node *, long *); -boolean collapsible(node *, node *, node *, node *, node *, - node *, node *, node *, boolean, node *, long *, pointarray); -void replaceback(node **, node *, node *, node **, long *); -void putback(node *, node *, node *, node **); -void savelocrearr(node *, node *, node *, node *, node *, node *, - node *, node *, node *, node **, long, long *, boolean, - boolean , boolean *, long *, bestelm *, pointarray , - node **, long *); -void clearvisited(pointarray); -void hyprint(long, long, struct LOC_hyptrav *,pointarray, Char *); -void gnubase(gbases **, gbases **, long); -void chuckbase(gbases *, gbases **); -void hyptrav(node *, long *, long, long, boolean,pointarray, - gbases **, Char *); - -void 
hypstates(long , node *, pointarray, gbases **, Char *); -void initbranchlen(node *p); -void initmin(node *, long, boolean); -void initbase(node *, long); -void inittreetrav(node *, long); -void compmin(node *, node *); -void minpostorder(node *, pointarray); -void branchlength(node *,node *,double *,pointarray); -void printbranchlengths(node *); -void branchlentrav(node *,node *,long,long,double *,pointarray); - -void treelength(node *, long, pointarray); -void coordinates(node *, long *, double, long *); -void drawline(long, double, node *); -void printree(node *, double); -void writesteps(long, boolean, steptr, node *); -void treeout(node *, long, long *, node *); -void treeout3(node *, long, long *, node *); -void drawline2(long, double, tree); -void drawline3(long, double, node *); -void copynode(node *, node *, long); - -void prot_copynode(node *, node *, long); -void copy_(tree *, tree *, long, long); -void prot_copy_(tree *, tree *, long, long); -void standev(long, long, long, double, double *, long **, longer); -void standev2(long, long, long, long, double, double *, double **, - steptr, longer); -void freetip(node *); -void freenontip(node *); -void freenodes(long, pointarray); -void freenode(node **); -void freetree(long, pointarray); - -void freex(long, pointarray); -void freex_notip(long, pointarray); -void freex2(long, pointarray); -void prot_freex_notip(long nonodes, pointarray treenode); -void prot_freex(long nonodes, pointarray treenode); -void freegarbage(gbases **); -void freegrbg(node **); - -void collapsetree(node *, node *, node **, pointarray, long *); -void collapsebestrees(node **, node **, pointarray, bestelm *, long *, - long *, long, boolean, boolean); -void fix_x(node* p,long site, double maxx, long rcategs); -void fix_protx(node* p,long site,double maxx, long rcategs); -/*function prototypes*/ -#endif - diff --git a/forester/archive/RIO/others/phylip_mod/src/seqboot.c b/forester/archive/RIO/others/phylip_mod/src/seqboot.c deleted file 
mode 100644 index c5b9c29..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/seqboot.c +++ /dev/null @@ -1,1419 +0,0 @@ -#include "phylip.h" -#include "seq.h" - -/* version 3.6. (c) Copyright 1993-2005 by the University of Washington. - Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe, - and Doug Buxton. - Permission is granted to copy and use this program provided no fee is - charged for it and provided that this copyright notice is not removed. */ - -typedef enum { - seqs, morphology, restsites, genefreqs -} datatype; - -typedef enum { - dna, rna, protein -} seqtype; - - -#ifndef OLDC -/* function prototypes */ -void getoptions(void); -void seqboot_inputnumbers(void); -void seqboot_inputfactors(void); -void inputoptions(void); -void seqboot_inputdata(void); -void allocrest(void); -void allocnew(void); -void doinput(int argc, Char *argv[]); -void bootweights(void); -void sppermute(long); -void charpermute(long, long); -void writedata(void); -void writeweights(void); -void writecategories(void); -void writeauxdata(steptr, FILE*); -void writefactors(void); -void bootwrite(void); -void seqboot_inputaux(steptr, FILE*); -/* function prototypes */ -#endif - - -FILE *outcatfile, *outweightfile, *outmixfile, *outancfile, *outfactfile; -Char infilename[FNMLNGTH], outfilename[FNMLNGTH], catfilename[FNMLNGTH], outcatfilename[FNMLNGTH], - weightfilename[FNMLNGTH], outweightfilename[FNMLNGTH], mixfilename[FNMLNGTH], outmixfilename[FNMLNGTH], ancfilename[FNMLNGTH], outancfilename[FNMLNGTH], - factfilename[FNMLNGTH], outfactfilename[FNMLNGTH]; -long sites, loci, maxalleles, groups, newsites, newersites, - newgroups, newergroups, nenzymes, reps, ws, blocksize, categs, maxnewsites; -boolean bootstrap, permute, ild, lockhart, jackknife, regular, xml, nexus, - weights, categories, factors, enzymes, all, justwts, progress, mixture, - firstrep, ancvar; -double fracsample; -datatype data; -seqtype seq; -steptr oldweight, where, how_many, newwhere, 
newhowmany, - newerwhere, newerhowmany, factorr, newerfactor, mixdata, ancdata; -steptr *charorder; -Char *factor; -long *alleles; -Char **nodep; -double **nodef; -long **sppord; -longer seed; - - -void getoptions() -{ - /* interactively set options */ - long reps0; - long inseed, inseed0, loopcount, loopcount2; - Char ch; - boolean done1; - - data = seqs; - seq = dna; - bootstrap = true; - jackknife = false; - permute = false; - ild = false; - lockhart = false; - blocksize = 1; - regular = true; - fracsample = 1.0; - all = false; - reps = 100; - weights = false; - mixture = false; - ancvar = false; - categories = false; - justwts = false; - printdata = false; - dotdiff = true; - progress = true; - interleaved = true; - xml = false; - nexus = false; - factors = false; - loopcount = 0; - for (;;) { - cleerhome(); - printf("\nBootstrapping algorithm, version %s\n\n",VERSION); - printf("Settings for this run:\n"); - printf(" D Sequence, Morph, Rest., Gene Freqs? %s\n", - (data == seqs ) ? "Molecular sequences" : - (data == morphology ) ? "Discrete Morphology" : - (data == restsites) ? "Restriction Sites" : - (data == genefreqs) ? "Gene Frequencies" : ""); - if (data == restsites) - printf(" E Number of enzymes? %s\n", - enzymes ? "Present in input file" : - "Not present in input file"); - if (data == genefreqs) - printf(" A All alleles present at each locus? %s\n", - all ? "Yes" : "No, one absent at each locus"); - if (data == morphology) - printf(" F Use factors information? %s\n", - factors ? "Yes" : "No"); - - printf(" J Bootstrap, Jackknife, Permute, Rewrite? %s\n", - regular && jackknife ? "Delete-half jackknife" : - (!regular) && jackknife ? "Delete-fraction jackknife" : - permute ? "Permute species for each character" : - ild ? "Permute character order" : - lockhart ? "Permute within species" : - regular && bootstrap ? "Bootstrap" : - (!regular) && bootstrap ? 
"Partial bootstrap" : - "Rewrite data"); - if (bootstrap || jackknife) { - printf(" "); - putchar('%'); - printf(" Regular or altered sampling fraction? "); - if (regular) - printf("regular\n"); - else { - if (fabs(fracsample*100 - (int)(fracsample*100)) > 0.01) { - if (fracsample < 1) - printf("%2.1lf", 100.0*fracsample); - else - printf("%3.1lf", 100.0*fracsample); - } else { if (fracsample < 1) - printf("%2.0lf", 100.0*fracsample); - else - printf("%3.0lf", 100.0*fracsample); - } - putchar('%'); - printf(" sampled\n"); - } - } - if ((data == seqs) - && !(jackknife || permute || bootstrap || ild || lockhart)) { - printf(" P PHYLIP, NEXUS, or XML output format? %s\n", - nexus ? "NEXUS" : xml ? "XML" : "PHYLIP"); - if (xml || ((data == seqs) && nexus)) { - printf(" S Type of molecular sequences? " ); - switch (seq) { - case (dna) : printf("DNA\n"); break; - case (rna) : printf("RNA\n"); break; - case (protein) : printf("Protein\n"); break; - } - } - } - if ((data == morphology) && !(jackknife || permute || ild - || lockhart || bootstrap)) - printf(" P PHYLIP or NEXUS output format? %s\n", - nexus ? "NEXUS" : "PHYLIP"); - if (bootstrap) { - if (blocksize > 1) - printf(" B Block size for block-bootstrapping? %ld\n", blocksize); - else - printf(" B Block size for block-bootstrapping? %ld (regular bootstrap)\n", blocksize); - } - if (bootstrap || jackknife || permute || ild || lockhart) - printf(" R How many replicates? %ld\n", reps); - if (jackknife || bootstrap || permute) { - printf(" W Read weights of characters? %s\n", - (weights ? "Yes" : "No")); - if(data == morphology){ - printf(" X Read mixture file? %s\n", - (mixture ? "Yes" : "No")); - printf(" N Read ancestors file? %s\n", - (ancvar ? "Yes" : "No")); - } - if (data == seqs) - printf(" C Read categories of sites? %s\n", - (categories ? "Yes" : "No")); - if ((!permute)) { - printf(" S Write out data sets or just weights? %s\n", - (justwts ? 
"Just weights" : "Data sets")); - } - } - if (data == seqs || data == restsites) - printf(" I Input sequences interleaved? %s\n", - interleaved ? "Yes" : "No, sequential"); - printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n", - ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)"); - printf(" 1 Print out the data at start of run %s\n", - printdata ? "Yes" : "No"); - if (printdata) - printf(" . Use dot-differencing to display them %s\n", - dotdiff ? "Yes" : "No"); - printf(" 2 Print indications of progress of run %s\n", - progress ? "Yes" : "No"); - printf("\n Y to accept these or type the letter for one to change\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%c%*[^\n]", &ch); - getchar(); - uppercase(&ch); - if (ch == 'Y') - break; - if ( - (bootstrap && (strchr("ABCDEFSJPRWXNI%1.20",ch) != NULL)) || - (jackknife && (strchr("ACDEFSJPRWXNI%1.20",ch) != NULL)) || - ((permute || ild || lockhart) - && (strchr("ACDEFSJPRXNI%1.20",ch) != NULL)) || - ((!(bootstrap || jackknife || permute || ild || lockhart)) && - ((!xml) && (strchr("ADEFJPI1.20",ch) != NULL))) || - (((data == morphology) || (data == seqs)) - && (nexus || xml) && (strchr("ADEFJPSI1.20",ch) != NULL)) - ) { - switch (ch) { - - case 'D': - if (data == genefreqs) - data = seqs; - else - data = (datatype)((long)data + 1); - break; - - case 'A': - all = !all; - break; - - case 'E': - enzymes = !enzymes; - break; - - case 'J': - if (permute) { - permute = false; - ild = true; - } else if (ild) { - ild = false; - lockhart = true; - } else if (lockhart) - lockhart = false; - else if (jackknife) { - jackknife = false; - permute = true; - } else if (bootstrap) { - bootstrap = false; - jackknife = true; - } else - bootstrap = true; - break; - - case '%': - regular = !regular; - if (!regular) { - loopcount2 = 0; - do { - printf("Samples as percentage of"); - if ((data == seqs) || (data == restsites)) - printf(" sites?\n"); - if (data == morphology) - printf(" characters?\n"); - if (data == genefreqs) - printf(" 
loci?\n"); - scanf("%lf%*[^\n]", &fracsample); - getchar(); - done1 = (fracsample > 0.0); - if (!done1) { - printf("BAD NUMBER: must be positive\n"); - } - fracsample = fracsample/100.0; - countup(&loopcount2, 10); - } while (done1 != true); - } - break; - - case 'P': - if (data == seqs) { - if (!xml && !nexus) - nexus = true; - else { - if (nexus) { - nexus = false; - xml = true; - } - else xml = false; - } - } - if (data == morphology) { - nexus = !nexus; - xml = false; - } - break; - - case 'S': - if(jackknife || permute || bootstrap || ild || lockhart){ - justwts = !justwts; - } else { - switch (seq) { - case (dna): seq = rna; break; - case (rna): seq = protein; break; - case (protein): seq = dna; break; - } - } - break; - - case 'B': - loopcount2 = 0; - do { - printf("Block size?\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%ld%*[^\n]", &blocksize); - getchar(); - done1 = (blocksize > 0); - if (!done1) { - printf("BAD NUMBER: must be positive\n"); - } - countup(&loopcount2, 10); - } while (done1 != true); - break; - - case 'R': - reps0 = reps; - loopcount2 = 0; - do { - printf("Number of replicates?\n"); -#ifdef WIN32 - phyFillScreenColor(); -#endif - scanf("%ld%*[^\n]", &reps); - getchar(); - done1 = (reps > 0); - if (!done1) { - printf("BAD NUMBER: must be positive\n"); - reps = reps0; - } - countup(&loopcount2, 10); - } while (done1 != true); - break; - - case 'W': - weights = !weights; - break; - - case 'X': - mixture = !mixture; - break; - - case 'N': - ancvar = !ancvar; - break; - - case 'C': - categories = !categories; - break; - - case 'F': - factors = !factors; - break; - - case 'I': - interleaved = !interleaved; - break; - - case '0': - initterminal(&ibmpc, &ansi); - break; - - case '1': - printdata = !printdata; - break; - - case '.': - dotdiff = !dotdiff; - break; - - case '2': - progress = !progress; - break; - } - } else - printf("Not a possible option!\n"); - countup(&loopcount, 100); - } - if (bootstrap || jackknife) { - if 
(jackknife && regular) - fracsample = 0.5; - if (bootstrap && regular) - fracsample = 1.0; - } - if (bootstrap || jackknife || permute || ild || lockhart) - initseed(&inseed, &inseed0, seed); - xml = xml && (data == seqs); - categories = categories && (data == seqs); - mixture = mixture && (data == morphology); - ancvar = ancvar && (data == morphology); -} /* getoptions */ - - -void seqboot_inputnumbers() -{ - /* read numbers of species and of sites */ - long i; - - fscanf(infile, "%ld%ld", &spp, &sites); - loci = sites; - maxalleles = 1; - if (data == restsites && enzymes) - fscanf(infile, "%ld", &nenzymes); - if (data == genefreqs) { - alleles = (long *)Malloc(sites*sizeof(long)); - scan_eoln(infile); - sites = 0; - for (i = 0; i < (loci); i++) { - if (eoln(infile)) - scan_eoln(infile); - fscanf(infile, "%ld", &alleles[i]); - if (alleles[i] > maxalleles) - maxalleles = alleles[i]; - if (all) - sites += alleles[i]; - else - sites += alleles[i] - 1; - } - if (!all) - maxalleles--; - scan_eoln(infile); - } -} /* seqboot_inputnumbers */ - - -void seqboot_inputfactors() -{ - long i, j; - Char ch, prevch; - - prevch = ' '; - j = 0; - for (i = 0; i < (sites); i++) { - do { - if (eoln(factfile)) - scan_eoln(factfile); - ch = gettc(factfile); - } while (ch == ' '); - if (ch != prevch) - j++; - prevch = ch; - factorr[i] = j; - } - scan_eoln(factfile); -} /* seqboot_inputfactors */ - - -void inputoptions() -{ - /* input the information on the options */ - long weightsum, maxfactsize, i, j, k, l, m; - - if (data == genefreqs) { - k = 0; - l = 0; - for (i = 0; i < (loci); i++) { - if (all) - m = alleles[i]; - else - m = alleles[i] - 1; - k++; - for (j = 1; j <= m; j++) { - l++; - factorr[l - 1] = k; - } - } - } else { - for (i = 1; i <= (sites); i++) - factorr[i - 1] = i; - } - if(factors){ - seqboot_inputfactors(); - } - for (i = 0; i < (sites); i++) - oldweight[i] = 1; - if (weights) - inputweights2(0, sites, &weightsum, oldweight, &weights, "seqboot"); - if (factors && 
printdata) { - for(i = 0; i < sites; i++) - factor[i] = (char)('0' + (factorr[i]%10)); - printfactors(outfile, sites, factor, " (least significant digit)"); - } - if (weights && printdata) - printweights(outfile, 0, sites, oldweight, "Sites"); - for (i = 0; i < (loci); i++) - how_many[i] = 0; - for (i = 0; i < (loci); i++) - where[i] = 0; - for (i = 1; i <= (sites); i++) { - how_many[factorr[i - 1] - 1]++; - if (where[factorr[i - 1] - 1] == 0) - where[factorr[i - 1] - 1] = i; - } - groups = factorr[sites - 1]; - newgroups = 0; - newsites = 0; - maxfactsize = 0; - for(i = 0 ; i < loci ; i++){ - if(how_many[i] > maxfactsize){ - maxfactsize = how_many[i]; - } - } - maxnewsites = groups * maxfactsize; - allocnew(); - for (i = 0; i < (groups); i++) { - if (oldweight[where[i] - 1] > 0) { - newgroups++; - newsites += how_many[i]; - newwhere[newgroups - 1] = where[i]; - newhowmany[newgroups - 1] = how_many[i]; - } - } -} /* inputoptions */ - - -void seqboot_inputdata() -{ - /* input the names and sequences for each species */ - long i, j, k, l, m, n, basesread, basesnew=0; - double x; - Char charstate; - boolean allread, done; - - if (data == genefreqs) { - nodef = (double **)Malloc(spp*sizeof(double *)); - for (i = 0; i < (spp); i++) - nodef[i] = (double *)Malloc(sites*sizeof(double)); - } else { - nodep = (Char **)Malloc(spp*sizeof(Char *)); - for (i = 0; i < (spp); i++) - nodep[i] = (Char *)Malloc(sites*sizeof(Char)); - } - j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; - if (j < nmlngth - 1) - j = nmlngth - 1; - if (j > 37) - j = 37; - if (printdata) { - fprintf(outfile, "\nBootstrapping algorithm, version %s\n\n\n",VERSION); - if (bootstrap) { - if (blocksize > 1) { - if (regular) - fprintf(outfile, "Block-bootstrap with block size %ld\n\n", blocksize); - else - fprintf(outfile, "Partial (%2.0f%%) block-bootstrap with block size %ld\n\n", - 100*fracsample, blocksize); - } else { - if (regular) - fprintf(outfile, "Bootstrap\n\n"); - else - fprintf(outfile, "Partial 
(%2.0f%%) bootstrap\n\n", 100*fracsample); - } - } else { - if (jackknife) { - if (regular) - fprintf(outfile, "Delete-half Jackknife\n\n"); - else - fprintf(outfile, "Delete-%2.0f%% Jackknife\n\n", 100*(1.0-fracsample)); - } else { - if (permute) { - fprintf(outfile, "Species order permuted separately for each"); - if (data == genefreqs) - fprintf(outfile, " locus\n\n"); - if (data == seqs) - fprintf(outfile, " site\n\n"); - if (data == morphology) - fprintf(outfile, " character\n\n"); - if (data == restsites) - fprintf(outfile, " site\n\n"); - } - else { - if (ild) { - if (data == genefreqs) - fprintf(outfile, "Locus"); - if (data == seqs) - fprintf(outfile, "Site"); - if (data == morphology) - fprintf(outfile, "Character"); - if (data == restsites) - fprintf(outfile, "Site"); - fprintf(outfile, " order permuted\n\n"); - } else { - if (lockhart) - if (data == genefreqs) - fprintf(outfile, "Locus"); - if (data == seqs) - fprintf(outfile, "Site"); - if (data == morphology) - fprintf(outfile, "Character"); - if (data == restsites) - fprintf(outfile, "Site"); - fprintf(outfile, " order permuted separately for each species\n\n"); - } - } - } - } - if (data == genefreqs) - fprintf(outfile, "%3ld species, %3ld loci\n\n", spp, loci); - else { - fprintf(outfile, "%3ld species, ", spp); - if (data == seqs) - fprintf(outfile, "%3ld sites\n\n", sites); - else if (data == morphology) - fprintf(outfile, "%3ld characters\n\n", sites); - else if (data == restsites) - fprintf(outfile, "%3ld sites\n\n", sites); - } - fprintf(outfile, "Name"); - for (i = 1; i <= j; i++) - putc(' ', outfile); - fprintf(outfile, "Data\n"); - fprintf(outfile, "----"); - for (i = 1; i <= j; i++) - putc(' ', outfile); - fprintf(outfile, "----\n\n"); - } - interleaved = (interleaved && ((data == seqs) || (data == restsites))); - if (data == genefreqs) { - for (i = 1; i <= (spp); i++) { - initname(i - 1); - j = 1; - while (j <= sites && !eoff(infile)) { - if (eoln(infile)) - scan_eoln(infile); - 
fscanf(infile, "%lf", &x); - if ((unsigned)x > 1.0) { - printf("GENE FREQ OUTSIDE [0,1] in species %ld\n", i); - exxit(-1); - } else { - nodef[i - 1][j - 1] = x; - j++; - } - } - scan_eoln(infile); - } - return; - } - basesread = 0; - allread = false; - while (!allread) { - /* eat white space -- if the separator line has spaces on it*/ - do { - charstate = gettc(infile); - } while (charstate == ' ' || charstate == '\t'); - ungetc(charstate, infile); - if (eoln(infile)) - scan_eoln(infile); - i = 1; - while (i <= spp) { - if ((interleaved && basesread == 0) || !interleaved) - initname(i-1); - j = interleaved ? basesread : 0; - done = false; - while (!done && !eoff(infile)) { - if (interleaved) - done = true; - while (j < sites && !(eoln(infile) ||eoff(infile))) { - charstate = gettc(infile); - if (charstate == '\n' || charstate == '\t') - charstate = ' '; - if (charstate == ' ' || - (data == seqs && charstate >= '0' && charstate <= '9')) - continue; - uppercase(&charstate); - j++; - if (charstate == '.') - charstate = nodep[0][j-1]; - nodep[i-1][j-1] = charstate; - } - if (interleaved) - continue; - if (j < sites) - scan_eoln(infile); - else if (j == sites) - done = true; - } - if (interleaved && i == 1) - basesnew = j; - scan_eoln(infile); - if ((interleaved && j != basesnew) || ((!interleaved) && j != sites)){ - printf("\n\nERROR: sequences out of alignment at site %ld", j+1); - printf(" of species %ld\n\n", i); - exxit(-1);} - i++; - } - if (interleaved) { - basesread = basesnew; - allread = (basesread == sites); - } else - allread = (i > spp); - } - if (!printdata) - return; - if (data == genefreqs) - m = (sites - 1) / 8 + 1; - else - m = (sites - 1) / 60 + 1; - for (i = 1; i <= m; i++) { - for (j = 0; j < spp; j++) { - for (k = 0; k < nmlngth; k++) - putc(nayme[j][k], outfile); - fprintf(outfile, " "); - if (data == genefreqs) - l = i * 8; - else - l = i * 60; - if (l > sites) - l = sites; - if (data == genefreqs) - n = (i - 1) * 8; - else - n = (i - 1) * 60; - 
for (k = n; k < l; k++) { - if (data == genefreqs) - fprintf(outfile, "%8.5f", nodef[j][k]); - else { - if (j + 1 > 1 && nodep[j][k] == nodep[0][k]) - charstate = '.'; - else - charstate = nodep[j][k]; - putc(charstate, outfile); - if ((k + 1) % 10 == 0 && (k + 1) % 60 != 0) - putc(' ', outfile); - - } - } - putc('\n', outfile); - } - putc('\n', outfile); - } - putc('\n', outfile); -} /* seqboot_inputdata */ - - -void allocrest() -{ /* allocate memory for bookkeeping arrays */ - - oldweight = (steptr)Malloc(sites*sizeof(long)); - weight = (steptr)Malloc(sites*sizeof(long)); - if (categories) - category = (steptr)Malloc(sites*sizeof(long)); - if (mixture) - mixdata = (steptr)Malloc(sites*sizeof(long)); - if (ancvar) - ancdata = (steptr)Malloc(sites*sizeof(long)); - where = (steptr)Malloc(loci*sizeof(long)); - how_many = (steptr)Malloc(loci*sizeof(long)); - factor = (Char *)Malloc(sites*sizeof(Char)); - factorr = (steptr)Malloc(sites*sizeof(long)); - nayme = (naym *)Malloc(spp*sizeof(naym)); -} /* allocrest */ - -void allocnew(void) -{ /* allocate memory for arrays that depend on the lenght of the - output sequence*/ - long i; - - newwhere = (steptr)Malloc(loci*sizeof(long)); - newhowmany = (steptr)Malloc(loci*sizeof(long)); - newerwhere = (steptr)Malloc(loci*sizeof(long)); - newerhowmany = (steptr)Malloc(loci*sizeof(long)); - newerfactor = (steptr)Malloc(maxnewsites*maxalleles*sizeof(long)); - charorder = (steptr *)Malloc(spp*sizeof(steptr)); - for (i = 0; i < spp; i++) - charorder[i] = (steptr)Malloc(maxnewsites*sizeof(long)); -} - -void doinput(int argc, Char *argv[]) -{ /* reads the input data */ - getoptions(); - seqboot_inputnumbers(); - allocrest(); - if (weights) - openfile(&weightfile,WEIGHTFILE,"input weight file", - "r",argv[0],weightfilename); - if (mixture){ - openfile(&mixfile,MIXFILE,"mixture file", "r",argv[0],mixfilename); - openfile(&outmixfile,"outmixture","output mixtures file","w",argv[0], - outmixfilename); - seqboot_inputaux(mixdata, mixfile); 
- } - if (ancvar){ - openfile(&ancfile,ANCFILE,"ancestor file", "r",argv[0],ancfilename); - openfile(&outancfile,"outancestors","output ancestors file","w",argv[0], - outancfilename); - seqboot_inputaux(ancdata, ancfile); - } - if (categories) { - openfile(&catfile,CATFILE,"input category file","r",argv[0],catfilename); - openfile(&outcatfile,"outcategories","output category file","w",argv[0], - outcatfilename); - inputcategs(0, sites, category, 9, "SeqBoot"); - } - if (factors){ - openfile(&factfile,FACTFILE,"factors file","r",argv[0],factfilename); - openfile(&outfactfile,"outfactors","output factors file","w",argv[0], - outfactfilename); - } - if (justwts && !permute) - openfile(&outweightfile,"outweights","output weight file", - "w",argv[0],outweightfilename); - else { - openfile(&outfile,OUTFILE,"output data file","w",argv[0],outfilename); - } - inputoptions(); - seqboot_inputdata(); -} /* doinput */ - - -void bootweights() -{ /* sets up weights by resampling data */ - long i, j, k, blocks; - double p, q, r; - - ws = newgroups; - for (i = 0; i < (ws); i++) - weight[i] = 0; - if (jackknife) { - if (fabs(newgroups*fracsample - (long)(newgroups*fracsample+0.5)) - > 0.00001) { - if (randum(seed) - < (newgroups*fracsample - (long)(newgroups*fracsample)) - /((long)(newgroups*fracsample+1.0)-(long)(newgroups*fracsample))) - q = (long)(newgroups*fracsample)+1; - else - q = (long)(newgroups*fracsample); - } else - q = (long)(newgroups*fracsample+0.5); - r = newgroups; - p = q / r; - ws = 0; - for (i = 0; i < (newgroups); i++) { - if (randum(seed) < p) { - weight[i]++; - ws++; - q--; - } - r--; - if (i + 1 < newgroups) - p = q / r; - } - } else if (permute) { - for (i = 0; i < (newgroups); i++) - weight[i] = 1; - } else if (bootstrap) { - blocks = fracsample * newgroups / blocksize; - for (i = 1; i <= (blocks); i++) { - j = (long)(newgroups * randum(seed)) + 1; - for (k = 0; k < blocksize; k++) { - weight[j - 1]++; - j++; - if (j > newgroups) - j = 1; - } - } - } else 
/* case of rewriting data */ - for (i = 0; i < (newgroups); i++) - weight[i] = 1; - for (i = 0; i < (newgroups); i++) - newerwhere[i] = 0; - for (i = 0; i < (newgroups); i++) - newerhowmany[i] = 0; - newergroups = 0; - newersites = 0; - for (i = 0; i < (newgroups); i++) { - for (j = 1; j <= (weight[i]); j++) { - newergroups++; - for (k = 1; k <= (newhowmany[i]); k++) { - newersites++; - newerfactor[newersites - 1] = newergroups; - } - newerwhere[newergroups - 1] = newwhere[i]; - newerhowmany[newergroups - 1] = newhowmany[i]; - } - } -} /* bootweights */ - - -void sppermute(long n) -{ /* permute the species order as given in array sppord */ - long i, j, k; - - for (i = 1; i <= (spp - 1); i++) { - k = (long)((i+1) * randum(seed)); - j = sppord[n - 1][i]; - sppord[n - 1][i] = sppord[n - 1][k]; - sppord[n - 1][k] = j; - } -} /* sppermute */ - - -void charpermute(long m, long n) -{ /* permute the n+1 characters of species m+1 */ - long i, j, k; - - for (i = 1; i <= (n-1); i++) { - k = (long)((i+1) * randum(seed)); - j = charorder[m][i]; - charorder[m][i] = charorder[m][k]; - charorder[m][k] = j; - } -} /* charpermute */ - - -void writedata() -{ - /* write out one set of bootstrapped sequences */ - long i, j, k, l, m, n, n2; - double x; - Char charstate; - - sppord = (long **)Malloc(newergroups*sizeof(long *)); - for (i = 0; i < (newergroups); i++) - sppord[i] = (long *)Malloc(spp*sizeof(long)); - for (j = 1; j <= spp; j++) - sppord[0][j - 1] = j; - for (i = 1; i < newergroups; i++) { - for (j = 1; j <= (spp); j++) - sppord[i][j - 1] = sppord[i - 1][j - 1]; - } - if (!justwts || permute) { - if (data == restsites && enzymes) - fprintf(outfile, "%5ld %5ld% 4ld\n", spp, newergroups, nenzymes); - else if (data == genefreqs) - fprintf(outfile, "%5ld %5ld\n", spp, newergroups); - else { - if ((data == seqs) - && !(bootstrap || jackknife || permute || ild || lockhart) && xml) - fprintf(outfile, "\n"); - else - if (!(bootstrap || jackknife || permute || ild || lockhart) && 
nexus) { - fprintf(outfile, "#NEXUS\n"); - fprintf(outfile, "BEGIN DATA;\n"); - fprintf(outfile, " DIMENSIONS NTAX=%ld NCHAR=%ld;\n", - spp, newersites); - fprintf(outfile, " FORMAT"); - if (interleaved) - fprintf(outfile, " interleave=yes"); - else - fprintf(outfile, " interleave=no"); - fprintf(outfile, " DATATYPE="); - if (data == seqs) { - switch (seq) { - case (dna): fprintf(outfile, "DNA missing=N gap=-"); break; - case (rna): fprintf(outfile, "RNA missing=N gap=-"); break; - case (protein): - fprintf(outfile, "protein missing=? gap=-"); - break; - } - } - if (data == morphology) - fprintf(outfile, "STANDARD"); - fprintf(outfile, ";\n MATRIX\n"); - } - else fprintf(outfile, "%5ld %5ld\n", spp, newersites); - } - if (data == genefreqs) { - for (i = 0; i < (newergroups); i++) - fprintf(outfile, " %3ld", alleles[factorr[newerwhere[i] - 1] - 1]); - putc('\n', outfile); - } - } - l = 1; - if ((!(bootstrap || jackknife || permute || ild || lockhart | nexus)) - && ((data == seqs) || (data == restsites))) { - interleaved = !interleaved; - if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) - interleaved = false; - } - if (interleaved) - m = 60; - else - m = newergroups; - do { - if (m > newergroups) - m = newergroups; - for (j = 0; j < spp; j++) { - n = 0; - if ((l == 1) || (interleaved && nexus)) { - if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) { - fprintf(outfile, " \n"); - fprintf(outfile, " "); - } - n2 = nmlngth; - if (!(bootstrap || jackknife || permute || ild || lockhart) - && (xml || nexus)) { - while (nayme[j][n2-1] == ' ') - n2--; - } - if (nexus) - fprintf(outfile, " "); - for (k = 0; k < n2; k++) - if (nexus && (nayme[j][k] == ' ') && (k < n2)) - putc('_', outfile); - else - putc(nayme[j][k], outfile); - if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) - fprintf(outfile, "\n "); - } else { - if (!(bootstrap || jackknife || permute || ild || lockhart) && xml) { - fprintf(outfile, " "); - } - else { 
- for (k = 1; k <= nmlngth; k++) - putc(' ', outfile); - } - } - if (!xml) { - for (k = 0; k < nmlngth-n2; k++) - fprintf(outfile, " "); - fprintf(outfile, " "); - } - for (k = l - 1; k < m; k++) { - if (permute && j + 1 == 1) - sppermute(newerfactor[n]); /* we can assume chars not permuted */ - for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { - n++; - if (data == genefreqs) { - if (n > 1 && (n & 7) == 1) - fprintf(outfile, "\n "); - x = nodef[sppord[newerfactor[charorder[j][n - 1]] - 1][j] - 1] - [newerwhere[charorder[j][k]] + n2]; - fprintf(outfile, "%8.5f", x); - } else { - if (!(bootstrap || jackknife || permute || ild || lockhart) && xml - && (n > 1) && (n % 60 == 1)) - fprintf(outfile, "\n "); - else if (!nexus && !interleaved && (n > 1) && (n % 60 == 1)) - fprintf(outfile, "\n "); - charstate = nodep[sppord[newerfactor[charorder[j][n - 1]] - 1] - [j] - 1][newerwhere[charorder[j][k]] + n2]; - putc(charstate, outfile); - if (n % 10 == 0 && n % 60 != 0) - putc(' ', outfile); - } - } - } - if (!(bootstrap || jackknife || permute || ild || lockhart ) && xml) { - fprintf(outfile, "\n \n"); - } - putc('\n', outfile); - } - if (interleaved) { - if ((m <= newersites) && (newersites > 60)) - putc('\n', outfile); - l += 60; - m += 60; - } - } while (interleaved && l <= newersites); - if ((data == seqs) && - (!(bootstrap || jackknife || permute || ild || lockhart) && xml)) - fprintf(outfile, "\n"); - if (!(bootstrap || jackknife || permute || ild || lockhart) && nexus) - fprintf(outfile, " ;\nEND;\n"); - for (i = 0; i < (newergroups); i++) - free(sppord[i]); - free(sppord); -} /* writedata */ - - -void writeweights() -{ /* write out one set of post-bootstrapping weights */ - long j, k, l, m, n, o; - - j = 0; - l = 1; - if (interleaved) - m = 60; - else - m = sites; - do { - if(m > sites) - m = sites; - n = 0; - for (k = l - 1; k < m; k++) { - for(o = 0 ; o < how_many[k] ; o++){ - if(oldweight[k]==0){ - fprintf(outweightfile, "0"); - j++; - } - else{ - if (weight[k-j] 
< 10) - fprintf(outweightfile, "%c", (char)('0'+weight[k-j])); - else - fprintf(outweightfile, "%c", (char)('A'+weight[k-j]-10)); - n++; - if (!interleaved && n > 1 && n % 60 == 1) { - fprintf(outweightfile, "\n"); - if (n % 10 == 0 && n % 60 != 0) - putc(' ', outweightfile); - } - } - } - } - putc('\n', outweightfile); - if (interleaved) { - l += 60; - m += 60; - } - } while (interleaved && l <= sites); -} /* writeweights */ - - -void writecategories() -{ - /* write out categories for the bootstrapped sequences */ - long k, l, m, n, n2; - Char charstate; - if(justwts){ - if (interleaved) - m = 60; - else - m = sites; - l=1; - do { - if(m > sites) - m = sites; - n=0; - for(k=l-1 ; k < m ; k++){ - n++; - if (!interleaved && n > 1 && n % 60 == 1) - fprintf(outcatfile, "\n "); - charstate = '0' + category[k]; - putc(charstate, outcatfile); - } - if (interleaved) { - l += 60; - m += 60; - } - }while(interleaved && l <= sites); - fprintf(outcatfile, "\n"); - return; - } - - l = 1; - if (interleaved) - m = 60; - else - m = newergroups; - do { - if (m > newergroups) - m = newergroups; - n = 0; - for (k = l - 1; k < m; k++) { - for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { - n++; - if (!interleaved && n > 1 && n % 60 == 1) - fprintf(outcatfile, "\n "); - charstate = '0' + category[newerwhere[k] + n2]; - putc(charstate, outcatfile); - if (n % 10 == 0 && n % 60 != 0) - putc(' ', outcatfile); - } - } - if (interleaved) { - l += 60; - m += 60; - } - } while (interleaved && l <= newersites); - fprintf(outcatfile, "\n"); -} /* writecategories */ - - -void writeauxdata(steptr auxdata, FILE *outauxfile) -{ - /* write out auxiliary option data (mixtures, ancestors, ect) to - appropriate file. 
Samples parralel to data, or just gives one - output entry if justwts is true */ - long k, l, m, n, n2; - Char charstate; - - /* if we just output weights (justwts), and this is first set - just output the data unsampled */ - if(justwts){ - if(firstrep){ - if (interleaved) - m = 60; - else - m = sites; - l=1; - do { - if(m > sites) - m = sites; - n = 0; - for(k=l-1 ; k < m ; k++){ - n++; - if (!interleaved && n > 1 && n % 60 == 1) - fprintf(outauxfile, "\n "); - charstate = auxdata[k]; - putc(charstate, outauxfile); - } - if (interleaved) { - l += 60; - m += 60; - } - }while(interleaved && l <= sites); - fprintf(outauxfile, "\n"); - } - return; - } - - l = 1; - if (interleaved) - m = 60; - else - m = newergroups; - do { - if (m > newergroups) - m = newergroups; - n = 0; - for (k = l - 1; k < m; k++) { - for (n2 = -1; n2 <= (newerhowmany[k] - 2); n2++) { - n++; - if (!interleaved && n > 1 && n % 60 == 1) - fprintf(outauxfile, "\n "); - charstate = auxdata[newerwhere[k] + n2]; - putc(charstate, outauxfile); - if (n % 10 == 0 && n % 60 != 0) - putc(' ', outauxfile); - } - } - if (interleaved) { - l += 60; - m += 60; - } - } while (interleaved && l <= newersites); - fprintf(outauxfile, "\n"); -} /* writeauxdata */ - -void writefactors(void) -{ - long k, l, m, n, prevfact, writesites; - char symbol; - steptr wfactor; - - if(!justwts || firstrep){ - if(justwts){ - writesites = sites; - wfactor = factorr; - } else { - writesites = newersites; - wfactor = newerfactor; - } - prevfact = wfactor[0]; - symbol = '+'; - if (interleaved) - m = 60; - else - m = writesites; - l=1; - do { - if(m > writesites) - m = writesites; - n = 0; - for(k=l-1 ; k < m ; k++){ - n++; - if (!interleaved && n > 1 && n % 60 == 1) - fprintf(outfactfile, "\n "); - if(prevfact != wfactor[k]){ - symbol = (symbol == '+') ? 
'-' : '+'; - prevfact = wfactor[k]; - } - putc(symbol, outfactfile); - if (n % 10 == 0 && n % 60 != 0) - putc(' ', outfactfile); - } - if (interleaved) { - l += 60; - m += 60; - } - }while(interleaved && l <= writesites); - fprintf(outfactfile, "\n"); - } -} /* writefactors */ - - -void bootwrite() -{ /* does bootstrapping and writes out data sets */ - long i, j, rr, repdiv10; - - if (!(bootstrap || jackknife || permute || ild || lockhart)) - reps = 1; - repdiv10 = reps / 10; - if (repdiv10 < 1) - repdiv10 = 1; - if (progress) - putchar('\n'); - for (rr = 1; rr <= (reps); rr++) { - for (i = 0; i < spp; i++) - for (j = 0; j < maxnewsites; j++) - charorder[i][j] = j; - if(rr==1) - firstrep = true; - else - firstrep = false; - if (ild) { - charpermute(0, maxnewsites); - for (i = 1; i < spp; i++) - for (j = 0; j < maxnewsites; j++) - charorder[i][j] = charorder[0][j]; - } - if (lockhart) - for (i = 0; i < spp; i++) - charpermute(i, maxnewsites); - bootweights(); - if (!justwts || permute || ild || lockhart) - writedata(); - if (justwts && !(permute || ild || lockhart)) - writeweights(); - if (categories) - writecategories(); - if (factors) - writefactors(); - if (mixture) - writeauxdata(mixdata, outmixfile); - if (ancvar) - writeauxdata(ancdata, outancfile); - if (progress && (bootstrap || jackknife || permute || ild || lockhart) - && ((reps < 10) || rr % repdiv10 == 0)) { - printf("completed replicate number %4ld\n", rr); -#ifdef WIN32 - phyFillScreenColor(); -#endif - } - } - if (progress) { - if (justwts) - printf("\nOutput weights written to file \"%s\"\n\n", outweightfilename); - else - printf("\nOutput written to file \"%s\"\n\n", outfilename); - } -} /* bootwrite */ - - -void seqboot_inputaux(steptr dataptr, FILE* auxfile) -{ /* input auxiliary option data (mixtures, ancestors, ect) for - new style input, assumes that data is correctly formated - in input files*/ - long i, j, k; - Char ch; - - j = 0; - k = 1; - for (i = 0; i < (sites); i++) { - do { - if 
(eoln(auxfile)) - scan_eoln(auxfile); - ch = gettc(auxfile); - if (ch == '\n') - ch = ' '; - } while (ch == ' '); - dataptr[i] = ch; - } - scan_eoln(auxfile); -} /* seqboot_inputaux */ - - -int main(int argc, Char *argv[]) -{ /* Read in sequences or frequencies and bootstrap or jackknife them */ -#ifdef MAC - argc = 1; /* macsetup("SeqBoot",""); */ - argv[0] = "SeqBoot"; -#endif - init(argc,argv); - openfile(&infile, INFILE, "input file", "r", argv[0], infilename); - ibmpc = IBMCRT; - ansi = ANSICRT; - doinput(argc, argv); - bootwrite(); - FClose(infile); - if (weights) - FClose(weightfile); - if (categories) { - FClose(catfile); - FClose(outcatfile); - } - if(mixture) - FClose(outmixfile); - if(ancvar) - FClose(outancfile); - if (justwts && !permute) { - FClose(outweightfile); - } - else - FClose(outfile); -#ifdef MAC - fixmacfile(outfilename); - if (justwts && !permute) - fixmacfile(outweightfilename); - if (categories) - fixmacfile(outcatfilename); - if (mixture) - fixmacfile(outmixfilename); -#endif - printf("Done.\n\n"); -#ifdef WIN32 - phyRestoreConsoleAttributes(); -#endif - return 0; -} diff --git a/forester/archive/RIO/others/phylip_mod/src/test_infile_fitch b/forester/archive/RIO/others/phylip_mod/src/test_infile_fitch deleted file mode 100644 index 3c129f1..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/test_infile_fitch +++ /dev/null @@ -1,8 +0,0 @@ -7 -Bovine_sequence 0.0000 1.6866 1.7198 1.6606 1.5243 1.6043 1.5905 -Mouse_sequence 1.6866 0.0000 1.5232 1.4841 1.4465 1.4389 1.4629 -Gibbon_sequence 1.7198 1.5232 0.0000 0.7115 0.5958 0.6179 0.5583 -Orang_sequence 1.6606 1.4841 0.7115 0.0000 0.4631 0.5061 0.4710 -Gorilla_sequence 1.5243 1.4465 0.5958 0.4631 0.0000 0.3484 0.3083 -Chimp_sequence 1.6043 1.4389 0.6179 0.5061 0.3484 0.0000 0.2692 -Human_sequence 1.5905 1.4629 0.5583 0.4710 0.3083 0.2692 0.0000 diff --git a/forester/archive/RIO/others/phylip_mod/src/test_infile_protdist b/forester/archive/RIO/others/phylip_mod/src/test_infile_protdist 
deleted file mode 100644 index d263f1b..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/test_infile_protdist +++ /dev/null @@ -1,6 +0,0 @@ - 5 13 -Alpha AACGTGGCCACAT -Beta AAGGTCGCCACAC -Gamma CAGTTCGCCACAA -Delta GAGATTTCCGCCT -Epsilon GAGATCTCCGCCC diff --git a/forester/archive/RIO/others/phylip_mod/src/test_infile_protml b/forester/archive/RIO/others/phylip_mod/src/test_infile_protml deleted file mode 100644 index 67a49d6..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/test_infile_protml +++ /dev/null @@ -1,6 +0,0 @@ - 5 13 -Alpha_sequence AACGTGGCCAAAT -Beta_sequence AAGGTCGCCAAAC -Gamma_sequence CATTTCGTCACAA -Delta_sequence GGTATTTCGGCCT -Epsilon_sequence GGGATCTCGGCCC diff --git a/forester/archive/RIO/others/phylip_mod/src/test_infile_protmlk b/forester/archive/RIO/others/phylip_mod/src/test_infile_protmlk deleted file mode 100644 index d825652..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/test_infile_protmlk +++ /dev/null @@ -1,6 +0,0 @@ - 5 13 -Alpha AACGTGGCCAAAT -Beta AAGGTCGCCAAAC -Gamma CATTTCGTCACAA -Delta GGTATTTCGGCCT -Epsilon GGGATCTCGGCCC diff --git a/forester/archive/RIO/others/phylip_mod/src/test_infile_protpars b/forester/archive/RIO/others/phylip_mod/src/test_infile_protpars deleted file mode 100644 index 30d58b2..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/test_infile_protpars +++ /dev/null @@ -1,6 +0,0 @@ - 5 10 -Alpha ABCDEFGHIK -Beta AB--EFGHIK -Gamma ?BCDSFG*?? 
-Delta CIKDEFGHIK -Epsilon DIKDEFGHIK diff --git a/forester/archive/RIO/others/phylip_mod/src/test_infile_seqbboot b/forester/archive/RIO/others/phylip_mod/src/test_infile_seqbboot deleted file mode 100644 index 19766b0..0000000 --- a/forester/archive/RIO/others/phylip_mod/src/test_infile_seqbboot +++ /dev/null @@ -1,6 +0,0 @@ - 5 6 -Alpha_sequence AACAAC -Beta_sequence AACCCC -Gamma_sequence ACCAAC -Delta_sequence CCACCA -Epsilon_sequence CCAAAC diff --git a/forester/archive/RIO/others/puzzle_dqo/AUTHORS b/forester/archive/RIO/others/puzzle_dqo/AUTHORS deleted file mode 100644 index cbef439..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/AUTHORS +++ /dev/null @@ -1,45 +0,0 @@ -since 1999 by Heiko A. Schmidt, Korbinian Strimmer, - Martin Vingron, Arndt von Haeseler - -1995-1999 by Korbinian Strimmer and Arndt von Haeseler - - - -Heiko A. Schmidt - Theoretical Bioinformatics - Deutsches Krebsforschungszentrum (DKFZ) - Im Neuenheimer Feld 280 - D-69124 Heidelberg - Germany - - email: h.schmidt@dkfz-heidelberg.de, - http://www.dkfz-heidelberg.de/tbi/ - -Korbinian Strimmer - Department of Zoology - University of Oxford - South Parks Road - Oxford OX1 3PS, UK - - email: korbinian.strimmer@zoo.ox.ac.uk - http://www.zoo.ox.ac.uk/ - -Martin Vingron - Theoretical Bioinformatics - Deutsches Krebsforschungszentrum (DKFZ) - Im Neuenheimer Feld 280 - D-69124 Heidelberg - Germany - - email: vingron@dkfz-heidelberg.de - http://www.dkfz-heidelberg.de/tbi/ - -Arndt von Haeseler - Max-Planck-Institute for Evolutionary Anthropology - Inselstr. 
22 - D-04103 Leipzig - Germany - - email: haeseler@eva.mpg.de, - http://www.eva.mpg.de/ - diff --git a/forester/archive/RIO/others/puzzle_dqo/COPYING b/forester/archive/RIO/others/puzzle_dqo/COPYING deleted file mode 100644 index d60c31a..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/COPYING +++ /dev/null @@ -1,340 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. 
- - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) 
Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. 
- - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. 
You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. 
- -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. 
If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. 
diff --git a/forester/archive/RIO/others/puzzle_dqo/ChangeLog b/forester/archive/RIO/others/puzzle_dqo/ChangeLog deleted file mode 100644 index 824b296..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/ChangeLog +++ /dev/null @@ -1,347 +0,0 @@ - -Version date what has been changed - -5.0 26.08.2000 - changes to manual, Makefile.in - - cpREV hidden by -DCPREV flag - - chi2test, quartio included into source code files - - generic scr/Makefile.generic - - src/makefile.com for VAX - - AUTHORS, README, ChangeLog updated - - INSTALL checked - 27.08.2000 - test code excluded - - '-randseed#' added for debugging purposes - - ./data added to autoconf/automake - - warning output if cmdline parameter unknown - 11.10.2000 - fixed output of rate categories of sites before - computing them - - check whether rate categories were computed by - 1st user tree or NJ tree fixed in the output - 12.10.2000 - invariant site model normalization fixed - - -CODE FREEZE -=========== - -5.0.a33 15.08.2000 - changes for autoconf/automake - -5.0.a32 01.08.2000 - a FPE error fixed (badq == 0) - - small error in -bestq fixed - - fflush's added at several places - -5.0.a31 01.08.2000 - comments added to tree structure sorting puzzle2.c - - changes in configure.in, Makefile.in - -5.0.a30 23.07.2000 - some debugging in checkquart - - changed to autoconf - -5.0.a29 13.07.2000 - some debugging in checkquart - -5.0.a28 13.07.2000 - use best quartet topology option (-bestq) implemented - -5.0.a27 13.07.2000 - further developement to checkquart - - ascii/binary quartet values (-wqla/-wqlb) - - typo correction - -5.0.a26 11.07.2000 - fflush at all checktimer - - further developement at checkquart - - possibility to write quartet values to file (-wqlh) - -5.0.a25 06.07.2000 - fflush at checktimer - -5.0.a24 02.07.2000 - further debugging of checkquart - -5.0.a23 02.07.2000 - further developement to checkquart - -5.0.a22 29.06.2000 - checkquart added to makefile - - bad quartet stats added after reading 
in *.allquarts - -5.0.a21 27.06.2000 - site pattern statistics implemented and added to - SEQUENCE ALIGNMENT section in puzzle report - -5.0.a20 26.06.2000 - cpREV45 implemented - -5.0.a19 26.06.2000 - for debugging purposes: typo "MPE" changed to "FPE" - - fflush(stdout) added in chi2test - -5.0.a18 20.06.2000 - checkquart implemented - -5.0.a17 19.06.2000 - FPRINTF(STDOUTFILE and STDOUT definition changed - and moved; fputid/fputid10 writes to STDOUT instead - of stdout - - ppuzzle checks slaves enough slave-processes - - numquarts, num2quart, quart2num moved from ppuzzle.c - to puzzle1.c - - read/writeallquart implemented (undocumented feature) - to be used by -wqf/-rqf at comandline - -wqf = write quartet file (infilename.allquart) after - quartet evaluation - -rqf = read quartet file (infilename.allquart), no - quartet evaluation, unless -wqf is used as - well, then quartets are written and read in - - '-h' option at comandline -> printusage - -5.0.a16 31.05.2000 - chi2test bug fixed - - WAG matrix added, model choice adopted - 13.06.2000 - date set to June 2000 - - author order changed to Schmidt, Strimmer, Vingron, - v.Haeseler - - CPU time output stopped, due to overflow errors - 16.06.2000 - sequence composition chi2test moved before - parameter output. - - output of chi2test and bad quartet statistics split, - to do the chi2test output earlier. - -5.0.a15 02.05.2000 - Names changed back from TREE-PUZZLE to PUZZLE - 09.05.2000 - and to TREE-PUZZLE again ;-) - -5.0.a14 13.03.2000 - Changes to the manual. - - Executable names changed to (p)treepuzzle. - (changes in the makefiles) - 15.03.2000 - Output of parameters after estimation added. - -5.0.a13 18.02.2000 - ALPHA version number removed from the code - -5.0.a12 18.02.2000 - CPU time measurement problems fixed for case where - clock_t is an unsigned type. - -5.0.a11 17.02.2000 - time measure problems (CPU/wallclock) fixed - not all features in addtimes are used at the moment. 
- - unnecessary and unused routines removed fron source - code. - -5.0.a10 20.01.2000 - Name changes from PUZZLE to TREE-PUZZLE - - Chi2-fit model guessing for VT model added - - little model printing bug fixed - -5.0.a9 22.12.1999 - VT Model incorporated (Mueller, Vingron (2000) - JCB, to appear). - - TODO: Chi2-fit model guessing for VT model - -5.0.a8 21.12.1999 - 'sys/times.h' and 'sys/types.h' removed from - puzzle.h. They were neither ANSI conform nor - necessary, but occured in the SUN man pages. - - Definition and call of writetimesstat eliminated - from the sequention version by compiler switched, - and not just the function body as before. - - '-O4' canged to '-O' to be more generic. - -5.0.a7 21.12.1999 - Macro constants introduced for data_optn - (NUCLEOTIDE, AMINOACID, BINARY) - - round robbing of datatype and AA model option changed - in menu to make adjustment of the model possible by a - determined sequence of letters: - 'd': Auto -> Nucleotides - -> Amino acids - -> Binary states - -> Auto - ('m' && data_optn == AMINOACID): - Auto -> Dayhoff - -> JTT - -> mtREV24 - -> BLOSUM62 - -> Auto - - manual.html adjusted - -5.0.a6 20.12.1999 - new manual.html added - -5.0.a5 07.12.1999 - output bug fixed (bestrates were written before they - were computed) - -5.0.a4 02.12.1999 - header file inclusion ajusted: - added: #include - changed from: #include "ppuzzle.h" - to: #ifdef PARALLEL - # include "ppuzzle.h" - #endif - -5.0.a3 27.11.1999 - '-h' comandline option removed, because of problems - with MPICH under LINUX - - new memory leaks of 5.0.a2 closed in PP_Finalize - -5.0.a2 27.11.1999 - Cleanup of the source code - - Measurement of CPU time added - - Parallel load statistics added (quartets, trees, time) - to puzzle report. - - Cleanup debug messages - - Comments "[...]" are removed from usertrees now. - - single quotes will only be printed arount species - names if -DUSEQUOTES is set at compiletime. 
- - tree likelihood is printed infront of a tree as a - comment, [ lh=-xx.xxxxx ](...); - -5.0.a1 26.11.1999 - Cleanup of the directories - - Copyright changes - - Version changes - - -VERSION CHANGE -============== - -4.1.a26 25.11.1999 - Makefile made universal for pauzzle and ppuzzle - - lines not needed removed from puzzle.h - -4.1.a25 19.11.1999 - Output file prefixes for distances, trees, and - puzzlereport changed in user trees analysis case - to user tree file name - - Temporary output of likelihood to treefile added - -4.1.a24 11.11.1999 - Output of puzzling step trees changed - ptorder: [ orderno # % ID #UniqTopos #Steps ]PHYLIP - pstep: chunk #InChunk sum ID #UniqTopos #Steps - - preliminary leap frog RNG implemented, i.e. uses - the rand4 in the usual way in the sequential case. - If run in parallel all rand4 are initialized with - the same seed and started with PP_Myid-th random - number. after that each process uses the every - PP_NumProcs-th random number to make sure that these - unique. - -4.1.a23 08.11.1999 - output of sequential and parallel version to *.pstep - made identical - -4.1.a22 05.11.1999 - two different puzzle step tree outputs intruduced - and added to the menu ("[ 1. 35 ](...);": - - ordered unique tree list -> *.ptorder - Format: "[ 1. 35 ]" (Ordernumber, Amount) - - chronological tree list -> *.pstep - Format: "[ 1. 35 ]" (Chunknumber, Amount in chunk) - (the last is a problem in parallel, because the come - in chunks, as scheduled) - - debugged the output -4.1.a21 04.11.1999 - Makefile adjustments for other Plattforms - - pstep tree output changed. unique treestructures - printed to *.pstep file with a leading comment - containing an order number and the ammount padded - with blanks (e.g. "[ 1. 356 ]('mouse'..."). - output is done right before writing the puzzle file. 
- - controlled MPI finish to the Quit menu option added - -4.1.a20 03.11.1999 - some garbage collection (free) added - - makefile adjusted, OFLAGS for optimization added - (ppuzzle/MPICH has problems with -O, so the - ppuzzle is created without optimization) - Some minor changes in the makefiles - - still to do: garbage collection from 'internalnode' - in master process - -4.1.a19 13.10.1999 - adding the output of standardized (i.e. sorted) - puzzling step trees. Those are printed to the - standard output at the moment. (Routines to sort - and print the trees implemented) - 14.10.1999 - routines for printing the sorted trees to a string. - needed to send them between Master and Worker, and - to have a unique key to sort and count the trees. - 21.10.1999 - counting of sorted trees implemented by doubly linked - list, sort routine, print to stdout - 25.10.1999 - change place of writing distances to file right after - distances have been computed. - - output of puzzling step trees now with true name, - not numbers - 02.11.1999 - parallel counting and sending of puzzling step trees - - some parallel sending bugs fixed - -4.1.a18 14.09.1999 - adding possibility to specify input file at - command line, this specifies also the output - filenames (puzzle output: *.puzzle; treefile: - *.tree; distances: *.dist; Triangel EPS: *.eps; - unresolved: *.qlist; puzzling step trees: *.pstep) - If an unexisting name is given, one has to reenter - the right name, but the wrong one is used as prefix. 
- 15.09.1999 - sending back of bad quartets from slaves added - - bug in quart2num fixed (not used before; was shifted - by 1) - - first version of a README added ;-) - -4.1.a17 03.08.1999 - Recv-Error in receiving DoPuzzleBlock fixed - - double freeing of same MPI_Datatype fixed - - changing of scheduling algorithm to smaller chunks - in gss -> sgss - 13.09.1999 - bug fixed in optimization routine in ml2.c: - boundary check added - -4.1.a16 12.07.1999 - slight changes in verbosity levels - - changed all printf to FPRINTF(STDOUTFILE to - change easily from stdout to a file. - -4.1.a15 08.07.1999 - scheduler for both parallel parts - - several small changes - -4.1.a14 25.06.1999 - computation of tree parallel, scheduler dependent, - sending all biparts in one message instead of one - by one - - several small changes since a13 in sched.c, et al. - -4.1.a13 10.06.1999 - computation of tree parallel (chunk = #trees/#slaves) - - scheduling schemes implemented for minimum chunk sizes - -4.1.a12 07.06.1999 - computation of quartets properly parallel - - scheduling implemented - - counting of quartets by slave ajusted - - TODO: sending of bad quartets (array + list) - - distinction between '1st user tree' and 'NJ tree' - in result output removed again - -4.1.a11 28.05.1999 - PP_SendDoQuartBlock, PP_RecvDoQuartBlock, - PP_SendQuartBlock, PP_RecvQuartBlock - - mallocquartets() changed from global to local - variables to be more flexible - - Quartet computation moved to slave (badquartet - handling missing: output, badquartet vector); - - distinction between '1st user tree' and 'NJ tree' - added in result output (puzzle1.c around l.1756) - -4.1.a10 20.05.1999 - num2quart, numquarts, quart2num introduced - - parallel init/finalize, quartets computed on - master and slave, compared -> equal -> all necessary - parameter exported - -4.1.a9 19.05.1999 - 'dvector forg' removed from onepamratematrix - cmdline, because it's not used in the function. 
- -4.1.a8 18.05.1999 - add _GAMMA_ (not necessary) to gamma.h and _PUZZLE_ - to puzzle.h to avoid dublicate includes, possible - due to ppuzzle.h - - ppuzzle added to makefile and to check - - 1st parallel version but no slave computations - only sending parameters and done signals. - -4.1.a7 18.05.1999 - export reevaluation of tree and evaluation of - usertrees to evaluatetree. - -4.1.a6 17.05.1999 - -DNEWFORLOOP added to fixed.src, because the changed - for loop structure changes the sequence of randomized - quartets during likelihood mapping - - change 'int main()' to 'int main(argc, argv)' - - export more functionalities from main: - memcleanup(), inputandinit(&argc, &argv) - - grouping if's (excluding eachother) together in - switch() - - split treereavaluation and 1st usertree, - evaluate all usertrees together (TODO: both, - treereavaluation and usertrees in one loop) - - MAKE CHECK added to ./makefile - -4.1.a5 16.05.1999 - adding ´dvector Brnlength´ to lslength cmdline to - reduce globality of Brnlength. 
(Later better to *Tree) - -4.1.a4 11.05.1999 - structure of for loops changed in computeallquartets - and recon_tree, so that the quarted addresses are in - one contigous sequence (for a /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done - for subdir in $(SUBDIRS); do \ - if test "$$subdir" = .; then :; else \ - test -d $(distdir)/$$subdir \ - || mkdir $(distdir)/$$subdir \ - || exit 1; \ - chmod 777 $(distdir)/$$subdir; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(distdir) distdir=../$(distdir)/$$subdir distdir) \ - || exit 1; \ - fi; \ - done -info-am: -info: info-recursive -dvi-am: -dvi: dvi-recursive -check-am: all-am -check: check-recursive -installcheck-am: -installcheck: installcheck-recursive -install-exec-am: -install-exec: install-exec-recursive - -install-data-am: -install-data: install-data-recursive - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-recursive -uninstall-am: -uninstall: uninstall-recursive -all-am: Makefile -all-redirect: all-recursive -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: installdirs-recursive -installdirs-am: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-tags mostlyclean-generic - -mostlyclean: mostlyclean-recursive - -clean-am: clean-tags clean-generic mostlyclean-am - -clean: clean-recursive - -distclean-am: distclean-tags distclean-generic clean-am - -distclean: distclean-recursive - -rm -f config.status - -maintainer-clean-am: maintainer-clean-tags maintainer-clean-generic \ - distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." 
- -maintainer-clean: maintainer-clean-recursive - -rm -f config.status - -.PHONY: install-data-recursive uninstall-data-recursive \ -install-exec-recursive uninstall-exec-recursive installdirs-recursive \ -uninstalldirs-recursive all-recursive check-recursive \ -installcheck-recursive info-recursive dvi-recursive \ -mostlyclean-recursive distclean-recursive clean-recursive \ -maintainer-clean-recursive tags tags-recursive mostlyclean-tags \ -distclean-tags clean-tags maintainer-clean-tags distdir info-am info \ -dvi-am dvi check check-am installcheck-am installcheck install-exec-am \ -install-exec install-data-am install-data install-am install \ -uninstall-am uninstall all-redirect all-am all installdirs-am \ -installdirs mostlyclean-generic distclean-generic clean-generic \ -maintainer-clean-generic clean mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_dqo/Makefile.am b/forester/archive/RIO/others/puzzle_dqo/Makefile.am deleted file mode 100644 index 2a0bac6..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/Makefile.am +++ /dev/null @@ -1,2 +0,0 @@ -EXTRA_DIST = -SUBDIRS = src doc data diff --git a/forester/archive/RIO/others/puzzle_dqo/Makefile.in b/forester/archive/RIO/others/puzzle_dqo/Makefile.in deleted file mode 100644 index 38b4d60..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/Makefile.in +++ /dev/null @@ -1,327 +0,0 @@ -# Makefile.in generated automatically by automake 1.4 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. 
- -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = @SHELL@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -bindir = @bindir@ -sbindir = @sbindir@ -libexecdir = @libexecdir@ -datadir = @datadir@ -sysconfdir = @sysconfdir@ -sharedstatedir = @sharedstatedir@ -localstatedir = @localstatedir@ -libdir = @libdir@ -infodir = @infodir@ -mandir = @mandir@ -includedir = @includedir@ -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ - -top_builddir = . - -ACLOCAL = @ACLOCAL@ -AUTOCONF = @AUTOCONF@ -AUTOMAKE = @AUTOMAKE@ -AUTOHEADER = @AUTOHEADER@ - -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -transform = @program_transform_name@ - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = @CC@ -MAKEINFO = @MAKEINFO@ -MPICC = @MPICC@ -MPICC0 = @MPICC0@ -MPICC1 = @MPICC1@ -MPICC2 = @MPICC2@ -MPICC3 = @MPICC3@ -MPICC4 = @MPICC4@ -MPICC5 = @MPICC5@ -MPICFLAGS = @MPICFLAGS@ -MPIDEFS = @MPIDEFS@ -MPILIBS = @MPILIBS@ -PACKAGE = @PACKAGE@ -PPUZZLE = @PPUZZLE@ -VERSION = @VERSION@ - -EXTRA_DIST = -SUBDIRS = src doc data -ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = -DIST_COMMON = README AUTHORS COPYING ChangeLog INSTALL Makefile.am \ -Makefile.in NEWS aclocal.m4 configure configure.in install-sh missing \ -mkinstalldirs - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = tar -GZIP_ENV = --best -all: all-redirect -.SUFFIXES: -$(srcdir)/Makefile.in: Makefile.am 
$(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status - -$(ACLOCAL_M4): configure.in - cd $(srcdir) && $(ACLOCAL) - -config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - $(SHELL) ./config.status --recheck -$(srcdir)/configure: $(srcdir)/configure.in $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES) - cd $(srcdir) && $(AUTOCONF) - -# This directory's subdirectories are mostly independent; you can cd -# into them and run `make' without going through this Makefile. -# To change the values of `make' variables: instead of editing Makefiles, -# (1) if the variable is set in `config.status', edit `config.status' -# (which will cause the Makefiles to be regenerated when you run `make'); -# (2) otherwise, pass the desired values on the `make' command line. - -@SET_MAKE@ - -all-recursive install-data-recursive install-exec-recursive \ -installdirs-recursive install-recursive uninstall-recursive \ -check-recursive installcheck-recursive info-recursive dvi-recursive: - @set fnord $(MAKEFLAGS); amf=$$2; \ - dot_seen=no; \ - target=`echo $@ | sed s/-recursive//`; \ - list='$(SUBDIRS)'; for subdir in $$list; do \ - echo "Making $$target in $$subdir"; \ - if test "$$subdir" = "."; then \ - dot_seen=yes; \ - local_target="$$target-am"; \ - else \ - local_target="$$target"; \ - fi; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ - || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ - done; \ - if test "$$dot_seen" = "no"; then \ - $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ - fi; test -z "$$fail" - -mostlyclean-recursive clean-recursive distclean-recursive \ -maintainer-clean-recursive: - @set fnord $(MAKEFLAGS); amf=$$2; \ - dot_seen=no; \ - rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \ - rev="$$subdir $$rev"; \ - test "$$subdir" = "." 
&& dot_seen=yes; \ - done; \ - test "$$dot_seen" = "no" && rev=". $$rev"; \ - target=`echo $@ | sed s/-recursive//`; \ - for subdir in $$rev; do \ - echo "Making $$target in $$subdir"; \ - if test "$$subdir" = "."; then \ - local_target="$$target-am"; \ - else \ - local_target="$$target"; \ - fi; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ - || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ - done && test -z "$$fail" -tags-recursive: - list='$(SUBDIRS)'; for subdir in $$list; do \ - test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ - done - -tags: TAGS - -ID: $(HEADERS) $(SOURCES) $(LISP) - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - here=`pwd` && cd $(srcdir) \ - && mkid -f$$here/ID $$unique $(LISP) - -TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SUBDIRS)'; for subdir in $$list; do \ - if test "$$subdir" = .; then :; else \ - test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \ - fi; \ - done; \ - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ - || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS) - -mostlyclean-tags: - -clean-tags: - -distclean-tags: - -rm -f TAGS ID - -maintainer-clean-tags: - -distdir = $(PACKAGE)-$(VERSION) -top_distdir = $(distdir) - -# This target untars the dist file and tries a VPATH configuration. Then -# it guarantees that the distribution is self-contained by making another -# tarfile. 
-distcheck: dist - -rm -rf $(distdir) - GZIP=$(GZIP_ENV) $(TAR) zxf $(distdir).tar.gz - mkdir $(distdir)/=build - mkdir $(distdir)/=inst - dc_install_base=`cd $(distdir)/=inst && pwd`; \ - cd $(distdir)/=build \ - && ../configure --srcdir=.. --prefix=$$dc_install_base \ - && $(MAKE) $(AM_MAKEFLAGS) \ - && $(MAKE) $(AM_MAKEFLAGS) dvi \ - && $(MAKE) $(AM_MAKEFLAGS) check \ - && $(MAKE) $(AM_MAKEFLAGS) install \ - && $(MAKE) $(AM_MAKEFLAGS) installcheck \ - && $(MAKE) $(AM_MAKEFLAGS) dist - -rm -rf $(distdir) - @banner="$(distdir).tar.gz is ready for distribution"; \ - dashes=`echo "$$banner" | sed s/./=/g`; \ - echo "$$dashes"; \ - echo "$$banner"; \ - echo "$$dashes" -dist: distdir - -chmod -R a+r $(distdir) - GZIP=$(GZIP_ENV) $(TAR) chozf $(distdir).tar.gz $(distdir) - -rm -rf $(distdir) -dist-all: distdir - -chmod -R a+r $(distdir) - GZIP=$(GZIP_ENV) $(TAR) chozf $(distdir).tar.gz $(distdir) - -rm -rf $(distdir) -distdir: $(DISTFILES) - -rm -rf $(distdir) - mkdir $(distdir) - -chmod 777 $(distdir) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done - for subdir in $(SUBDIRS); do \ - if test "$$subdir" = .; then :; else \ - test -d $(distdir)/$$subdir \ - || mkdir $(distdir)/$$subdir \ - || exit 1; \ - chmod 777 $(distdir)/$$subdir; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(distdir) distdir=../$(distdir)/$$subdir distdir) \ - || exit 1; \ - fi; \ - done -info-am: -info: info-recursive -dvi-am: -dvi: dvi-recursive -check-am: all-am -check: check-recursive -installcheck-am: -installcheck: installcheck-recursive -install-exec-am: -install-exec: install-exec-recursive - -install-data-am: -install-data: install-data-recursive - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: 
install-recursive -uninstall-am: -uninstall: uninstall-recursive -all-am: Makefile -all-redirect: all-recursive -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: installdirs-recursive -installdirs-am: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-tags mostlyclean-generic - -mostlyclean: mostlyclean-recursive - -clean-am: clean-tags clean-generic mostlyclean-am - -clean: clean-recursive - -distclean-am: distclean-tags distclean-generic clean-am - -distclean: distclean-recursive - -rm -f config.status - -maintainer-clean-am: maintainer-clean-tags maintainer-clean-generic \ - distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-recursive - -rm -f config.status - -.PHONY: install-data-recursive uninstall-data-recursive \ -install-exec-recursive uninstall-exec-recursive installdirs-recursive \ -uninstalldirs-recursive all-recursive check-recursive \ -installcheck-recursive info-recursive dvi-recursive \ -mostlyclean-recursive distclean-recursive clean-recursive \ -maintainer-clean-recursive tags tags-recursive mostlyclean-tags \ -distclean-tags clean-tags maintainer-clean-tags distdir info-am info \ -dvi-am dvi check check-am installcheck-am installcheck install-exec-am \ -install-exec install-data-am install-data install-am install \ -uninstall-am uninstall all-redirect all-am all installdirs-am \ -installdirs mostlyclean-generic distclean-generic clean-generic \ -maintainer-clean-generic clean mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. 
-.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_dqo/aclocal.m4 b/forester/archive/RIO/others/puzzle_dqo/aclocal.m4 deleted file mode 100644 index 9f8add8..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/aclocal.m4 +++ /dev/null @@ -1,104 +0,0 @@ -dnl aclocal.m4 generated automatically by aclocal 1.4 - -dnl Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -dnl This file is free software; the Free Software Foundation -dnl gives unlimited permission to copy and/or distribute it, -dnl with or without modifications, as long as this notice is preserved. - -dnl This program is distributed in the hope that it will be useful, -dnl but WITHOUT ANY WARRANTY, to the extent permitted by law; without -dnl even the implied warranty of MERCHANTABILITY or FITNESS FOR A -dnl PARTICULAR PURPOSE. - -# Do all the work for Automake. This macro actually does too much -- -# some checks are only needed if your package does certain things. -# But this isn't really a big deal. - -# serial 1 - -dnl Usage: -dnl AM_INIT_AUTOMAKE(package,version, [no-define]) - -AC_DEFUN(AM_INIT_AUTOMAKE, -[AC_REQUIRE([AC_PROG_INSTALL]) -PACKAGE=[$1] -AC_SUBST(PACKAGE) -VERSION=[$2] -AC_SUBST(VERSION) -dnl test to see if srcdir already configured -if test "`cd $srcdir && pwd`" != "`pwd`" && test -f $srcdir/config.status; then - AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) -fi -ifelse([$3],, -AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package]) -AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])) -AC_REQUIRE([AM_SANITY_CHECK]) -AC_REQUIRE([AC_ARG_PROGRAM]) -dnl FIXME This is truly gross. 
-missing_dir=`cd $ac_aux_dir && pwd` -AM_MISSING_PROG(ACLOCAL, aclocal, $missing_dir) -AM_MISSING_PROG(AUTOCONF, autoconf, $missing_dir) -AM_MISSING_PROG(AUTOMAKE, automake, $missing_dir) -AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir) -AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir) -AC_REQUIRE([AC_PROG_MAKE_SET])]) - -# -# Check to make sure that the build environment is sane. -# - -AC_DEFUN(AM_SANITY_CHECK, -[AC_MSG_CHECKING([whether build environment is sane]) -# Just in case -sleep 1 -echo timestamp > conftestfile -# Do `set' in a subshell so we don't clobber the current shell's -# arguments. Must try -L first in case configure is actually a -# symlink; some systems play weird games with the mod time of symlinks -# (eg FreeBSD returns the mod time of the symlink's containing -# directory). -if ( - set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null` - if test "[$]*" = "X"; then - # -L didn't work. - set X `ls -t $srcdir/configure conftestfile` - fi - if test "[$]*" != "X $srcdir/configure conftestfile" \ - && test "[$]*" != "X conftestfile $srcdir/configure"; then - - # If neither matched, then we have a broken ls. This can happen - # if, for instance, CONFIG_SHELL is bash and it inherits a - # broken ls alias from the environment. This has actually - # happened. Such a system could not be considered "sane". - AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken -alias in your environment]) - fi - - test "[$]2" = conftestfile - ) -then - # Ok. - : -else - AC_MSG_ERROR([newly created file is older than distributed files! -Check your system clock]) -fi -rm -f conftest* -AC_MSG_RESULT(yes)]) - -dnl AM_MISSING_PROG(NAME, PROGRAM, DIRECTORY) -dnl The program must properly implement --version. -AC_DEFUN(AM_MISSING_PROG, -[AC_MSG_CHECKING(for working $2) -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. 
-# Redirect stdin to placate older versions of autoconf. Sigh. -if ($2 --version) < /dev/null > /dev/null 2>&1; then - $1=$2 - AC_MSG_RESULT(found) -else - $1="$3/missing $2" - AC_MSG_RESULT(missing) -fi -AC_SUBST($1)]) - diff --git a/forester/archive/RIO/others/puzzle_dqo/config.status b/forester/archive/RIO/others/puzzle_dqo/config.status deleted file mode 100755 index da58b56..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/config.status +++ /dev/null @@ -1,179 +0,0 @@ -#! /bin/sh -# Generated automatically by configure. -# Run this file to recreate the current configuration. -# This directory was configured as follows, -# on host forester.wustl.edu: -# -# ./configure -# -# Compiler output produced by configure, useful for debugging -# configure, is in ./config.log if it exists. - -ac_cs_usage="Usage: ./config.status [--recheck] [--version] [--help]" -for ac_option -do - case "$ac_option" in - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - echo "running ${CONFIG_SHELL-/bin/sh} ./configure --no-create --no-recursion" - exec ${CONFIG_SHELL-/bin/sh} ./configure --no-create --no-recursion ;; - -version | --version | --versio | --versi | --vers | --ver | --ve | --v) - echo "./config.status generated by autoconf version 2.13" - exit 0 ;; - -help | --help | --hel | --he | --h) - echo "$ac_cs_usage"; exit 0 ;; - *) echo "$ac_cs_usage"; exit 1 ;; - esac -done - -ac_given_srcdir=. -ac_given_INSTALL="/usr/bin/install -c" - -trap 'rm -fr Makefile src/Makefile src/test doc/Makefile data/Makefile conftest*; exit 1' 1 2 15 - -# Protect against being on the right side of a sed subst in config.status. 
-sed 's/%@/@@/; s/@%/@@/; s/%g$/@g/; /@g$/s/[\\&%]/\\&/g; - s/@@/%@/; s/@@/@%/; s/@g$/%g/' > conftest.subs <<\CEOF -/^[ ]*VPATH[ ]*=[^:]*$/d - -s%@SHELL@%/bin/sh%g -s%@CFLAGS@%-g -O2%g -s%@CPPFLAGS@%%g -s%@CXXFLAGS@%%g -s%@FFLAGS@%%g -s%@DEFS@% -DPACKAGE=\"tree-puzzle\" -DVERSION=\"5.0\" -DHAVE_LIBM=1 -DSTDC_HEADERS=1 -DHAVE_LIMITS_H=1 %g -s%@LDFLAGS@%%g -s%@LIBS@%-lm %g -s%@exec_prefix@%${prefix}%g -s%@prefix@%/usr/local%g -s%@program_transform_name@%s,x,x,%g -s%@bindir@%${exec_prefix}/bin%g -s%@sbindir@%${exec_prefix}/sbin%g -s%@libexecdir@%${exec_prefix}/libexec%g -s%@datadir@%${prefix}/share%g -s%@sysconfdir@%${prefix}/etc%g -s%@sharedstatedir@%${prefix}/com%g -s%@localstatedir@%${prefix}/var%g -s%@libdir@%${exec_prefix}/lib%g -s%@includedir@%${prefix}/include%g -s%@oldincludedir@%/usr/include%g -s%@infodir@%${prefix}/info%g -s%@mandir@%${prefix}/man%g -s%@INSTALL_PROGRAM@%${INSTALL}%g -s%@INSTALL_SCRIPT@%${INSTALL_PROGRAM}%g -s%@INSTALL_DATA@%${INSTALL} -m 644%g -s%@PACKAGE@%tree-puzzle%g -s%@VERSION@%5.0%g -s%@ACLOCAL@%aclocal%g -s%@AUTOCONF@%autoconf%g -s%@AUTOMAKE@%automake%g -s%@AUTOHEADER@%autoheader%g -s%@MAKEINFO@%makeinfo%g -s%@SET_MAKE@%%g -s%@CC@%gcc%g -s%@MPICC0@%%g -s%@MPICC1@%%g -s%@MPICC2@%%g -s%@MPICC3@%%g -s%@MPICC4@%%g -s%@MPICC5@%%g -s%@MPICC@%%g -s%@MPILIBS@%%g -s%@MPIDEFS@%%g -s%@MPICFLAGS@%%g -s%@PPUZZLE@%%g -s%@CPP@%gcc -E%g - -CEOF - -# Split the substitutions into bite-sized pieces for seds with -# small command number limits, like on Digital OSF/1 and HP-UX. -ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. -ac_file=1 # Number of current file. -ac_beg=1 # First line for current file. -ac_end=$ac_max_sed_cmds # Line after last line for current file. -ac_more_lines=: -ac_sed_cmds="" -while $ac_more_lines; do - if test $ac_beg -gt 1; then - sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file - else - sed "${ac_end}q" conftest.subs > conftest.s$ac_file - fi - if test ! 
-s conftest.s$ac_file; then - ac_more_lines=false - rm -f conftest.s$ac_file - else - if test -z "$ac_sed_cmds"; then - ac_sed_cmds="sed -f conftest.s$ac_file" - else - ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" - fi - ac_file=`expr $ac_file + 1` - ac_beg=$ac_end - ac_end=`expr $ac_end + $ac_max_sed_cmds` - fi -done -if test -z "$ac_sed_cmds"; then - ac_sed_cmds=cat -fi - -CONFIG_FILES=${CONFIG_FILES-"Makefile src/Makefile src/test doc/Makefile data/Makefile"} -for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then - # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". - case "$ac_file" in - *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` - ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; - *) ac_file_in="${ac_file}.in" ;; - esac - - # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. - - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` - if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then - # The file is in a subdirectory. - test ! -d "$ac_dir" && mkdir "$ac_dir" - ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" - # A "../" for each directory in $ac_dir_suffix. - ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` - else - ac_dir_suffix= ac_dots= - fi - - case "$ac_given_srcdir" in - .) srcdir=. - if test -z "$ac_dots"; then top_srcdir=. - else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; - /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; - *) # Relative path. - srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" - top_srcdir="$ac_dots$ac_given_srcdir" ;; - esac - - case "$ac_given_INSTALL" in - [/$]*) INSTALL="$ac_given_INSTALL" ;; - *) INSTALL="$ac_dots$ac_given_INSTALL" ;; - esac - - echo creating "$ac_file" - rm -f "$ac_file" - configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." 
- case "$ac_file" in - *Makefile*) ac_comsub="1i\\ -# $configure_input" ;; - *) ac_comsub= ;; - esac - - ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` - sed -e "$ac_comsub -s%@configure_input@%$configure_input%g -s%@srcdir@%$srcdir%g -s%@top_srcdir@%$top_srcdir%g -s%@INSTALL@%$INSTALL%g -" $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file -fi; done -rm -f conftest.s* - - - -exit 0 diff --git a/forester/archive/RIO/others/puzzle_dqo/configure b/forester/archive/RIO/others/puzzle_dqo/configure deleted file mode 100755 index 5d4db41..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/configure +++ /dev/null @@ -1,2265 +0,0 @@ -#! /bin/sh - -# Guess values for system-dependent variables and create Makefiles. -# Generated automatically using autoconf version 2.13 -# Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. -# -# This configure script is free software; the Free Software Foundation -# gives unlimited permission to copy, distribute and modify it. - -# Defaults: -ac_help= -ac_default_prefix=/usr/local -# Any additions from configure.in: - -# Initialize some variables set by options. -# The variables have the same names as the options, with -# dashes changed to underlines. -build=NONE -cache_file=./config.cache -exec_prefix=NONE -host=NONE -no_create= -nonopt=NONE -no_recursion= -prefix=NONE -program_prefix=NONE -program_suffix=NONE -program_transform_name=s,x,x, -silent= -site= -srcdir= -target=NONE -verbose= -x_includes=NONE -x_libraries=NONE -bindir='${exec_prefix}/bin' -sbindir='${exec_prefix}/sbin' -libexecdir='${exec_prefix}/libexec' -datadir='${prefix}/share' -sysconfdir='${prefix}/etc' -sharedstatedir='${prefix}/com' -localstatedir='${prefix}/var' -libdir='${exec_prefix}/lib' -includedir='${prefix}/include' -oldincludedir='/usr/include' -infodir='${prefix}/info' -mandir='${prefix}/man' - -# Initialize some other variables. 
-subdirs= -MFLAGS= MAKEFLAGS= -SHELL=${CONFIG_SHELL-/bin/sh} -# Maximum number of lines to put in a shell here document. -ac_max_here_lines=12 - -ac_prev= -for ac_option -do - - # If the previous option needs an argument, assign it. - if test -n "$ac_prev"; then - eval "$ac_prev=\$ac_option" - ac_prev= - continue - fi - - case "$ac_option" in - -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) ac_optarg= ;; - esac - - # Accept the important Cygnus configure options, so we can diagnose typos. - - case "$ac_option" in - - -bindir | --bindir | --bindi | --bind | --bin | --bi) - ac_prev=bindir ;; - -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) - bindir="$ac_optarg" ;; - - -build | --build | --buil | --bui | --bu) - ac_prev=build ;; - -build=* | --build=* | --buil=* | --bui=* | --bu=*) - build="$ac_optarg" ;; - - -cache-file | --cache-file | --cache-fil | --cache-fi \ - | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) - ac_prev=cache_file ;; - -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ - | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) - cache_file="$ac_optarg" ;; - - -datadir | --datadir | --datadi | --datad | --data | --dat | --da) - ac_prev=datadir ;; - -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ - | --da=*) - datadir="$ac_optarg" ;; - - -disable-* | --disable-*) - ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - eval "enable_${ac_feature}=no" ;; - - -enable-* | --enable-*) - ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. 
- if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "enable_${ac_feature}='$ac_optarg'" ;; - - -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ - | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ - | --exec | --exe | --ex) - ac_prev=exec_prefix ;; - -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ - | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ - | --exec=* | --exe=* | --ex=*) - exec_prefix="$ac_optarg" ;; - - -gas | --gas | --ga | --g) - # Obsolete; use --with-gas. - with_gas=yes ;; - - -help | --help | --hel | --he) - # Omit some internal or obsolete options to make the list less imposing. - # This message is too long to be a string in the A/UX 3.1 sh. - cat << EOF -Usage: configure [options] [host] -Options: [defaults in brackets after descriptions] -Configuration: - --cache-file=FILE cache test results in FILE - --help print this message - --no-create do not create output files - --quiet, --silent do not print \`checking...' 
messages - --version print the version of autoconf that created configure -Directory and file names: - --prefix=PREFIX install architecture-independent files in PREFIX - [$ac_default_prefix] - --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX - [same as prefix] - --bindir=DIR user executables in DIR [EPREFIX/bin] - --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] - --libexecdir=DIR program executables in DIR [EPREFIX/libexec] - --datadir=DIR read-only architecture-independent data in DIR - [PREFIX/share] - --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] - --sharedstatedir=DIR modifiable architecture-independent data in DIR - [PREFIX/com] - --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] - --libdir=DIR object code libraries in DIR [EPREFIX/lib] - --includedir=DIR C header files in DIR [PREFIX/include] - --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] - --infodir=DIR info documentation in DIR [PREFIX/info] - --mandir=DIR man documentation in DIR [PREFIX/man] - --srcdir=DIR find the sources in DIR [configure dir or ..] 
- --program-prefix=PREFIX prepend PREFIX to installed program names - --program-suffix=SUFFIX append SUFFIX to installed program names - --program-transform-name=PROGRAM - run sed PROGRAM on installed program names -EOF - cat << EOF -Host type: - --build=BUILD configure for building on BUILD [BUILD=HOST] - --host=HOST configure for HOST [guessed] - --target=TARGET configure for TARGET [TARGET=HOST] -Features and packages: - --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) - --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] - --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) - --x-includes=DIR X include files are in DIR - --x-libraries=DIR X library files are in DIR -EOF - if test -n "$ac_help"; then - echo "--enable and --with options recognized:$ac_help" - fi - exit 0 ;; - - -host | --host | --hos | --ho) - ac_prev=host ;; - -host=* | --host=* | --hos=* | --ho=*) - host="$ac_optarg" ;; - - -includedir | --includedir | --includedi | --included | --include \ - | --includ | --inclu | --incl | --inc) - ac_prev=includedir ;; - -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ - | --includ=* | --inclu=* | --incl=* | --inc=*) - includedir="$ac_optarg" ;; - - -infodir | --infodir | --infodi | --infod | --info | --inf) - ac_prev=infodir ;; - -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) - infodir="$ac_optarg" ;; - - -libdir | --libdir | --libdi | --libd) - ac_prev=libdir ;; - -libdir=* | --libdir=* | --libdi=* | --libd=*) - libdir="$ac_optarg" ;; - - -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ - | --libexe | --libex | --libe) - ac_prev=libexecdir ;; - -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ - | --libexe=* | --libex=* | --libe=*) - libexecdir="$ac_optarg" ;; - - -localstatedir | --localstatedir | --localstatedi | --localstated \ - | --localstate | --localstat | --localsta | 
--localst \ - | --locals | --local | --loca | --loc | --lo) - ac_prev=localstatedir ;; - -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ - | --localstate=* | --localstat=* | --localsta=* | --localst=* \ - | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) - localstatedir="$ac_optarg" ;; - - -mandir | --mandir | --mandi | --mand | --man | --ma | --m) - ac_prev=mandir ;; - -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) - mandir="$ac_optarg" ;; - - -nfp | --nfp | --nf) - # Obsolete; use --without-fp. - with_fp=no ;; - - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) - no_create=yes ;; - - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) - no_recursion=yes ;; - - -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ - | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ - | --oldin | --oldi | --old | --ol | --o) - ac_prev=oldincludedir ;; - -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ - | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ - | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) - oldincludedir="$ac_optarg" ;; - - -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) - ac_prev=prefix ;; - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) - prefix="$ac_optarg" ;; - - -program-prefix | --program-prefix | --program-prefi | --program-pref \ - | --program-pre | --program-pr | --program-p) - ac_prev=program_prefix ;; - -program-prefix=* | --program-prefix=* | --program-prefi=* \ - | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) - program_prefix="$ac_optarg" ;; - - -program-suffix | --program-suffix | --program-suffi | --program-suff \ - | --program-suf | --program-su | --program-s) - ac_prev=program_suffix ;; - -program-suffix=* | 
--program-suffix=* | --program-suffi=* \ - | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) - program_suffix="$ac_optarg" ;; - - -program-transform-name | --program-transform-name \ - | --program-transform-nam | --program-transform-na \ - | --program-transform-n | --program-transform- \ - | --program-transform | --program-transfor \ - | --program-transfo | --program-transf \ - | --program-trans | --program-tran \ - | --progr-tra | --program-tr | --program-t) - ac_prev=program_transform_name ;; - -program-transform-name=* | --program-transform-name=* \ - | --program-transform-nam=* | --program-transform-na=* \ - | --program-transform-n=* | --program-transform-=* \ - | --program-transform=* | --program-transfor=* \ - | --program-transfo=* | --program-transf=* \ - | --program-trans=* | --program-tran=* \ - | --progr-tra=* | --program-tr=* | --program-t=*) - program_transform_name="$ac_optarg" ;; - - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - silent=yes ;; - - -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) - ac_prev=sbindir ;; - -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ - | --sbi=* | --sb=*) - sbindir="$ac_optarg" ;; - - -sharedstatedir | --sharedstatedir | --sharedstatedi \ - | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ - | --sharedst | --shareds | --shared | --share | --shar \ - | --sha | --sh) - ac_prev=sharedstatedir ;; - -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ - | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ - | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ - | --sha=* | --sh=*) - sharedstatedir="$ac_optarg" ;; - - -site | --site | --sit) - ac_prev=site ;; - -site=* | --site=* | --sit=*) - site="$ac_optarg" ;; - - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) - ac_prev=srcdir ;; - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) - 
srcdir="$ac_optarg" ;; - - -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ - | --syscon | --sysco | --sysc | --sys | --sy) - ac_prev=sysconfdir ;; - -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ - | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) - sysconfdir="$ac_optarg" ;; - - -target | --target | --targe | --targ | --tar | --ta | --t) - ac_prev=target ;; - -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) - target="$ac_optarg" ;; - - -v | -verbose | --verbose | --verbos | --verbo | --verb) - verbose=yes ;; - - -version | --version | --versio | --versi | --vers) - echo "configure generated by autoconf version 2.13" - exit 0 ;; - - -with-* | --with-*) - ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "with_${ac_package}='$ac_optarg'" ;; - - -without-* | --without-*) - ac_package=`echo $ac_option|sed -e 's/-*without-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - eval "with_${ac_package}=no" ;; - - --x) - # Obsolete; use --with-x. 
- with_x=yes ;; - - -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ - | --x-incl | --x-inc | --x-in | --x-i) - ac_prev=x_includes ;; - -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ - | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) - x_includes="$ac_optarg" ;; - - -x-libraries | --x-libraries | --x-librarie | --x-librari \ - | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) - ac_prev=x_libraries ;; - -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ - | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) - x_libraries="$ac_optarg" ;; - - -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } - ;; - - *) - if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then - echo "configure: warning: $ac_option: invalid host type" 1>&2 - fi - if test "x$nonopt" != xNONE; then - { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } - fi - nonopt="$ac_option" - ;; - - esac -done - -if test -n "$ac_prev"; then - { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } -fi - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -# File descriptor usage: -# 0 standard input -# 1 file creation -# 2 errors and warnings -# 3 some systems may open it to /dev/tty -# 4 used on the Kubota Titan -# 6 checking for... messages and results -# 5 compiler messages saved in config.log -if test "$silent" = yes; then - exec 6>/dev/null -else - exec 6>&1 -fi -exec 5>./config.log - -echo "\ -This file contains any messages produced by compilers while -running configure, to aid debugging if configure makes a mistake. -" 1>&5 - -# Strip out --no-create and --no-recursion so they do not pile up. -# Also quote any args containing shell metacharacters. 
-ac_configure_args= -for ac_arg -do - case "$ac_arg" in - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) ;; - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; - *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) - ac_configure_args="$ac_configure_args '$ac_arg'" ;; - *) ac_configure_args="$ac_configure_args $ac_arg" ;; - esac -done - -# NLS nuisances. -# Only set these to C if already set. These must not be set unconditionally -# because not all systems understand e.g. LANG=C (notably SCO). -# Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! -# Non-C LC_CTYPE values break the ctype check. -if test "${LANG+set}" = set; then LANG=C; export LANG; fi -if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi -if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi -if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi - -# confdefs.h avoids OS command line length limits that DEFS can exceed. -rm -rf conftest* confdefs.h -# AIX cpp loses on an empty file, so make sure it contains at least a newline. -echo > confdefs.h - -# A filename unique to this package, relative to the directory that -# configure is in, which we can look for to find out if srcdir is correct. -ac_unique_file=src/ml.h - -# Find the source files, if location was not specified. -if test -z "$srcdir"; then - ac_srcdir_defaulted=yes - # Try the directory containing this script, then its parent. - ac_prog=$0 - ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` - test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. - srcdir=$ac_confdir - if test ! -r $srcdir/$ac_unique_file; then - srcdir=.. - fi -else - ac_srcdir_defaulted=no -fi -if test ! -r $srcdir/$ac_unique_file; then - if test "$ac_srcdir_defaulted" = yes; then - { echo "configure: error: can not find sources in $ac_confdir or .." 
1>&2; exit 1; } - else - { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } - fi -fi -srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` - -# Prefer explicitly selected file to automatically selected ones. -if test -z "$CONFIG_SITE"; then - if test "x$prefix" != xNONE; then - CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" - else - CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" - fi -fi -for ac_site_file in $CONFIG_SITE; do - if test -r "$ac_site_file"; then - echo "loading site script $ac_site_file" - . "$ac_site_file" - fi -done - - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -ac_exeext= -ac_objext=o -if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then - # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. - if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then - ac_n= ac_c=' -' ac_t=' ' - else - ac_n=-n ac_c= ac_t= - fi -else - ac_n= ac_c='\c' ac_t= -fi - - - -ac_aux_dir= -for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do - if test -f $ac_dir/install-sh; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install-sh -c" - break - elif test -f $ac_dir/install.sh; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install.sh -c" - break - fi -done -if test -z "$ac_aux_dir"; then - { echo "configure: error: can not find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." 1>&2; exit 1; } -fi -ac_config_guess=$ac_aux_dir/config.guess -ac_config_sub=$ac_aux_dir/config.sub -ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. - -# Find a good install program. We prefer a C program (faster), -# so one script is as good as another. 
But avoid the broken or -# incompatible versions: -# SysV /etc/install, /usr/sbin/install -# SunOS /usr/etc/install -# IRIX /sbin/install -# AIX /bin/install -# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag -# AFS /usr/afsws/bin/install, which mishandles nonexistent args -# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" -# ./install, which can be erroneously created by make from ./install.sh. -echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 -echo "configure:550: checking for a BSD compatible install" >&5 -if test -z "$INSTALL"; then -if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" - for ac_dir in $PATH; do - # Account for people who put trailing slashes in PATH elements. - case "$ac_dir/" in - /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; - *) - # OSF1 and SCO ODT 3.0 have their own names for install. - # Don't use installbsd from OSF since it installs stuff as root - # by default. - for ac_prog in ginstall scoinst install; do - if test -f $ac_dir/$ac_prog; then - if test $ac_prog = install && - grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then - # AIX install. It has an incompatible calling convention. - : - else - ac_cv_path_install="$ac_dir/$ac_prog -c" - break 2 - fi - fi - done - ;; - esac - done - IFS="$ac_save_IFS" - -fi - if test "${ac_cv_path_install+set}" = set; then - INSTALL="$ac_cv_path_install" - else - # As a last resort, use the slow shell script. We don't cache a - # path for INSTALL within a source directory, because that will - # break other packages using the cache if that directory is - # removed, or if the path is relative. - INSTALL="$ac_install_sh" - fi -fi -echo "$ac_t""$INSTALL" 1>&6 - -# Use test -z because SunOS4 sh mishandles braces in ${var-val}. -# It thinks the first close brace ends the variable substitution. 
-test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' - -test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' - -test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' - -echo $ac_n "checking whether build environment is sane""... $ac_c" 1>&6 -echo "configure:603: checking whether build environment is sane" >&5 -# Just in case -sleep 1 -echo timestamp > conftestfile -# Do `set' in a subshell so we don't clobber the current shell's -# arguments. Must try -L first in case configure is actually a -# symlink; some systems play weird games with the mod time of symlinks -# (eg FreeBSD returns the mod time of the symlink's containing -# directory). -if ( - set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null` - if test "$*" = "X"; then - # -L didn't work. - set X `ls -t $srcdir/configure conftestfile` - fi - if test "$*" != "X $srcdir/configure conftestfile" \ - && test "$*" != "X conftestfile $srcdir/configure"; then - - # If neither matched, then we have a broken ls. This can happen - # if, for instance, CONFIG_SHELL is bash and it inherits a - # broken ls alias from the environment. This has actually - # happened. Such a system could not be considered "sane". - { echo "configure: error: ls -t appears to fail. Make sure there is not a broken -alias in your environment" 1>&2; exit 1; } - fi - - test "$2" = conftestfile - ) -then - # Ok. - : -else - { echo "configure: error: newly created file is older than distributed files! -Check your system clock" 1>&2; exit 1; } -fi -rm -f conftest* -echo "$ac_t""yes" 1>&6 -if test "$program_transform_name" = s,x,x,; then - program_transform_name= -else - # Double any \ or $. echo might interpret backslashes. 
- cat <<\EOF_SED > conftestsed -s,\\,\\\\,g; s,\$,$$,g -EOF_SED - program_transform_name="`echo $program_transform_name|sed -f conftestsed`" - rm -f conftestsed -fi -test "$program_prefix" != NONE && - program_transform_name="s,^,${program_prefix},; $program_transform_name" -# Use a double $ so make ignores it. -test "$program_suffix" != NONE && - program_transform_name="s,\$\$,${program_suffix},; $program_transform_name" - -# sed with no file args requires a program. -test "$program_transform_name" = "" && program_transform_name="s,x,x," - -echo $ac_n "checking whether ${MAKE-make} sets \${MAKE}""... $ac_c" 1>&6 -echo "configure:660: checking whether ${MAKE-make} sets \${MAKE}" >&5 -set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_prog_make_${ac_make}_set'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftestmake <<\EOF -all: - @echo 'ac_maketemp="${MAKE}"' -EOF -# GNU make sometimes prints "make[1]: Entering...", which would confuse us. -eval `${MAKE-make} -f conftestmake 2>/dev/null | grep temp=` -if test -n "$ac_maketemp"; then - eval ac_cv_prog_make_${ac_make}_set=yes -else - eval ac_cv_prog_make_${ac_make}_set=no -fi -rm -f conftestmake -fi -if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then - echo "$ac_t""yes" 1>&6 - SET_MAKE= -else - echo "$ac_t""no" 1>&6 - SET_MAKE="MAKE=${MAKE-make}" -fi - - -PACKAGE=tree-puzzle - -VERSION=5.0 - -if test "`cd $srcdir && pwd`" != "`pwd`" && test -f $srcdir/config.status; then - { echo "configure: error: source directory already configured; run "make distclean" there first" 1>&2; exit 1; } -fi -cat >> confdefs.h <> confdefs.h <&6 -echo "configure:706: checking for working aclocal" >&5 -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. -# Redirect stdin to placate older versions of autoconf. Sigh. 
-if (aclocal --version) < /dev/null > /dev/null 2>&1; then - ACLOCAL=aclocal - echo "$ac_t""found" 1>&6 -else - ACLOCAL="$missing_dir/missing aclocal" - echo "$ac_t""missing" 1>&6 -fi - -echo $ac_n "checking for working autoconf""... $ac_c" 1>&6 -echo "configure:719: checking for working autoconf" >&5 -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. -# Redirect stdin to placate older versions of autoconf. Sigh. -if (autoconf --version) < /dev/null > /dev/null 2>&1; then - AUTOCONF=autoconf - echo "$ac_t""found" 1>&6 -else - AUTOCONF="$missing_dir/missing autoconf" - echo "$ac_t""missing" 1>&6 -fi - -echo $ac_n "checking for working automake""... $ac_c" 1>&6 -echo "configure:732: checking for working automake" >&5 -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. -# Redirect stdin to placate older versions of autoconf. Sigh. -if (automake --version) < /dev/null > /dev/null 2>&1; then - AUTOMAKE=automake - echo "$ac_t""found" 1>&6 -else - AUTOMAKE="$missing_dir/missing automake" - echo "$ac_t""missing" 1>&6 -fi - -echo $ac_n "checking for working autoheader""... $ac_c" 1>&6 -echo "configure:745: checking for working autoheader" >&5 -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. -# Redirect stdin to placate older versions of autoconf. Sigh. -if (autoheader --version) < /dev/null > /dev/null 2>&1; then - AUTOHEADER=autoheader - echo "$ac_t""found" 1>&6 -else - AUTOHEADER="$missing_dir/missing autoheader" - echo "$ac_t""missing" 1>&6 -fi - -echo $ac_n "checking for working makeinfo""... $ac_c" 1>&6 -echo "configure:758: checking for working makeinfo" >&5 -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. 
-# Redirect stdin to placate older versions of autoconf. Sigh. -if (makeinfo --version) < /dev/null > /dev/null 2>&1; then - MAKEINFO=makeinfo - echo "$ac_t""found" 1>&6 -else - MAKEINFO="$missing_dir/missing makeinfo" - echo "$ac_t""missing" 1>&6 -fi - - - -# Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:775: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="gcc" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:805: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_prog_rejected=no - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - break - fi - done - IFS="$ac_save_ifs" -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# -gt 0; then - # We chose a different compiler from the bogus one. 
- # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. - shift - set dummy "$ac_dir/$ac_word" "$@" - shift - ac_cv_prog_CC="$@" - fi -fi -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - if test -z "$CC"; then - case "`uname -s`" in - *win32* | *WIN32*) - # Extract the first word of "cl", so it can be a program name with args. -set dummy cl; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:856: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="cl" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - ;; - esac - fi - test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; } -fi - -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 -echo "configure:888: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. 
-ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -cat > conftest.$ac_ext << EOF - -#line 899 "configure" -#include "confdefs.h" - -main(){return(0);} -EOF -if { (eval echo configure:904: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - ac_cv_prog_cc_works=yes - # If we can't run a trivial program, we are probably using a cross compiler. - if (./conftest; exit) 2>/dev/null; then - ac_cv_prog_cc_cross=no - else - ac_cv_prog_cc_cross=yes - fi -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - ac_cv_prog_cc_works=no -fi -rm -fr conftest* -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -echo "$ac_t""$ac_cv_prog_cc_works" 1>&6 -if test $ac_cv_prog_cc_works = no; then - { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } -fi -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 -echo "configure:930: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 -echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 -cross_compiling=$ac_cv_prog_cc_cross - -echo $ac_n "checking whether we are using GNU C""... 
$ac_c" 1>&6 -echo "configure:935: checking whether we are using GNU C" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then - ac_cv_prog_gcc=yes -else - ac_cv_prog_gcc=no -fi -fi - -echo "$ac_t""$ac_cv_prog_gcc" 1>&6 - -if test $ac_cv_prog_gcc = yes; then - GCC=yes -else - GCC= -fi - -ac_test_CFLAGS="${CFLAGS+set}" -ac_save_CFLAGS="$CFLAGS" -CFLAGS= -echo $ac_n "checking whether ${CC-cc} accepts -g""... $ac_c" 1>&6 -echo "configure:963: checking whether ${CC-cc} accepts -g" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - echo 'void f(){}' > conftest.c -if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then - ac_cv_prog_cc_g=yes -else - ac_cv_prog_cc_g=no -fi -rm -f conftest* - -fi - -echo "$ac_t""$ac_cv_prog_cc_g" 1>&6 -if test "$ac_test_CFLAGS" = set; then - CFLAGS="$ac_save_CFLAGS" -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi - -if test "x$CC" != xcc; then - echo $ac_n "checking whether $CC and cc understand -c and -o together""... $ac_c" 1>&6 -echo "configure:996: checking whether $CC and cc understand -c and -o together" >&5 -else - echo $ac_n "checking whether cc understands -c and -o together""... $ac_c" 1>&6 -echo "configure:999: checking whether cc understands -c and -o together" >&5 -fi -set dummy $CC; ac_cc="`echo $2 | - sed -e 's/[^a-zA-Z0-9_]/_/g' -e 's/^[0-9]/_/'`" -if eval "test \"`echo '$''{'ac_cv_prog_cc_${ac_cc}_c_o'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - echo 'foo(){}' > conftest.c -# Make sure it works both with $CC and with simple cc. -# We do the test twice because some compilers refuse to overwrite an -# existing .o file with -o, though they will create one. 
-ac_try='${CC-cc} -c conftest.c -o conftest.o 1>&5' -if { (eval echo configure:1011: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } && - test -f conftest.o && { (eval echo configure:1012: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; -then - eval ac_cv_prog_cc_${ac_cc}_c_o=yes - if test "x$CC" != xcc; then - # Test first that cc exists at all. - if { ac_try='cc -c conftest.c 1>&5'; { (eval echo configure:1017: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; }; then - ac_try='cc -c conftest.c -o conftest.o 1>&5' - if { (eval echo configure:1019: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } && - test -f conftest.o && { (eval echo configure:1020: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; - then - # cc works too. - : - else - # cc exists but doesn't like -o. - eval ac_cv_prog_cc_${ac_cc}_c_o=no - fi - fi - fi -else - eval ac_cv_prog_cc_${ac_cc}_c_o=no -fi -rm -f conftest* - -fi -if eval "test \"`echo '$ac_cv_prog_cc_'${ac_cc}_c_o`\" = yes"; then - echo "$ac_t""yes" 1>&6 -else - echo "$ac_t""no" 1>&6 - cat >> confdefs.h <<\EOF -#define NO_MINUS_C_MINUS_O 1 -EOF - -fi - -# Find a good install program. We prefer a C program (faster), -# so one script is as good as another. But avoid the broken or -# incompatible versions: -# SysV /etc/install, /usr/sbin/install -# SunOS /usr/etc/install -# IRIX /sbin/install -# AIX /bin/install -# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag -# AFS /usr/afsws/bin/install, which mishandles nonexistent args -# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" -# ./install, which can be erroneously created by make from ./install.sh. -echo $ac_n "checking for a BSD compatible install""... 
$ac_c" 1>&6 -echo "configure:1058: checking for a BSD compatible install" >&5 -if test -z "$INSTALL"; then -if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" - for ac_dir in $PATH; do - # Account for people who put trailing slashes in PATH elements. - case "$ac_dir/" in - /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; - *) - # OSF1 and SCO ODT 3.0 have their own names for install. - # Don't use installbsd from OSF since it installs stuff as root - # by default. - for ac_prog in ginstall scoinst install; do - if test -f $ac_dir/$ac_prog; then - if test $ac_prog = install && - grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then - # AIX install. It has an incompatible calling convention. - : - else - ac_cv_path_install="$ac_dir/$ac_prog -c" - break 2 - fi - fi - done - ;; - esac - done - IFS="$ac_save_IFS" - -fi - if test "${ac_cv_path_install+set}" = set; then - INSTALL="$ac_cv_path_install" - else - # As a last resort, use the slow shell script. We don't cache a - # path for INSTALL within a source directory, because that will - # break other packages using the cache if that directory is - # removed, or if the path is relative. - INSTALL="$ac_install_sh" - fi -fi -echo "$ac_t""$INSTALL" 1>&6 - -# Use test -z because SunOS4 sh mishandles braces in ${var-val}. -# It thinks the first close brace ends the variable substitution. -test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' - -test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' - -test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' - -echo $ac_n "checking whether ${MAKE-make} sets \${MAKE}""... 
$ac_c" 1>&6 -echo "configure:1111: checking whether ${MAKE-make} sets \${MAKE}" >&5 -set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_prog_make_${ac_make}_set'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftestmake <<\EOF -all: - @echo 'ac_maketemp="${MAKE}"' -EOF -# GNU make sometimes prints "make[1]: Entering...", which would confuse us. -eval `${MAKE-make} -f conftestmake 2>/dev/null | grep temp=` -if test -n "$ac_maketemp"; then - eval ac_cv_prog_make_${ac_make}_set=yes -else - eval ac_cv_prog_make_${ac_make}_set=no -fi -rm -f conftestmake -fi -if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then - echo "$ac_t""yes" 1>&6 - SET_MAKE= -else - echo "$ac_t""no" 1>&6 - SET_MAKE="MAKE=${MAKE-make}" -fi - - - - - -if test "$MPICC" != "" ; then - # Extract the first word of "$MPICC", so it can be a program name with args. -set dummy $MPICC; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1145: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC0'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC0" in - /*) - ac_cv_path_MPICC0="$MPICC0" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC0="$MPICC0" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC0="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC0="$ac_cv_path_MPICC0" -if test -n "$MPICC0"; then - echo "$ac_t""$MPICC0" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -fi -# Extract the first word of "mpcc", so it can be a program name with args. -set dummy mpcc; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:1181: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC1'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC1" in - /*) - ac_cv_path_MPICC1="$MPICC1" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC1="$MPICC1" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC1="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC1="$ac_cv_path_MPICC1" -if test -n "$MPICC1"; then - echo "$ac_t""$MPICC1" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -# Extract the first word of "hcc", so it can be a program name with args. -set dummy hcc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1216: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC2'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC2" in - /*) - ac_cv_path_MPICC2="$MPICC2" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC2="$MPICC2" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC2="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC2="$ac_cv_path_MPICC2" -if test -n "$MPICC2"; then - echo "$ac_t""$MPICC2" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -# Extract the first word of "mpicc", so it can be a program name with args. -set dummy mpicc; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:1251: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC3'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC3" in - /*) - ac_cv_path_MPICC3="$MPICC3" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC3="$MPICC3" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC3="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC3="$ac_cv_path_MPICC3" -if test -n "$MPICC3"; then - echo "$ac_t""$MPICC3" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -# Extract the first word of "mpicc_lam", so it can be a program name with args. -set dummy mpicc_lam; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1286: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC4'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC4" in - /*) - ac_cv_path_MPICC4="$MPICC4" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC4="$MPICC4" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC4="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC4="$ac_cv_path_MPICC4" -if test -n "$MPICC4"; then - echo "$ac_t""$MPICC4" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -# Extract the first word of "mpicc_mpich", so it can be a program name with args. -set dummy mpicc_mpich; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:1321: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC5'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC5" in - /*) - ac_cv_path_MPICC5="$MPICC5" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC5="$MPICC5" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC5="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC5="$ac_cv_path_MPICC5" -if test -n "$MPICC5"; then - echo "$ac_t""$MPICC5" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - - if test "$MPICC0" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC0 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1371: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1382: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? 
= 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - if test "$MPICC1" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC1 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1419: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1430: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - if test "$MPICC2" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC2 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1467: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1478: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? 
= 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - if test "$MPICC3" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC3 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1515: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1526: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - if test "$MPICC4" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC4 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1563: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1574: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? 
= 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - if test "$MPICC5" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC5 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1611: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1622: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - -ac_cv_prog_MPICC=$MPICC - - - - - - - -echo $ac_n "checking for main in -lm""... 
$ac_c" 1>&6 -echo "configure:1652: checking for main in -lm" >&5 -ac_lib_var=`echo m'_'main | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lm $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo m | sed -e 's/[^a-zA-Z0-9_]/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - - -echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 -echo "configure:1696: checking how to run the C preprocessor" >&5 -# On Suns, sometimes $CPP names a directory. -if test -n "$CPP" && test -d "$CPP"; then - CPP= -fi -if test -z "$CPP"; then -if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - # This must be in double quotes, not single quotes, because CPP may get - # substituted into the Makefile and "${CC-cc}" will confuse make. - CPP="${CC-cc} -E" - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. 
- cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1717: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -E -traditional-cpp" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1734: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -nologo -E" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1751: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP=/lib/cpp -fi -rm -f conftest* -fi -rm -f conftest* -fi -rm -f conftest* - ac_cv_prog_CPP="$CPP" -fi - CPP="$ac_cv_prog_CPP" -else - ac_cv_prog_CPP="$CPP" -fi -echo "$ac_t""$CPP" 1>&6 - -echo $ac_n "checking for ANSI C header files""... 
$ac_c" 1>&6 -echo "configure:1776: checking for ANSI C header files" >&5 -if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include -#include -#include -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1789: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - ac_cv_header_stdc=yes -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -if test $ac_cv_header_stdc = yes; then - # SunOS 4.x string.h does not declare mem*, contrary to ANSI. -cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "memchr" >/dev/null 2>&1; then - : -else - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. -cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "free" >/dev/null 2>&1; then - : -else - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. -if test "$cross_compiling" = yes; then - : -else - cat > conftest.$ac_ext < -#define ISLOWER(c) ('a' <= (c) && (c) <= 'z') -#define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) -#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) -int main () { int i; for (i = 0; i < 256; i++) -if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); -exit (0); } - -EOF -if { (eval echo configure:1856: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - : -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_header_stdc=no -fi -rm -fr conftest* -fi - -fi -fi - -echo "$ac_t""$ac_cv_header_stdc" 1>&6 -if test $ac_cv_header_stdc = yes; then - cat >> confdefs.h <<\EOF -#define STDC_HEADERS 1 -EOF - -fi - -for ac_hdr in limits.h -do -ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 -echo "configure:1883: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1893: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - - - - -echo $ac_n "checking for working const""... 
$ac_c" 1>&6 -echo "configure:1923: checking for working const" >&5 -if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext <j = 5; -} -{ /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ - const int foo = 10; -} - -; return 0; } -EOF -if { (eval echo configure:1977: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_c_const=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_c_const=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_c_const" 1>&6 -if test $ac_cv_c_const = no; then - cat >> confdefs.h <<\EOF -#define const -EOF - -fi - -echo $ac_n "checking for size_t""... $ac_c" 1>&6 -echo "configure:1998: checking for size_t" >&5 -if eval "test \"`echo '$''{'ac_cv_type_size_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#if STDC_HEADERS -#include -#include -#endif -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "(^|[^a-zA-Z_0-9])size_t[^a-zA-Z_0-9]" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_type_size_t=yes -else - rm -rf conftest* - ac_cv_type_size_t=no -fi -rm -f conftest* - -fi -echo "$ac_t""$ac_cv_type_size_t" 1>&6 -if test $ac_cv_type_size_t = no; then - cat >> confdefs.h <<\EOF -#define size_t unsigned -EOF - -fi - - - -trap '' 1 2 15 - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -test "x$prefix" = xNONE && prefix=$ac_default_prefix -# Let make expand exec_prefix. -test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' - -# Any assignment to VPATH causes Sun make to only execute -# the first set of double-colon rules, so remove it if not needed. -# If there is a colon in the path, we need to keep it. -if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' -fi - -trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 - -# Transform confdefs.h into DEFS. 
-# Protect against shell expansion while executing Makefile rules. -# Protect against Makefile macro expansion. -cat > conftest.defs <<\EOF -s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%-D\1=\2%g -s%[ `~#$^&*(){}\\|;'"<>?]%\\&%g -s%\[%\\&%g -s%\]%\\&%g -s%\$%$$%g -EOF -DEFS=`sed -f conftest.defs confdefs.h | tr '\012' ' '` -rm -f conftest.defs - - -# Without the "./", some shells look in PATH for config.status. -: ${CONFIG_STATUS=./config.status} - -echo creating $CONFIG_STATUS -rm -f $CONFIG_STATUS -cat > $CONFIG_STATUS </dev/null | sed 1q`: -# -# $0 $ac_configure_args -# -# Compiler output produced by configure, useful for debugging -# configure, is in ./config.log if it exists. - -ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" -for ac_option -do - case "\$ac_option" in - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" - exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; - -version | --version | --versio | --versi | --vers | --ver | --ve | --v) - echo "$CONFIG_STATUS generated by autoconf version 2.13" - exit 0 ;; - -help | --help | --hel | --he | --h) - echo "\$ac_cs_usage"; exit 0 ;; - *) echo "\$ac_cs_usage"; exit 1 ;; - esac -done - -ac_given_srcdir=$srcdir -ac_given_INSTALL="$INSTALL" - -trap 'rm -fr `echo "Makefile src/Makefile src/test doc/Makefile data/Makefile" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 -EOF -cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF -$ac_vpsub -$extrasub -s%@SHELL@%$SHELL%g -s%@CFLAGS@%$CFLAGS%g -s%@CPPFLAGS@%$CPPFLAGS%g -s%@CXXFLAGS@%$CXXFLAGS%g -s%@FFLAGS@%$FFLAGS%g -s%@DEFS@%$DEFS%g -s%@LDFLAGS@%$LDFLAGS%g -s%@LIBS@%$LIBS%g -s%@exec_prefix@%$exec_prefix%g -s%@prefix@%$prefix%g -s%@program_transform_name@%$program_transform_name%g -s%@bindir@%$bindir%g -s%@sbindir@%$sbindir%g -s%@libexecdir@%$libexecdir%g -s%@datadir@%$datadir%g -s%@sysconfdir@%$sysconfdir%g 
-s%@sharedstatedir@%$sharedstatedir%g -s%@localstatedir@%$localstatedir%g -s%@libdir@%$libdir%g -s%@includedir@%$includedir%g -s%@oldincludedir@%$oldincludedir%g -s%@infodir@%$infodir%g -s%@mandir@%$mandir%g -s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g -s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%g -s%@INSTALL_DATA@%$INSTALL_DATA%g -s%@PACKAGE@%$PACKAGE%g -s%@VERSION@%$VERSION%g -s%@ACLOCAL@%$ACLOCAL%g -s%@AUTOCONF@%$AUTOCONF%g -s%@AUTOMAKE@%$AUTOMAKE%g -s%@AUTOHEADER@%$AUTOHEADER%g -s%@MAKEINFO@%$MAKEINFO%g -s%@SET_MAKE@%$SET_MAKE%g -s%@CC@%$CC%g -s%@MPICC0@%$MPICC0%g -s%@MPICC1@%$MPICC1%g -s%@MPICC2@%$MPICC2%g -s%@MPICC3@%$MPICC3%g -s%@MPICC4@%$MPICC4%g -s%@MPICC5@%$MPICC5%g -s%@MPICC@%$MPICC%g -s%@MPILIBS@%$MPILIBS%g -s%@MPIDEFS@%$MPIDEFS%g -s%@MPICFLAGS@%$MPICFLAGS%g -s%@PPUZZLE@%$PPUZZLE%g -s%@CPP@%$CPP%g - -CEOF -EOF - -cat >> $CONFIG_STATUS <<\EOF - -# Split the substitutions into bite-sized pieces for seds with -# small command number limits, like on Digital OSF/1 and HP-UX. -ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. -ac_file=1 # Number of current file. -ac_beg=1 # First line for current file. -ac_end=$ac_max_sed_cmds # Line after last line for current file. -ac_more_lines=: -ac_sed_cmds="" -while $ac_more_lines; do - if test $ac_beg -gt 1; then - sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file - else - sed "${ac_end}q" conftest.subs > conftest.s$ac_file - fi - if test ! -s conftest.s$ac_file; then - ac_more_lines=false - rm -f conftest.s$ac_file - else - if test -z "$ac_sed_cmds"; then - ac_sed_cmds="sed -f conftest.s$ac_file" - else - ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" - fi - ac_file=`expr $ac_file + 1` - ac_beg=$ac_end - ac_end=`expr $ac_end + $ac_max_sed_cmds` - fi -done -if test -z "$ac_sed_cmds"; then - ac_sed_cmds=cat -fi -EOF - -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF -for ac_file in .. 
$CONFIG_FILES; do if test "x$ac_file" != x..; then - # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". - case "$ac_file" in - *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` - ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; - *) ac_file_in="${ac_file}.in" ;; - esac - - # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. - - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` - if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then - # The file is in a subdirectory. - test ! -d "$ac_dir" && mkdir "$ac_dir" - ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" - # A "../" for each directory in $ac_dir_suffix. - ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` - else - ac_dir_suffix= ac_dots= - fi - - case "$ac_given_srcdir" in - .) srcdir=. - if test -z "$ac_dots"; then top_srcdir=. - else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; - /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; - *) # Relative path. - srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" - top_srcdir="$ac_dots$ac_given_srcdir" ;; - esac - - case "$ac_given_INSTALL" in - [/$]*) INSTALL="$ac_given_INSTALL" ;; - *) INSTALL="$ac_dots$ac_given_INSTALL" ;; - esac - - echo creating "$ac_file" - rm -f "$ac_file" - configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." 
- case "$ac_file" in - *Makefile*) ac_comsub="1i\\ -# $configure_input" ;; - *) ac_comsub= ;; - esac - - ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` - sed -e "$ac_comsub -s%@configure_input@%$configure_input%g -s%@srcdir@%$srcdir%g -s%@top_srcdir@%$top_srcdir%g -s%@INSTALL@%$INSTALL%g -" $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file -fi; done -rm -f conftest.s* - -EOF -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF - -exit 0 -EOF -chmod +x $CONFIG_STATUS -rm -fr confdefs* $ac_clean_files -test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 - diff --git a/forester/archive/RIO/others/puzzle_dqo/configure.in b/forester/archive/RIO/others/puzzle_dqo/configure.in deleted file mode 100644 index 57f0e27..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/configure.in +++ /dev/null @@ -1,117 +0,0 @@ - -dnl Disable caching. -define([AC_CACHE_LOAD], )dnl -define([AC_CACHE_SAVE], )dnl - -dnl Process this file with autoconf to produce a configure script. -AC_INIT(src/ml.h) - -AM_INIT_AUTOMAKE(tree-puzzle, 5.0) - -dnl Checks for programs. -AC_PROG_CC -AC_PROG_CC_C_O -AC_PROG_INSTALL -AC_PROG_MAKE_SET - - -AC_DEFUN(AC_TEST_MPICC,[dnl - if test "$1" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$1 -dnl if test "$MPICC" != "$CC" ; then -dnl -dnl fi - - if test "$MPICC" != "" ; then - AC_MSG_CHECKING(whether $MPICC works as MPI compiler) - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - AC_MSG_RESULT(yes) - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - AC_MSG_RESULT(no) - AC_MSG_CHECKING(whether $MPICC needs -lmpi) - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? 
= 0 ; then - AC_MSG_RESULT(yes) - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - AC_MSG_RESULT(no) - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi ]) - -if test "$MPICC" != "" ; then - AC_PATH_PROG(MPICC0, $MPICC) -fi -AC_PATH_PROG(MPICC1, mpcc) -AC_PATH_PROG(MPICC2, hcc) -AC_PATH_PROG(MPICC3, mpicc) -AC_PATH_PROG(MPICC4, mpicc_lam) -AC_PATH_PROG(MPICC5, mpicc_mpich) - -AC_TEST_MPICC($MPICC0) -AC_TEST_MPICC($MPICC1) -AC_TEST_MPICC($MPICC2) -AC_TEST_MPICC($MPICC3) -AC_TEST_MPICC($MPICC4) -AC_TEST_MPICC($MPICC5) - -ac_cv_prog_MPICC=$MPICC - -AC_SUBST(MPICC) -AC_SUBST(MPILIBS) -AC_SUBST(MPIDEFS) -AC_SUBST(MPICFLAGS) -AC_SUBST(PPUZZLE) - -dnl Checks for libraries. -dnl Replace `main' with a function in -lm: -AC_CHECK_LIB(m, main) -dnl AC_CHECK_LIB(mpi, main) - -dnl Checks for header files. -AC_HEADER_STDC -AC_CHECK_HEADERS(limits.h) -dnl AC_HAVE_HEADERS(mpi.h) - -dnl AC_HAVE_HEADERS(rpc/xdr.h) - - -dnl Checks for typedefs, structures, and compiler characteristics. -AC_C_CONST -AC_TYPE_SIZE_T - -dnl Checks for library functions. -dnl AC_CHECK_FUNCS(xdr_u_char) -dnl AC_CHECK_FUNCS(xdr_double) -dnl AC_CHECK_FUNCS(xdrstdio_create) -dnl AC_CHECK_FUNCS(xdr_destroy) -dnl AC_CHECK_FUNCS(xdr_inline) - -AC_OUTPUT(Makefile src/Makefile src/test doc/Makefile data/Makefile) diff --git a/forester/archive/RIO/others/puzzle_dqo/data/Makefile b/forester/archive/RIO/others/puzzle_dqo/data/Makefile deleted file mode 100644 index 6f5f672..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/data/Makefile +++ /dev/null @@ -1,177 +0,0 @@ -# Generated automatically from Makefile.in by configure. -# Makefile.in generated automatically by automake 1.4 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. 
- -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = /bin/sh - -srcdir = . -top_srcdir = .. -prefix = /usr/local -exec_prefix = ${prefix} - -bindir = ${exec_prefix}/bin -sbindir = ${exec_prefix}/sbin -libexecdir = ${exec_prefix}/libexec -datadir = ${prefix}/share -sysconfdir = ${prefix}/etc -sharedstatedir = ${prefix}/com -localstatedir = ${prefix}/var -libdir = ${exec_prefix}/lib -infodir = ${prefix}/info -mandir = ${prefix}/man -includedir = ${prefix}/include -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/tree-puzzle -pkglibdir = $(libdir)/tree-puzzle -pkgincludedir = $(includedir)/tree-puzzle - -top_builddir = .. - -ACLOCAL = aclocal -AUTOCONF = autoconf -AUTOMAKE = automake -AUTOHEADER = autoheader - -INSTALL = /usr/bin/install -c -INSTALL_PROGRAM = ${INSTALL} $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = ${INSTALL} -m 644 -INSTALL_SCRIPT = ${INSTALL_PROGRAM} -transform = s,x,x, - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = gcc -MAKEINFO = makeinfo -MPICC = -MPICC0 = -MPICC1 = -MPICC2 = -MPICC3 = -MPICC4 = -MPICC5 = -MPICFLAGS = -MPIDEFS = -MPILIBS = -PACKAGE = tree-puzzle -PPUZZLE = -VERSION = 5.0 - -EXTRA_DIST = atp6.a globin.a marswolf.n primates.b -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = -DIST_COMMON = Makefile.am Makefile.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = tar -GZIP_ENV = --best -all: all-redirect -.SUFFIXES: -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps data/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= 
$(SHELL) ./config.status - -tags: TAGS -TAGS: - - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = data - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am -uninstall-am: -uninstall: uninstall-am -all-am: Makefile -all-redirect: all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-generic mostlyclean-am - -clean: clean-am - -distclean-am: distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-am - -.PHONY: tags distdir info-am info dvi-am dvi check check-am \ -installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. 
-# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_dqo/data/Makefile.am b/forester/archive/RIO/others/puzzle_dqo/data/Makefile.am deleted file mode 100644 index 9589f1e..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/data/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST = atp6.a globin.a marswolf.n primates.b diff --git a/forester/archive/RIO/others/puzzle_dqo/data/Makefile.in b/forester/archive/RIO/others/puzzle_dqo/data/Makefile.in deleted file mode 100644 index f844e6e..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/data/Makefile.in +++ /dev/null @@ -1,177 +0,0 @@ -# Makefile.in generated automatically by automake 1.4 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = @SHELL@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -bindir = @bindir@ -sbindir = @sbindir@ -libexecdir = @libexecdir@ -datadir = @datadir@ -sysconfdir = @sysconfdir@ -sharedstatedir = @sharedstatedir@ -localstatedir = @localstatedir@ -libdir = @libdir@ -infodir = @infodir@ -mandir = @mandir@ -includedir = @includedir@ -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ - -top_builddir = .. 
- -ACLOCAL = @ACLOCAL@ -AUTOCONF = @AUTOCONF@ -AUTOMAKE = @AUTOMAKE@ -AUTOHEADER = @AUTOHEADER@ - -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -transform = @program_transform_name@ - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = @CC@ -MAKEINFO = @MAKEINFO@ -MPICC = @MPICC@ -MPICC0 = @MPICC0@ -MPICC1 = @MPICC1@ -MPICC2 = @MPICC2@ -MPICC3 = @MPICC3@ -MPICC4 = @MPICC4@ -MPICC5 = @MPICC5@ -MPICFLAGS = @MPICFLAGS@ -MPIDEFS = @MPIDEFS@ -MPILIBS = @MPILIBS@ -PACKAGE = @PACKAGE@ -PPUZZLE = @PPUZZLE@ -VERSION = @VERSION@ - -EXTRA_DIST = atp6.a globin.a marswolf.n primates.b -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = -DIST_COMMON = Makefile.am Makefile.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = tar -GZIP_ENV = --best -all: all-redirect -.SUFFIXES: -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps data/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -tags: TAGS -TAGS: - - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = data - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) 
$(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am -uninstall-am: -uninstall: uninstall-am -all-am: Makefile -all-redirect: all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-generic mostlyclean-am - -clean: clean-am - -distclean-am: distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-am - -.PHONY: tags distdir info-am info dvi-am dvi check check-am \ -installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_dqo/doc/Makefile b/forester/archive/RIO/others/puzzle_dqo/doc/Makefile deleted file mode 100644 index 0f281d4..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/doc/Makefile +++ /dev/null @@ -1,177 +0,0 @@ -# Generated automatically from Makefile.in by configure. -# Makefile.in generated automatically by automake 1.4 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. 
-# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = /bin/sh - -srcdir = . -top_srcdir = .. -prefix = /usr/local -exec_prefix = ${prefix} - -bindir = ${exec_prefix}/bin -sbindir = ${exec_prefix}/sbin -libexecdir = ${exec_prefix}/libexec -datadir = ${prefix}/share -sysconfdir = ${prefix}/etc -sharedstatedir = ${prefix}/com -localstatedir = ${prefix}/var -libdir = ${exec_prefix}/lib -infodir = ${prefix}/info -mandir = ${prefix}/man -includedir = ${prefix}/include -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/tree-puzzle -pkglibdir = $(libdir)/tree-puzzle -pkgincludedir = $(includedir)/tree-puzzle - -top_builddir = .. 
- -ACLOCAL = aclocal -AUTOCONF = autoconf -AUTOMAKE = automake -AUTOHEADER = autoheader - -INSTALL = /usr/bin/install -c -INSTALL_PROGRAM = ${INSTALL} $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = ${INSTALL} -m 644 -INSTALL_SCRIPT = ${INSTALL_PROGRAM} -transform = s,x,x, - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = gcc -MAKEINFO = makeinfo -MPICC = -MPICC0 = -MPICC1 = -MPICC2 = -MPICC3 = -MPICC4 = -MPICC5 = -MPICFLAGS = -MPIDEFS = -MPILIBS = -PACKAGE = tree-puzzle -PPUZZLE = -VERSION = 5.0 - -EXTRA_DIST = manual.html ppuzzle.gif puzzle.gif -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = -DIST_COMMON = Makefile.am Makefile.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = tar -GZIP_ENV = --best -all: all-redirect -.SUFFIXES: -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps doc/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -tags: TAGS -TAGS: - - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = doc - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am -uninstall-am: -uninstall: uninstall-am -all-am: Makefile -all-redirect: 
all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-generic mostlyclean-am - -clean: clean-am - -distclean-am: distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-am - -.PHONY: tags distdir info-am info dvi-am dvi check check-am \ -installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_dqo/doc/Makefile.am b/forester/archive/RIO/others/puzzle_dqo/doc/Makefile.am deleted file mode 100644 index 3cb95e6..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/doc/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST = manual.html ppuzzle.gif puzzle.gif diff --git a/forester/archive/RIO/others/puzzle_dqo/doc/Makefile.in b/forester/archive/RIO/others/puzzle_dqo/doc/Makefile.in deleted file mode 100644 index e48590c..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/doc/Makefile.in +++ /dev/null @@ -1,177 +0,0 @@ -# Makefile.in generated automatically by automake 1.4 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. 
-# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = @SHELL@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -bindir = @bindir@ -sbindir = @sbindir@ -libexecdir = @libexecdir@ -datadir = @datadir@ -sysconfdir = @sysconfdir@ -sharedstatedir = @sharedstatedir@ -localstatedir = @localstatedir@ -libdir = @libdir@ -infodir = @infodir@ -mandir = @mandir@ -includedir = @includedir@ -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ - -top_builddir = .. 
- -ACLOCAL = @ACLOCAL@ -AUTOCONF = @AUTOCONF@ -AUTOMAKE = @AUTOMAKE@ -AUTOHEADER = @AUTOHEADER@ - -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -transform = @program_transform_name@ - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = @CC@ -MAKEINFO = @MAKEINFO@ -MPICC = @MPICC@ -MPICC0 = @MPICC0@ -MPICC1 = @MPICC1@ -MPICC2 = @MPICC2@ -MPICC3 = @MPICC3@ -MPICC4 = @MPICC4@ -MPICC5 = @MPICC5@ -MPICFLAGS = @MPICFLAGS@ -MPIDEFS = @MPIDEFS@ -MPILIBS = @MPILIBS@ -PACKAGE = @PACKAGE@ -PPUZZLE = @PPUZZLE@ -VERSION = @VERSION@ - -EXTRA_DIST = manual.html ppuzzle.gif puzzle.gif -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = -DIST_COMMON = Makefile.am Makefile.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = tar -GZIP_ENV = --best -all: all-redirect -.SUFFIXES: -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps doc/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -tags: TAGS -TAGS: - - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = doc - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) 
install-exec-am install-data-am -install: install-am -uninstall-am: -uninstall: uninstall-am -all-am: Makefile -all-redirect: all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-generic mostlyclean-am - -clean: clean-am - -distclean-am: distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-am - -.PHONY: tags distdir info-am info dvi-am dvi check check-am \ -installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_dqo/install-sh b/forester/archive/RIO/others/puzzle_dqo/install-sh deleted file mode 100755 index e9de238..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/install-sh +++ /dev/null @@ -1,251 +0,0 @@ -#!/bin/sh -# -# install - install a program, script, or datafile -# This comes from X11R5 (mit/util/scripts/install.sh). 
-# -# Copyright 1991 by the Massachusetts Institute of Technology -# -# Permission to use, copy, modify, distribute, and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in advertising or -# publicity pertaining to distribution of the software without specific, -# written prior permission. M.I.T. makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. It can only install one file at a time, a restriction -# shared with many OS's install programs. - - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" - - -# put in absolute paths if you don't have them in your path; or use env. vars. 
- -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -transformbasename="" -transform_arg="" -instcmd="$mvprog" -chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd="$cpprog" - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd="$stripprog" - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "install: no input file specified" - exit 1 -else - true -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d $dst ]; then - instcmd=: - chmodcmd="" - else - instcmd=mkdir - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. 
- - if [ -f $src -o -d $src ] - then - true - else - echo "install: $src does not exist" - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "install: no destination specified" - exit 1 - else - true - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d $dst ] - then - dst="$dst"/`basename $src` - else - true - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' -' -IFS="${IFS-${defaultIFS}}" - -oIFS="${IFS}" -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS="${oIFS}" - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp="${pathcomp}${1}" - shift - - if [ ! -d "${pathcomp}" ] ; - then - $mkdirprog "${pathcomp}" - else - true - fi - - pathcomp="${pathcomp}/" -done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd $dst && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi -else - -# If we're going to rename the final executable, determine the name now. - - if [ x"$transformarg" = x ] - then - dstfile=`basename $dst` - else - dstfile=`basename $dst $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename $dst` - else - true - fi - -# Make a temp file name in the proper directory. 
- - dsttmp=$dstdir/#inst.$$# - -# Move or copy the file name to the temp name - - $doit $instcmd $src $dsttmp && - - trap "rm -f ${dsttmp}" 0 && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && - -# Now rename the file to the real destination. - - $doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile - -fi && - - -exit 0 diff --git a/forester/archive/RIO/others/puzzle_dqo/missing b/forester/archive/RIO/others/puzzle_dqo/missing deleted file mode 100755 index 7789652..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/missing +++ /dev/null @@ -1,190 +0,0 @@ -#! /bin/sh -# Common stub for a few missing GNU programs while installing. -# Copyright (C) 1996, 1997 Free Software Foundation, Inc. -# Franc,ois Pinard , 1996. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -# 02111-1307, USA. 
- -if test $# -eq 0; then - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 -fi - -case "$1" in - - -h|--h|--he|--hel|--help) - echo "\ -$0 [OPTION]... PROGRAM [ARGUMENT]... - -Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an -error status if there is no known handling for PROGRAM. - -Options: - -h, --help display this help and exit - -v, --version output version information and exit - -Supported PROGRAM values: - aclocal touch file \`aclocal.m4' - autoconf touch file \`configure' - autoheader touch file \`config.h.in' - automake touch all \`Makefile.in' files - bison create \`y.tab.[ch]', if possible, from existing .[ch] - flex create \`lex.yy.c', if possible, from existing .c - lex create \`lex.yy.c', if possible, from existing .c - makeinfo touch the output file - yacc create \`y.tab.[ch]', if possible, from existing .[ch]" - ;; - - -v|--v|--ve|--ver|--vers|--versi|--versio|--version) - echo "missing - GNU libit 0.0" - ;; - - -*) - echo 1>&2 "$0: Unknown \`$1' option" - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 - ;; - - aclocal) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`acinclude.m4' or \`configure.in'. You might want - to install the \`Automake' and \`Perl' packages. Grab them from - any GNU archive site." - touch aclocal.m4 - ;; - - autoconf) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`configure.in'. You might want to install the - \`Autoconf' and \`GNU m4' packages. Grab them from any GNU - archive site." - touch configure - ;; - - autoheader) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`acconfig.h' or \`configure.in'. You might want - to install the \`Autoconf' and \`GNU m4' packages. Grab them - from any GNU archive site." 
- files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' configure.in` - test -z "$files" && files="config.h" - touch_files= - for f in $files; do - case "$f" in - *:*) touch_files="$touch_files "`echo "$f" | - sed -e 's/^[^:]*://' -e 's/:.*//'`;; - *) touch_files="$touch_files $f.in";; - esac - done - touch $touch_files - ;; - - automake) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`Makefile.am', \`acinclude.m4' or \`configure.in'. - You might want to install the \`Automake' and \`Perl' packages. - Grab them from any GNU archive site." - find . -type f -name Makefile.am -print | - sed 's/\.am$/.in/' | - while read f; do touch "$f"; done - ;; - - bison|yacc) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a \`.y' file. You may need the \`Bison' package - in order for those modifications to take effect. You can get - \`Bison' from any GNU archive site." - rm -f y.tab.c y.tab.h - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.y) - SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.c - fi - SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.h - fi - ;; - esac - fi - if [ ! -f y.tab.h ]; then - echo >y.tab.h - fi - if [ ! -f y.tab.c ]; then - echo 'main() { return 0; }' >y.tab.c - fi - ;; - - lex|flex) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a \`.l' file. You may need the \`Flex' package - in order for those modifications to take effect. You can get - \`Flex' from any GNU archive site." - rm -f lex.yy.c - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.l) - SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" lex.yy.c - fi - ;; - esac - fi - if [ ! 
-f lex.yy.c ]; then - echo 'main() { return 0; }' >lex.yy.c - fi - ;; - - makeinfo) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a \`.texi' or \`.texinfo' file, or any other file - indirectly affecting the aspect of the manual. The spurious - call might also be the consequence of using a buggy \`make' (AIX, - DU, IRIX). You might want to install the \`Texinfo' package or - the \`GNU make' package. Grab either from any GNU archive site." - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` - file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file` - fi - touch $file - ;; - - *) - echo 1>&2 "\ -WARNING: \`$1' is needed, and you do not seem to have it handy on your - system. You might have modified some files without having the - proper tools for further handling them. Check the \`README' file, - it often tells you about the needed prerequirements for installing - this package. You may also peek at any GNU archive site, in case - some other package would contain this missing \`$1' program." - exit 1 - ;; -esac - -exit 0 diff --git a/forester/archive/RIO/others/puzzle_dqo/mkinstalldirs b/forester/archive/RIO/others/puzzle_dqo/mkinstalldirs deleted file mode 100755 index bff4a66..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/mkinstalldirs +++ /dev/null @@ -1,40 +0,0 @@ -#! /bin/sh -# mkinstalldirs --- make directory hierarchy -# Author: Noah Friedman -# Created: 1993-05-16 -# Public domain - -# $Id: mkinstalldirs,v 1.1.1.1 2005/03/22 08:34:59 cmzmasek Exp $ - -errstatus=0 - -for file -do - set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'` - shift - - pathcomp= - for d - do - pathcomp="$pathcomp$d" - case "$pathcomp" in - -* ) pathcomp=./$pathcomp ;; - esac - - if test ! -d "$pathcomp"; then - echo "mkdir $pathcomp" - - mkdir "$pathcomp" || lasterr=$? - - if test ! 
-d "$pathcomp"; then - errstatus=$lasterr - fi - fi - - pathcomp="$pathcomp/" - done -done - -exit $errstatus - -# mkinstalldirs ends here diff --git a/forester/archive/RIO/others/puzzle_dqo/src/Makefile b/forester/archive/RIO/others/puzzle_dqo/src/Makefile deleted file mode 100644 index 8a90eaa..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/Makefile +++ /dev/null @@ -1,317 +0,0 @@ -# Generated automatically from Makefile.in by configure. -# Makefile.in generated automatically by automake 1.4 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = /bin/sh - -srcdir = . -top_srcdir = .. -prefix = /usr/local -exec_prefix = ${prefix} - -bindir = ${exec_prefix}/bin -sbindir = ${exec_prefix}/sbin -libexecdir = ${exec_prefix}/libexec -datadir = ${prefix}/share -sysconfdir = ${prefix}/etc -sharedstatedir = ${prefix}/com -localstatedir = ${prefix}/var -libdir = ${exec_prefix}/lib -infodir = ${prefix}/info -mandir = ${prefix}/man -includedir = ${prefix}/include -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/tree-puzzle -pkglibdir = $(libdir)/tree-puzzle -pkgincludedir = $(includedir)/tree-puzzle - -top_builddir = .. 
- -ACLOCAL = aclocal -AUTOCONF = autoconf -AUTOMAKE = automake -AUTOHEADER = autoheader - -INSTALL = /usr/bin/install -c -INSTALL_PROGRAM = ${INSTALL} $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = ${INSTALL} -m 644 -INSTALL_SCRIPT = ${INSTALL_PROGRAM} -transform = s,x,x, - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = gcc -MAKEINFO = makeinfo -MPICC = -MPICC0 = -MPICC1 = -MPICC2 = -MPICC3 = -MPICC4 = -MPICC5 = -MPICFLAGS = -MPIDEFS = -MPILIBS = -PACKAGE = tree-puzzle -PPUZZLE = -VERSION = 5.0 - -bin_PROGRAMS = puzzle - -puzzle_SOURCES = gamma.c ml1.c ml2.c ml3.c model1.c model2.c puzzle1.c puzzle2.c util.c ml.h util.h puzzle.h gamma.h -puzzle_LDADD = sgamma.o sml1.o sml2.o sml3.o smodel1.o smodel2.o spuzzle1.o spuzzle2.o sutil.o - -SDEFS = -SCFLAGS = -SLDFLAGS = -lm - -SCOMPILE = $(CC) $(SDEFS) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(SCFLAGS) $(CFLAGS) -SCCLD = $(CC) -SLINK = $(SCCLD) $(AM_CFLAGS) $(CFLAGS) $(SLDFLAGS) $(LDFLAGS) - -PCC = -PDEFS = -DPARALLEL -PCFLAGS = -PLDFLAGS = -lm -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = test -PROGRAMS = $(bin_PROGRAMS) - - -DEFS = -DPACKAGE=\"tree-puzzle\" -DVERSION=\"5.0\" -DHAVE_LIBM=1 -DSTDC_HEADERS=1 -DHAVE_LIMITS_H=1 -I. 
-I$(srcdir) -CPPFLAGS = -LDFLAGS = -LIBS = -lm -puzzle_OBJECTS = gamma.o ml1.o ml2.o ml3.o model1.o model2.o puzzle1.o \ -puzzle2.o util.o -puzzle_DEPENDENCIES = sgamma.o sml1.o sml2.o sml3.o smodel1.o smodel2.o \ -spuzzle1.o spuzzle2.o sutil.o -puzzle_LDFLAGS = -CFLAGS = -g -O2 -COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ -DIST_COMMON = README Makefile.am Makefile.in test.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = gtar -GZIP_ENV = --best -SOURCES = $(puzzle_SOURCES) -OBJECTS = $(puzzle_OBJECTS) - -all: all-redirect -.SUFFIXES: -.SUFFIXES: .S .c .o .s -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps src/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -test: $(top_builddir)/config.status test.in - cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -mostlyclean-binPROGRAMS: - -clean-binPROGRAMS: - -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) - -distclean-binPROGRAMS: - -maintainer-clean-binPROGRAMS: - -install-binPROGRAMS: $(bin_PROGRAMS) - @$(NORMAL_INSTALL) - $(mkinstalldirs) $(DESTDIR)$(bindir) - @list='$(bin_PROGRAMS)'; for p in $$list; do \ - if test -f $$p; then \ - echo " $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`"; \ - $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \ - else :; fi; \ - done - -uninstall-binPROGRAMS: - @$(NORMAL_UNINSTALL) - list='$(bin_PROGRAMS)'; for p in $$list; do \ - rm -f $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \ - done - -.c.o: - 
$(COMPILE) -c $< - -.s.o: - $(COMPILE) -c $< - -.S.o: - $(COMPILE) -c $< - -mostlyclean-compile: - -rm -f *.o core *.core - -clean-compile: - -distclean-compile: - -rm -f *.tab.c - -maintainer-clean-compile: - -tags: TAGS - -ID: $(HEADERS) $(SOURCES) $(LISP) - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - here=`pwd` && cd $(srcdir) \ - && mkid -f$$here/ID $$unique $(LISP) - -TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ - || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS) - -mostlyclean-tags: - -clean-tags: - -distclean-tags: - -rm -f TAGS ID - -maintainer-clean-tags: - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = src - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$d/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: install-binPROGRAMS -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am -uninstall-am: uninstall-binPROGRAMS -uninstall: uninstall-am -all-am: Makefile $(PROGRAMS) -all-redirect: all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - $(mkinstalldirs) $(DESTDIR)$(bindir) - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - 
-rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-binPROGRAMS mostlyclean-compile \ - mostlyclean-tags mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-binPROGRAMS clean-compile clean-tags clean-generic \ - mostlyclean-am - -clean: clean-am - -distclean-am: distclean-binPROGRAMS distclean-compile distclean-tags \ - distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-binPROGRAMS \ - maintainer-clean-compile maintainer-clean-tags \ - maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-am - -.PHONY: mostlyclean-binPROGRAMS distclean-binPROGRAMS clean-binPROGRAMS \ -maintainer-clean-binPROGRAMS uninstall-binPROGRAMS install-binPROGRAMS \ -mostlyclean-compile distclean-compile clean-compile \ -maintainer-clean-compile tags mostlyclean-tags distclean-tags \ -clean-tags maintainer-clean-tags distdir info-am info dvi-am dvi check \ -check-am installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -puzzle: $(puzzle_LDADD) $(puzzle_SOURCES) - $(SLINK) $(puzzle_LDADD) -o $@ - -sml1.o: ml1.c ml.h util.h - $(SCOMPILE) -c ml1.c && mv ml1.o $@ -sml2.o: ml2.c ml.h util.h - $(SCOMPILE) -c ml2.c && mv ml2.o $@ -sml3.o: ml3.c ml.h util.h gamma.h - $(SCOMPILE) -c ml3.c && mv ml3.o $@ -smodel1.o: model1.c ml.h util.h - $(SCOMPILE) -c model1.c && mv model1.o $@ -smodel2.o: model2.c ml.h util.h - $(SCOMPILE) -c model2.c && mv model2.o $@ -spuzzle1.o: puzzle1.c ml.h util.h puzzle.h gamma.h ppuzzle.h - $(SCOMPILE) -c puzzle1.c && mv 
puzzle1.o $@ -spuzzle2.o: puzzle2.c ml.h util.h puzzle.h ppuzzle.h - $(SCOMPILE) -c puzzle2.c && mv puzzle2.o $@ -sutil.o: util.c util.h - $(SCOMPILE) -c util.c && mv util.o $@ -sgamma.o: gamma.c gamma.h util.h - $(SCOMPILE) -c gamma.c && mv gamma.o $@ - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_dqo/src/Makefile.am b/forester/archive/RIO/others/puzzle_dqo/src/Makefile.am deleted file mode 100644 index e28c498..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/Makefile.am +++ /dev/null @@ -1,49 +0,0 @@ -bin_PROGRAMS = puzzle - -puzzle_SOURCES = gamma.c ml1.c ml2.c ml3.c model1.c model2.c puzzle1.c puzzle2.c util.c ml.h util.h puzzle.h gamma.h -puzzle_LDADD = sgamma.o sml1.o sml2.o sml3.o smodel1.o smodel2.o spuzzle1.o spuzzle2.o sutil.o - -SDEFS = -SCFLAGS = -SLDFLAGS = @LIBS@ - -SCOMPILE = $(CC) $(SDEFS) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(SCFLAGS) $(CFLAGS) -SCCLD = $(CC) -SLINK = $(SCCLD) $(AM_CFLAGS) $(CFLAGS) $(SLDFLAGS) $(LDFLAGS) - - - -PCC = @MPICC@ -PDEFS = -DPARALLEL -PCFLAGS = -PLDFLAGS = @LIBS@ @MPILIBS@ - - - - -puzzle: $(puzzle_LDADD) $(puzzle_SOURCES) - $(SLINK) $(puzzle_LDADD) -o $@ - -sml1.o: ml1.c ml.h util.h - $(SCOMPILE) -c ml1.c && mv ml1.o $@ -sml2.o: ml2.c ml.h util.h - $(SCOMPILE) -c ml2.c && mv ml2.o $@ -sml3.o: ml3.c ml.h util.h gamma.h - $(SCOMPILE) -c ml3.c && mv ml3.o $@ -smodel1.o: model1.c ml.h util.h - $(SCOMPILE) -c model1.c && mv model1.o $@ -smodel2.o: model2.c ml.h util.h - $(SCOMPILE) -c model2.c && mv model2.o $@ -spuzzle1.o: puzzle1.c ml.h util.h puzzle.h gamma.h ppuzzle.h - $(SCOMPILE) -c puzzle1.c && mv puzzle1.o $@ -spuzzle2.o: puzzle2.c ml.h util.h puzzle.h ppuzzle.h - $(SCOMPILE) -c puzzle2.c && mv puzzle2.o $@ -sutil.o: util.c util.h - $(SCOMPILE) -c util.c && mv util.o $@ -sgamma.o: gamma.c gamma.h util.h - $(SCOMPILE) -c gamma.c && mv 
gamma.o $@ - - - - - diff --git a/forester/archive/RIO/others/puzzle_dqo/src/Makefile.in b/forester/archive/RIO/others/puzzle_dqo/src/Makefile.in deleted file mode 100644 index a8fb19d..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/Makefile.in +++ /dev/null @@ -1,317 +0,0 @@ -# Makefile.in generated automatically by automake 1.4 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = @SHELL@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -bindir = @bindir@ -sbindir = @sbindir@ -libexecdir = @libexecdir@ -datadir = @datadir@ -sysconfdir = @sysconfdir@ -sharedstatedir = @sharedstatedir@ -localstatedir = @localstatedir@ -libdir = @libdir@ -infodir = @infodir@ -mandir = @mandir@ -includedir = @includedir@ -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ - -top_builddir = .. 
- -ACLOCAL = @ACLOCAL@ -AUTOCONF = @AUTOCONF@ -AUTOMAKE = @AUTOMAKE@ -AUTOHEADER = @AUTOHEADER@ - -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -transform = @program_transform_name@ - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = @CC@ -MAKEINFO = @MAKEINFO@ -MPICC = @MPICC@ -MPICC0 = @MPICC0@ -MPICC1 = @MPICC1@ -MPICC2 = @MPICC2@ -MPICC3 = @MPICC3@ -MPICC4 = @MPICC4@ -MPICC5 = @MPICC5@ -MPICFLAGS = @MPICFLAGS@ -MPIDEFS = @MPIDEFS@ -MPILIBS = @MPILIBS@ -PACKAGE = @PACKAGE@ -PPUZZLE = @PPUZZLE@ -VERSION = @VERSION@ - -bin_PROGRAMS = puzzle - -puzzle_SOURCES = gamma.c ml1.c ml2.c ml3.c model1.c model2.c puzzle1.c puzzle2.c util.c ml.h util.h puzzle.h gamma.h -puzzle_LDADD = sgamma.o sml1.o sml2.o sml3.o smodel1.o smodel2.o spuzzle1.o spuzzle2.o sutil.o - -SDEFS = -SCFLAGS = -SLDFLAGS = @LIBS@ - -SCOMPILE = $(CC) $(SDEFS) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(SCFLAGS) $(CFLAGS) -SCCLD = $(CC) -SLINK = $(SCCLD) $(AM_CFLAGS) $(CFLAGS) $(SLDFLAGS) $(LDFLAGS) - -PCC = @MPICC@ -PDEFS = -DPARALLEL -PCFLAGS = -PLDFLAGS = @LIBS@ @MPILIBS@ -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = test -PROGRAMS = $(bin_PROGRAMS) - - -DEFS = @DEFS@ -I. 
-I$(srcdir) -CPPFLAGS = @CPPFLAGS@ -LDFLAGS = @LDFLAGS@ -LIBS = @LIBS@ -puzzle_OBJECTS = gamma.o ml1.o ml2.o ml3.o model1.o model2.o puzzle1.o \ -puzzle2.o util.o -puzzle_DEPENDENCIES = sgamma.o sml1.o sml2.o sml3.o smodel1.o smodel2.o \ -spuzzle1.o spuzzle2.o sutil.o -puzzle_LDFLAGS = -CFLAGS = @CFLAGS@ -COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ -DIST_COMMON = README Makefile.am Makefile.in test.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = gtar -GZIP_ENV = --best -SOURCES = $(puzzle_SOURCES) -OBJECTS = $(puzzle_OBJECTS) - -all: all-redirect -.SUFFIXES: -.SUFFIXES: .S .c .o .s -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps src/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -test: $(top_builddir)/config.status test.in - cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -mostlyclean-binPROGRAMS: - -clean-binPROGRAMS: - -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) - -distclean-binPROGRAMS: - -maintainer-clean-binPROGRAMS: - -install-binPROGRAMS: $(bin_PROGRAMS) - @$(NORMAL_INSTALL) - $(mkinstalldirs) $(DESTDIR)$(bindir) - @list='$(bin_PROGRAMS)'; for p in $$list; do \ - if test -f $$p; then \ - echo " $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`"; \ - $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \ - else :; fi; \ - done - -uninstall-binPROGRAMS: - @$(NORMAL_UNINSTALL) - list='$(bin_PROGRAMS)'; for p in $$list; do \ - rm -f $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; 
\ - done - -.c.o: - $(COMPILE) -c $< - -.s.o: - $(COMPILE) -c $< - -.S.o: - $(COMPILE) -c $< - -mostlyclean-compile: - -rm -f *.o core *.core - -clean-compile: - -distclean-compile: - -rm -f *.tab.c - -maintainer-clean-compile: - -tags: TAGS - -ID: $(HEADERS) $(SOURCES) $(LISP) - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - here=`pwd` && cd $(srcdir) \ - && mkid -f$$here/ID $$unique $(LISP) - -TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ - || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS) - -mostlyclean-tags: - -clean-tags: - -distclean-tags: - -rm -f TAGS ID - -maintainer-clean-tags: - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = src - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$d/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: install-binPROGRAMS -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am -uninstall-am: uninstall-binPROGRAMS -uninstall: uninstall-am -all-am: Makefile $(PROGRAMS) -all-redirect: all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - $(mkinstalldirs) $(DESTDIR)$(bindir) - - -mostlyclean-generic: - -clean-generic: - 
-distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-binPROGRAMS mostlyclean-compile \ - mostlyclean-tags mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-binPROGRAMS clean-compile clean-tags clean-generic \ - mostlyclean-am - -clean: clean-am - -distclean-am: distclean-binPROGRAMS distclean-compile distclean-tags \ - distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-binPROGRAMS \ - maintainer-clean-compile maintainer-clean-tags \ - maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-am - -.PHONY: mostlyclean-binPROGRAMS distclean-binPROGRAMS clean-binPROGRAMS \ -maintainer-clean-binPROGRAMS uninstall-binPROGRAMS install-binPROGRAMS \ -mostlyclean-compile distclean-compile clean-compile \ -maintainer-clean-compile tags mostlyclean-tags distclean-tags \ -clean-tags maintainer-clean-tags distdir info-am info dvi-am dvi check \ -check-am installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -puzzle: $(puzzle_LDADD) $(puzzle_SOURCES) - $(SLINK) $(puzzle_LDADD) -o $@ - -sml1.o: ml1.c ml.h util.h - $(SCOMPILE) -c ml1.c && mv ml1.o $@ -sml2.o: ml2.c ml.h util.h - $(SCOMPILE) -c ml2.c && mv ml2.o $@ -sml3.o: ml3.c ml.h util.h gamma.h - $(SCOMPILE) -c ml3.c && mv ml3.o $@ -smodel1.o: model1.c ml.h util.h - $(SCOMPILE) -c model1.c && mv model1.o $@ -smodel2.o: model2.c ml.h util.h - $(SCOMPILE) -c model2.c && mv model2.o $@ -spuzzle1.o: puzzle1.c ml.h util.h puzzle.h gamma.h ppuzzle.h - $(SCOMPILE) 
-c puzzle1.c && mv puzzle1.o $@ -spuzzle2.o: puzzle2.c ml.h util.h puzzle.h ppuzzle.h - $(SCOMPILE) -c puzzle2.c && mv puzzle2.o $@ -sutil.o: util.c util.h - $(SCOMPILE) -c util.c && mv util.o $@ -sgamma.o: gamma.c gamma.h util.h - $(SCOMPILE) -c gamma.c && mv gamma.o $@ - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_dqo/src/README b/forester/archive/RIO/others/puzzle_dqo/src/README deleted file mode 100644 index 9c89883..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/README +++ /dev/null @@ -1 +0,0 @@ -Sources of the TREE-PUZZLE package diff --git a/forester/archive/RIO/others/puzzle_dqo/src/gamma.c b/forester/archive/RIO/others/puzzle_dqo/src/gamma.c deleted file mode 100644 index ee1f6df..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/gamma.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - * gamma.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - -#include -#include "util.h" -#include "gamma.h" - -/* private prototypes */ -static double IncompleteGamma (double x, double alpha, double ln_gamma_alpha); -static double PointNormal (double prob); -static double PointChi2 (double prob, double v); - -/* Gamma density function */ -double densityGamma (double x, double shape) -{ - return pow (shape, shape) * pow (x, shape-1) / - exp (shape*x + LnGamma(shape)); -} - -/* Gamma cdf */ -double cdfGamma (double x, double shape) -{ - double result; - - result = IncompleteGamma (shape*x, shape, LnGamma(shape)); - - return result; -} - -/* Gamma inverse cdf */ -double icdfGamma (double y, double shape) -{ - double result; - - result = PointChi2 (y, 2.0*shape)/(2.0*shape); - - /* to avoid -1.0 */ - if (result < 0.0) - { - result = 0.0; - } - - return result; -} - -/* Gamma n-th moment */ -double momentGamma (int n, double shape) -{ - int i; - double tmp = 1.0; - - for (i = 1; i < n; i++) - { - tmp *= (shape + i)/shape; - } - - return tmp; -} - -/* The following code comes from tools.c in Yang's PAML package */ - -double LnGamma (double alpha) -{ -/* returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places. - Stirling's formula is used for the central polynomial part of the procedure. - Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function. - Communications of the Association for Computing Machinery, 9:684 -*/ - double x=alpha, f=0, z; - - if (x<7) { - f=1; z=x-1; - while (++z<7) f*=z; - x=z; f=-log(f); - } - z = 1/(x*x); - return f + (x-0.5)*log(x) - x + .918938533204673 - + (((-.000595238095238*z+.000793650793651)*z-.002777777777778)*z - +.083333333333333)/x; -} - -static double IncompleteGamma (double x, double alpha, double ln_gamma_alpha) -{ -/* returns the incomplete gamma ratio I(x,alpha) where x is the upper - limit of the integration and alpha is the shape parameter. 
- returns (-1) if in error - (1) series expansion if (alpha>x || x<=1) - (2) continued fraction otherwise - RATNEST FORTRAN by - Bhattacharjee GP (1970) The incomplete gamma integral. Applied Statistics, - 19: 285-287 (AS32) -*/ - int i; - double p=alpha, g=ln_gamma_alpha; - double accurate=1e-8, overflow=1e30; - double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6]; - - if (x==0) return (0); - if (x<0 || p<=0) return (-1); - - factor=exp(p*log(x)-x-g); - if (x>1 && x>=p) goto l30; - /* (1) series expansion */ - gin=1; term=1; rn=p; - l20: - rn++; - term*=x/rn; gin+=term; - - if (term > accurate) goto l20; - gin*=factor/p; - goto l50; - l30: - /* (2) continued fraction */ - a=1-p; b=a+x+1; term=0; - pn[0]=1; pn[1]=x; pn[2]=x+1; pn[3]=x*b; - gin=pn[2]/pn[3]; - l32: - a++; b+=2; term++; an=a*term; - for (i=0; i<2; i++) pn[i+4]=b*pn[i+2]-an*pn[i]; - if (pn[5] == 0) goto l35; - rn=pn[4]/pn[5]; dif=fabs(gin-rn); - if (dif>accurate) goto l34; - if (dif<=accurate*rn) goto l42; - l34: - gin=rn; - l35: - for (i=0; i<4; i++) pn[i]=pn[i+2]; - if (fabs(pn[4]) < overflow) goto l32; - for (i=0; i<4; i++) pn[i]/=overflow; - goto l32; - l42: - gin=1-factor*gin; - - l50: - return (gin); -} - - -/* functions concerning the CDF and percentage points of the gamma and - Chi2 distribution -*/ -static double PointNormal (double prob) -{ -/* returns z so that Prob{x.999998 || v<=0) return (-1); - - g = LnGamma (v/2); - xx=v/2; c=xx-1; - if (v >= -1.24*log(p)) goto l1; - - ch=pow((p*xx*exp(g+xx*aa)), 1/xx); - if (ch-e<0) return (ch); - goto l4; -l1: - if (v>.32) goto l3; - ch=0.4; a=log(1-p); -l2: - q=ch; p1=1+ch*(4.67+ch); p2=ch*(6.73+ch*(6.66+ch)); - t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2; - ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t; - if (fabs(q/ch-1)-.01 <= 0) goto l4; - else goto l2; - -l3: - x=PointNormal (p); - p1=0.222222/v; ch=v*pow((x*sqrt(p1)+1-p1), 3.0); - if (ch>2.2*v+6) ch=-2*(log(1-p)-c*log(.5*ch)+g); -l4: - - do - { - q=ch; p1=.5*ch; - if ((t=IncompleteGamma 
(p1, xx, g))<0) { - return (-1); - } - p2=p-t; - t=p2*exp(xx*aa+g+p1-c*log(ch)); - b=t/ch; a=0.5*t-b*c; - - s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420; - s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520; - s3=(210+a*(462+a*(707+932*a)))/2520; - s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040; - s5=(84+264*a+c*(175+606*a))/2520; - s6=(120+c*(346+127*c))/5040; - ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6)))))); - } - while (fabs(q/ch-1) > e); - - return (ch); -} - - -/* Incomplete Gamma function Q(a,x) - - this is a cleanroom implementation of NRs gammq(a,x) -*/ -double IncompleteGammaQ (double a, double x) -{ - return 1.0-IncompleteGamma (x, a, LnGamma(a)); -} - - -/* probability that the observed chi-square - exceeds chi2 even if model is correct */ -double chi2prob (int deg, double chi2) -{ - return IncompleteGammaQ (0.5*deg, 0.5*chi2); -} - - - -/* chi square test - ef expected frequencies (sum up to 1 !!) - of observed frequencies (sum up to the number of samples) - numcat number of categories - returns critical significance level */ -double chi2test(double *ef, int *of, int numcat, int *chi2fail) -{ - double chi2, criticals, efn; - int i, below1, below5, reducedcat; - int samples; - - *chi2fail = FALSE; - reducedcat = numcat; - below1 = 0; - below5 = 0; - - /* compute number of samples */ - samples = 0; - for (i = 0; i < numcat; i++) - samples = samples + of[i]; - - /* compute chi square */ - chi2 = 0; - for (i = 0; i < numcat; i++) { - efn = ef[i]*((double) samples); - if (efn < 1.0) below1++; - if (efn < 5.0) below5++; - if (efn == 0.0) { - reducedcat--; - fprintf(stdout, "FPE error: samples=%d, ef[%d]=%f, of[%d]=%d, efn=%f, nc=%d, rc=%d\n", - samples, i, ef[i], i, of[i], efn, numcat, reducedcat); - fprintf(stdout, "PLEASE REPORT THIS ERROR TO DEVELOPERS !!!\n"); - fflush(stdout); - } else chi2 = chi2 + ((double) of[i]-efn)*((double) of[i]-efn)/efn; - } - - /* compute significance */ - criticals = chi2prob (numcat-1, chi2); - - /* no 
expected frequency category (sum up to # samples) below 1.0 */ - if (below1 > 0) *chi2fail = TRUE; - /* no more than 1/5 of the frequency categories below 5.0 */ - if (below5 > (int) floor(samples/5.0)) *chi2fail = TRUE; - - return criticals; -} - - -/* chi square test - ef expected frequencies (sum up to 1 !!) - of observed frequencies (sum up to the number of samples) - numcat number of categories - returns critical significance level */ -double altchi2test(double *ef, int *of, int numcat, int *chi2fail) -{ - double chi2, criticals, efn; - int i, below1, below5; - int samples; - - *chi2fail = FALSE; - below1 = 0; - below5 = 0; - - /* compute number of samples */ - samples = 0; - for (i = 0; i < numcat; i++) - samples = samples + of[i]; - - /* compute chi square */ - chi2 = 0; - for (i = 0; i < numcat; i++) { - efn = ef[i]*((double) samples); - if (efn < 1.0) below1++; - if (efn < 5.0) below5++; - chi2 = chi2 + ((double) of[i]-efn)*((double) of[i]-efn)/efn; - } - - /* compute significance */ - criticals = chi2prob (numcat-1, chi2); - - /* no expected frequency category (sum up to # samples) below 1.0 */ - if (below1 > 0) *chi2fail = TRUE; - /* no more than 1/5 of the frequency categories below 5.0 */ - if (below5 > (int) floor(samples/5.0)) *chi2fail = TRUE; - - return criticals; -} diff --git a/forester/archive/RIO/others/puzzle_dqo/src/gamma.h b/forester/archive/RIO/others/puzzle_dqo/src/gamma.h deleted file mode 100644 index 975f4ee..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/gamma.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * gamma.h - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - -#ifndef _GAMMA_ -#define _GAMMA_ - -double densityGamma (double, double); -double cdfGamma (double, double); -double icdfGamma (double, double); -double momentGamma (int, double); - -double LnGamma (double); -double IncompleteGammaQ (double, double); - -double chi2prob (int, double); -double chi2test (double *, int *, int , int *); - - -#endif /* _GAMMA_ */ diff --git a/forester/archive/RIO/others/puzzle_dqo/src/ml.h b/forester/archive/RIO/others/puzzle_dqo/src/ml.h deleted file mode 100644 index 7dfd2b0..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/ml.h +++ /dev/null @@ -1,279 +0,0 @@ -/* - * ml.h - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -#ifndef _ML_ -#define _ML_ - -/* definitions */ - -#define MINTS 0.20 /* Ts/Tv parameter */ -#define MAXTS 30.0 -#define MINYR 0.10 /* Y/R Ts parameter */ -#define MAXYR 6.00 -#define MINFI 0.00 /* fraction invariable sites */ -#define MAXFI 0.99 /* only for input */ -#define MINGE 0.01 /* rate heterogeneity parameter */ -#define MAXGE 0.99 -#define MINCAT 4 /* discrete Gamma categories */ -#define MAXCAT 16 - -#define RMHROOT 5.0 /* upper relative bound for height of root */ -#define MAXARC 900.0 /* upper limit on branch length (PAM) = 6.0 */ -#define MINARC 0.001 /* lower limit on branch length (PAM) = 0.00001 */ -#define EPSILON 0.0001 /* error in branch length (PAM) = 0.000001 */ -#define HEPSILON 0.0001 /* error in node and root heights */ -#define MAXIT 100 /* maximum number of iterates of smoothing */ -#define MINFDIFF 0.00002 /* lower limit on base frequency differences */ -#define MINFREQ 0.0001 /* lower limit on base frequencies = 0.01% */ -#define NUMQBRNCH 5 /* number of 
branches in a quartet */ -#define NUMQIBRNCH 1 /* number of internal branches in a quartet */ -#define NUMQSPC 4 /* number of sequences in a quartet */ - -/* 2D minimisation */ -#define PEPS1 0.01 /* epsilon substitution process estimation */ -#define PEPS2 0.01 /* epsilon rate heterogeneity estimation */ - -/* quartet series */ -#define MINPERTAXUM 2 -#define MAXPERTAXUM 6 -#define TSDIFF 0.20 -#define YRDIFF 0.10 - -/* type definitions */ - -typedef struct node -{ - struct node *isop; - struct node *kinp; - int descen; - int number; - double length; - double lengthc; - double varlen; - double height; - double varheight; - ivector paths; - cvector eprob; - dcube partials; /* partial likelihoods */ - char *label; /* internal labels */ -} Node; - -typedef struct tree -{ - Node *rootp; - Node **ebrnchp; /* list of pointers to external branches */ - Node **ibrnchp; /* list of pointers to internal branches */ - double lklhd; /* total log-likelihood */ - double lklhdc; /* total log-likelihood clock */ - dmatrix condlkl; /* likelihoods for each pattern and non-zero rate */ - double rssleast; -} Tree; - - -/* global variables */ - -EXTERN Node *chep; /* pointer to current height node */ -EXTERN Node *rootbr; /* pointer to root branch */ -EXTERN Node **heights; /* pointer to height nodes in unrooted tree */ -EXTERN int Numhts; /* number of height nodes in unrooted tree */ -EXTERN double hroot; /* height of root */ -EXTERN double varhroot; /* variance of height of root */ -EXTERN double maxhroot; /* maximal height of root */ -EXTERN int locroot; /* location of root */ -EXTERN int numbestroot; /* number of best locations for root */ -EXTERN int clockmode; /* clocklike vs. 
nonclocklike computation */ -EXTERN cmatrix Identif; /* sequence names */ -EXTERN cmatrix Seqchar; /* ML sequence data */ -EXTERN cmatrix Seqpat; /* ordered site patterns */ -EXTERN ivector constpat; /* indicates constant site patterns */ -EXTERN cvector seqchi; -EXTERN cvector seqchj; -EXTERN dcube partiali; -EXTERN dcube partialj; -EXTERN dcube ltprobr; /* transition probabilites (for all non-zero rates */ -EXTERN dvector Distanmat; /* vector with maximum likelihood distances CZ 05/16/01 */ -EXTERN dmatrix Evec; /* Eigenvectors */ -EXTERN dmatrix Ievc; /* Inverse eigenvectors */ -EXTERN double TSparam; /* Ts/Tv parameter */ -EXTERN double tsmean, yrmean; -EXTERN double YRparam; /* Y/R Ts parameter */ -EXTERN double geerr; /* estimated error of rate heterogeneity */ -EXTERN double Geta; /* rate heterogeneity parameter */ -EXTERN double fracconst; /* fraction of constant sites */ -EXTERN double fracconstpat;/* fraction of constant patterns */ -EXTERN double Proportion; /* for tree drawing */ -EXTERN double tserr; /* estimated error of TSparam */ -EXTERN double yrerr; /* estimated error of YRparam */ -EXTERN double fracinv; /* fraction of invariable sites */ -EXTERN double fierr; /* estimated error of fracinv */ -EXTERN dvector Brnlength; -EXTERN dvector Distanvec; -EXTERN dvector Eval; /* Eigenvalues of 1 PAM rate matrix */ -EXTERN dvector Freqtpm; /* base frequencies */ -EXTERN dvector Rates; /* rate of each of the categories */ -EXTERN dmatrix iexp; -EXTERN imatrix Basecomp; /* base composition of each taxon */ -EXTERN ivector usedtaxa; /* list needed in the input treefile procedure */ -EXTERN int numtc; /* auxiliary variable for printing rooted tree */ -EXTERN int qcalg_optn; /* use quartet subsampling algorithm */ -EXTERN int approxp_optn; /* approximate parameter estimation */ -EXTERN int chi2fail; /* flag for chi2 test */ -EXTERN int Converg; /* flag for ML convergence (no clock) */ -EXTERN int Convergc; /* flag for ML convergence (clock) */ -EXTERN int 
data_optn; /* type of sequence input data */ -EXTERN int Dayhf_optn; /* Dayhoff model */ -EXTERN int HKY_optn; /* use HKY model */ -EXTERN int Jtt_optn; /* JTT model */ -EXTERN int blosum62_optn; /* BLOSUM 62 model */ -EXTERN int mtrev_optn; /* mtREV model */ -EXTERN int cprev_optn; /* cpREV model */ -EXTERN int vtmv_optn; /* VT model */ -EXTERN int wag_optn; /* WAG model */ -EXTERN int Maxsite; /* number of ML characters per taxum */ -EXTERN int Maxspc; /* number of sequences */ -EXTERN int mlmode; /* quartet ML or user defined tree ML */ -EXTERN int nuc_optn; /* nucleotide (4x4) models */ -EXTERN int Numbrnch; /* number of branches of current tree */ -EXTERN int numcats; /* number of rate categories */ -EXTERN int Numconst; /* number of constant sites */ -EXTERN int Numconstpat; /* number of constant patterns */ -EXTERN int Numibrnch; /* number of internal branches of current tree */ -EXTERN int Numitc; /* number of ML iterations assumning clock */ -EXTERN int Numit; /* number of ML iterations if there is convergence */ -EXTERN int Numptrn; /* number of site patterns */ -EXTERN int Numspc; /* number of sequences of current tree */ -EXTERN int optim_optn; /* optimize model parameters */ -EXTERN int grate_optim; /* optimize Gamma rate heterogeneity parameter */ -EXTERN int SH_optn; /* SH nucleotide (16x16) model */ -EXTERN int TN_optn; /* use TN model */ -EXTERN int tpmradix; /* number of different states */ -EXTERN int fracinv_optim; /* optimize fraction of invariable sites */ -EXTERN int typ_optn; /* type of PUZZLE analysis */ -EXTERN ivector Weight; /* weight of each site pattern */ -EXTERN Tree *Ctree; /* pointer to current tree */ -EXTERN ulivector badtaxon; /* involment of each taxon in a bad quartet */ -EXTERN int qca, qcb, qcc, qcd; /* quartet currently optimized */ -EXTERN ivector Alias; /* link site -> corresponding site pattern */ -EXTERN ivector bestrate; /* optimal assignment of rates to sequence sites */ - -EXTERN int bestratefound; - -/* function 
prototypes of all ml function */ - -void convfreq(dvector); -void radixsort(cmatrix, ivector, int, int, int *); -void condenceseq(cmatrix, ivector, cmatrix, ivector, int, int, int); -void countconstantsites(cmatrix, ivector, int, int, int *, int*); -void evaluateseqs(void); -void elmhes(dmatrix, ivector, int); -void eltran(dmatrix, dmatrix, ivector, int); -void mcdiv(double, double, double, double, double *, double *); -void hqr2(int, int, int, dmatrix, dmatrix, dvector, dvector); -void onepamratematrix(dmatrix); -void eigensystem(dvector, dmatrix); -void luinverse(dmatrix, dmatrix, int); -void checkevector(dmatrix, dmatrix, int); -void tranprobmat(void); -void tprobmtrx(double, dmatrix); -double comptotloglkl(dmatrix); -void allsitelkl(dmatrix, dvector); -double pairlkl(double); -double mldistance(int); -void initdistan(void); -void computedistan(void); -void productpartials(Node *); -void partialsinternal(Node *); -void partialsexternal(Node *); -void initpartials(Tree *); -double intlkl(double); -void optinternalbranch(Node *); -double extlkl(double); -void optexternalbranch(Node *); -void finishlkl(Node *); -double optlkl(Tree *); -double treelkl(Tree *); -void luequation(dmatrix, dvector, int); -void lslength(Tree *, dvector, int, int, dvector); - -void getusertree(FILE *, cvector, int); -Node *internalnode(Tree *, char **, int *); -void constructtree(Tree *, cvector); -void removebasalbif(cvector); -void makeusertree(FILE *); -Tree *new_tree(int, int, cmatrix); -Tree *new_quartet(int, cmatrix); -void free_tree(Tree *, int); -void make_quartet(int, int, int, int); -void changedistan(dmatrix, dvector, int); -double quartet_lklhd(int, int, int, int); -double quartet_alklhd(int, int, int, int); -void readusertree(FILE *); -double usertree_lklhd(void); -double usertree_alklhd(void); -void mlstart(void); -void distupdate(int, int, int, int); -void mlfinish(void); -void prbranch(Node *, int, int, int, ivector, ivector, FILE *); -void getproportion(double *, dvector, 
int); -void prtopology(FILE *); -void fputphylogeny(FILE *); -void resulttree(FILE *); -void njtree(FILE *); -void njdistantree(Tree *); -void findbestratecombination(void); -void printbestratecombination(FILE *); -int checkedge(int); -void fputsubstree(FILE *, Node *); -void fputrooted(FILE *, int); -void findheights(Node *); -void initclock(int); -double clock_alklhd(int); -double heightlkl(double); -void optheight(void); -double rheightlkl(double); -void optrheight(void); -double clock_lklhd(int); -int findrootedge(void); -void resultheights(FILE *); - -double homogentest(int); -void YangDiscreteGamma(double, int, double *); -void updaterates(void); -void computestat(double *, int, double *, double *); -double quartetml(int, int, int, int); -double opttsq(double); -double optyrq(double); -void optimseqevolparamsq(void); -double opttst(double); -double optyrt(double); -void optimseqevolparamst(void); -double optfi(double); -double optge(double); -void optimrateparams(void); - -int gettpmradix(void); -void rtfdata(dmatrix, double *); -int code2int(cvector); -char *int2code(int); - -void jttdata(dmatrix, double *); -void dyhfdata(dmatrix, double *); -void mtrevdata(dmatrix, double *); -void cprev45data(dmatrix, double *); -void blosum62data(dmatrix, double *); -void vtmvdata(dmatrix, double *); -void wagdata(dmatrix, double *); - -#endif diff --git a/forester/archive/RIO/others/puzzle_dqo/src/ml1.c b/forester/archive/RIO/others/puzzle_dqo/src/ml1.c deleted file mode 100644 index a3a561f..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/ml1.c +++ /dev/null @@ -1,1743 +0,0 @@ -/* - * ml1.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - - -/******************************************************************************/ -/* definitions and prototypes */ -/******************************************************************************/ - -#define EXTERN extern - -/* prototypes */ -#include -#include -#include -#include -#include "util.h" -#include "ml.h" - -#define STDOUT stdout -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUTFILE STDOUT, -#endif - - -/******************************************************************************/ -/* compacting sequence data information */ -/******************************************************************************/ - - -/***************************** internal functions *****************************/ - - -/* make all frequencies a little different */ -void convfreq(dvector freqemp) -{ - int i, j, maxi=0; - double freq, maxfreq, sum; - - - sum = 0.0; - maxfreq = 0.0; - for (i = 0; i < tpmradix; i++) { - freq = freqemp[i]; - if (freq < MINFREQ) freqemp[i] = MINFREQ; - if (freq > maxfreq) { - maxfreq = freq; - maxi = i; - } - sum += freqemp[i]; - } - freqemp[maxi] += 1.0 - sum; - - for (i = 0; i < tpmradix - 1; i++) { - for (j = i + 1; j < tpmradix; j++) { - if (freqemp[i] == freqemp[j]) { - freqemp[i] += MINFDIFF/2.0; - freqemp[j] -= MINFDIFF/2.0; - } - } - } -} - -/* sort site patters of original input data */ -void radixsort(cmatrix seqchar, ivector ali, int maxspc, int maxsite, - int *numptrn) -{ - int i, j, k, l, n, pass; - int *awork; - int *count; - - - awork = new_ivector(maxsite); - count = new_ivector(tpmradix+1); - for (i = 0; i < maxsite; i++) - ali[i] = i; - for (pass = maxspc - 1; pass >= 0; pass--) { - for (j = 0; j < tpmradix+1; j++) - count[j] = 0; - for (i = 0; i < maxsite; i++) - count[(int) seqchar[pass][ali[i]]]++; - for (j = 1; j < tpmradix+1; j++) - count[j] += 
count[j-1]; - for (i = maxsite-1; i >= 0; i--) - awork[ --count[(int) seqchar[pass][ali[i]]] ] = ali[i]; - for (i = 0; i < maxsite; i++) - ali[i] = awork[i]; - } - free_ivector(awork); - free_ivector(count); - n = 1; - for (j = 1; j < maxsite; j++) { - k = ali[j]; - l = ali[j-1]; - for (i = 0; i < maxspc; i++) { - if (seqchar[i][l] != seqchar[i][k]) { - n++; - break; - } - } - } - *numptrn = n; -} - - -void condenceseq(cmatrix seqchar, ivector ali, cmatrix seqconint, - ivector weight, int maxspc, int maxsite, int numptrn) -{ - int i, j, k, n; - int agree_flag; /* boolean */ - - - n = 0; - k = ali[n]; - for (i = 0; i < maxspc; i++) { - seqconint[i][n] = seqchar[i][k]; - } - weight[n] = 1; - Alias[k] = 0; - for (j = 1; j < maxsite; j++) { - k = ali[j]; - agree_flag = TRUE; - for (i = 0; i < maxspc; i++) { - if (seqconint[i][n] != seqchar[i][k]) { - agree_flag = FALSE; - break; - } - } - if (agree_flag == FALSE) { - n++; - for (i = 0; i < maxspc; i++) { - seqconint[i][n] = seqchar[i][k]; - } - weight[n] = 1; - Alias[k] = n; - } else { - weight[n]++; - Alias[k] = n; - } - } - n++; - if (numptrn != n) { - /* Problem in condenceseq */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR A TO DEVELOPERS\n\n\n"); - exit(1); - } -} - -void countconstantsites(cmatrix seqpat, ivector weight, int maxspc, int numptrn, - int *numconst, int *numconstpat) -{ - int character, s, i, constflag; - - *numconst = 0; - *numconstpat = 0; - for (s = 0; s < numptrn; s++) { /* check all patterns */ - constpat[s] = FALSE; - constflag = TRUE; - character = seqpat[0][s]; - for (i = 1; i < maxspc; i++) { - if (seqpat[i][s] != character) { - constflag = FALSE; - break; - } - } - if (character != tpmradix && constflag) { - (*numconst) = (*numconst) + weight[s]; - (*numconstpat)++; - constpat[s] = TRUE; - } - } -} - -/***************************** exported functions *****************************/ - - -void evaluateseqs() -{ - ivector ali; - - convfreq(Freqtpm); /* make all frequencies slightly 
different */ - ali = new_ivector(Maxsite); - radixsort(Seqchar, ali, Maxspc, Maxsite, &Numptrn); - Seqpat = new_cmatrix(Maxspc, Numptrn); - constpat = new_ivector(Numptrn); - Weight = new_ivector(Numptrn); - condenceseq(Seqchar, ali, Seqpat, Weight, Maxspc, Maxsite, Numptrn); - free_ivector(ali); - countconstantsites(Seqpat, Weight, Maxspc, Numptrn, &Numconst, &Numconstpat); - fracconstpat = (double) Numconstpat / (double) Numptrn; - fracconst = (double) Numconst / (double) Maxsite; -} - - -/******************************************************************************/ -/* computation of Pij(t) */ -/******************************************************************************/ - - -/***************************** internal functions *****************************/ - - -void elmhes(dmatrix a, ivector ordr, int n) -{ - int m, j, i; - double y, x; - - - for (i = 0; i < n; i++) - ordr[i] = 0; - for (m = 2; m < n; m++) { - x = 0.0; - i = m; - for (j = m; j <= n; j++) { - if (fabs(a[j - 1][m - 2]) > fabs(x)) { - x = a[j - 1][m - 2]; - i = j; - } - } - ordr[m - 1] = i; /* vector */ - if (i != m) { - for (j = m - 2; j < n; j++) { - y = a[i - 1][j]; - a[i - 1][j] = a[m - 1][j]; - a[m - 1][j] = y; - } - for (j = 0; j < n; j++) { - y = a[j][i - 1]; - a[j][i - 1] = a[j][m - 1]; - a[j][m - 1] = y; - } - } - if (x != 0.0) { - for (i = m; i < n; i++) { - y = a[i][m - 2]; - if (y != 0.0) { - y /= x; - a[i][m - 2] = y; - for (j = m - 1; j < n; j++) - a[i][j] -= y * a[m - 1][j]; - for (j = 0; j < n; j++) - a[j][m - 1] += y * a[j][i]; - } - } - } - } -} - - -void eltran(dmatrix a, dmatrix zz, ivector ordr, int n) -{ - int i, j, m; - - - for (i = 0; i < n; i++) { - for (j = i + 1; j < n; j++) { - zz[i][j] = 0.0; - zz[j][i] = 0.0; - } - zz[i][i] = 1.0; - } - if (n <= 2) - return; - for (m = n - 1; m >= 2; m--) { - for (i = m; i < n; i++) - zz[i][m - 1] = a[i][m - 2]; - i = ordr[m - 1]; - if (i != m) { - for (j = m - 1; j < n; j++) { - zz[m - 1][j] = zz[i - 1][j]; - zz[i - 1][j] = 0.0; - 
} - zz[i - 1][m - 1] = 1.0; - } - } -} - - -void mcdiv(double ar, double ai, double br, double bi, - double *cr, double *ci) -{ - double s, ars, ais, brs, bis; - - - s = fabs(br) + fabs(bi); - ars = ar / s; - ais = ai / s; - brs = br / s; - bis = bi / s; - s = brs * brs + bis * bis; - *cr = (ars * brs + ais * bis) / s; - *ci = (ais * brs - ars * bis) / s; -} - - -void hqr2(int n, int low, int hgh, dmatrix h, - dmatrix zz, dvector wr, dvector wi) -{ - int i, j, k, l=0, m, en, na, itn, its; - double p=0, q=0, r=0, s=0, t, w, x=0, y, ra, sa, vi, vr, z=0, norm, tst1, tst2; - int notlas; /* boolean */ - - - norm = 0.0; - k = 1; - /* store isolated roots and compute matrix norm */ - for (i = 0; i < n; i++) { - for (j = k - 1; j < n; j++) - norm += fabs(h[i][j]); - k = i + 1; - if (i + 1 < low || i + 1 > hgh) { - wr[i] = h[i][i]; - wi[i] = 0.0; - } - } - en = hgh; - t = 0.0; - itn = n * 30; - while (en >= low) { /* search for next eigenvalues */ - its = 0; - na = en - 1; - while (en >= 1) { - /* look for single small sub-diagonal element */ - for (l = en; l > low; l--) { - s = fabs(h[l - 2][l - 2]) + fabs(h[l - 1][l - 1]); - if (s == 0.0) - s = norm; - tst1 = s; - tst2 = tst1 + fabs(h[l - 1][l - 2]); - if (tst2 == tst1) - goto L100; - } - l = low; - L100: - x = h[en - 1][en - 1]; /* form shift */ - if (l == en || l == na) - break; - if (itn == 0) { - /* all eigenvalues have not converged */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR B TO DEVELOPERS\n\n\n"); - exit(1); - } - y = h[na - 1][na - 1]; - w = h[en - 1][na - 1] * h[na - 1][en - 1]; - /* form exceptional shift */ - if (its == 10 || its == 20) { - t += x; - for (i = low - 1; i < en; i++) - h[i][i] -= x; - s = fabs(h[en - 1][na - 1]) + fabs(h[na - 1][en - 3]); - x = 0.75 * s; - y = x; - w = -0.4375 * s * s; - } - its++; - itn--; - /* look for two consecutive small sub-diagonal elements */ - for (m = en - 2; m >= l; m--) { - z = h[m - 1][m - 1]; - r = x - z; - s = y - z; - p = (r * s - w) / h[m][m - 1] + 
h[m - 1][m]; - q = h[m][m] - z - r - s; - r = h[m + 1][m]; - s = fabs(p) + fabs(q) + fabs(r); - p /= s; - q /= s; - r /= s; - if (m == l) - break; - tst1 = fabs(p) * - (fabs(h[m - 2][m - 2]) + fabs(z) + fabs(h[m][m])); - tst2 = tst1 + fabs(h[m - 1][m - 2]) * (fabs(q) + fabs(r)); - if (tst2 == tst1) - break; - } - for (i = m + 2; i <= en; i++) { - h[i - 1][i - 3] = 0.0; - if (i != m + 2) - h[i - 1][i - 4] = 0.0; - } - for (k = m; k <= na; k++) { - notlas = (k != na); - if (k != m) { - p = h[k - 1][k - 2]; - q = h[k][k - 2]; - r = 0.0; - if (notlas) - r = h[k + 1][k - 2]; - x = fabs(p) + fabs(q) + fabs(r); - if (x != 0.0) { - p /= x; - q /= x; - r /= x; - } - } - if (x != 0.0) { - if (p < 0.0) /* sign */ - s = - sqrt(p * p + q * q + r * r); - else - s = sqrt(p * p + q * q + r * r); - if (k != m) - h[k - 1][k - 2] = -s * x; - else { - if (l != m) - h[k - 1][k - 2] = -h[k - 1][k - 2]; - } - p += s; - x = p / s; - y = q / s; - z = r / s; - q /= p; - r /= p; - if (!notlas) { - for (j = k - 1; j < n; j++) { /* row modification */ - p = h[k - 1][j] + q * h[k][j]; - h[k - 1][j] -= p * x; - h[k][j] -= p * y; - } - j = (en < (k + 3)) ? en : (k + 3); /* min */ - for (i = 0; i < j; i++) { /* column modification */ - p = x * h[i][k - 1] + y * h[i][k]; - h[i][k - 1] -= p; - h[i][k] -= p * q; - } - /* accumulate transformations */ - for (i = low - 1; i < hgh; i++) { - p = x * zz[i][k - 1] + y * zz[i][k]; - zz[i][k - 1] -= p; - zz[i][k] -= p * q; - } - } else { - for (j = k - 1; j < n; j++) { /* row modification */ - p = h[k - 1][j] + q * h[k][j] + r * h[k + 1][j]; - h[k - 1][j] -= p * x; - h[k][j] -= p * y; - h[k + 1][j] -= p * z; - } - j = (en < (k + 3)) ? 
en : (k + 3); /* min */ - for (i = 0; i < j; i++) { /* column modification */ - p = x * h[i][k - 1] + y * h[i][k] + z * h[i][k + 1]; - h[i][k - 1] -= p; - h[i][k] -= p * q; - h[i][k + 1] -= p * r; - } - /* accumulate transformations */ - for (i = low - 1; i < hgh; i++) { - p = x * zz[i][k - 1] + y * zz[i][k] + - z * zz[i][k + 1]; - zz[i][k - 1] -= p; - zz[i][k] -= p * q; - zz[i][k + 1] -= p * r; - } - } - } - } /* for k */ - } /* while infinite loop */ - if (l == en) { /* one root found */ - h[en - 1][en - 1] = x + t; - wr[en - 1] = h[en - 1][en - 1]; - wi[en - 1] = 0.0; - en = na; - continue; - } - y = h[na - 1][na - 1]; - w = h[en - 1][na - 1] * h[na - 1][en - 1]; - p = (y - x) / 2.0; - q = p * p + w; - z = sqrt(fabs(q)); - h[en - 1][en - 1] = x + t; - x = h[en - 1][en - 1]; - h[na - 1][na - 1] = y + t; - if (q >= 0.0) { /* real pair */ - if (p < 0.0) /* sign */ - z = p - fabs(z); - else - z = p + fabs(z); - wr[na - 1] = x + z; - wr[en - 1] = wr[na - 1]; - if (z != 0.0) - wr[en - 1] = x - w / z; - wi[na - 1] = 0.0; - wi[en - 1] = 0.0; - x = h[en - 1][na - 1]; - s = fabs(x) + fabs(z); - p = x / s; - q = z / s; - r = sqrt(p * p + q * q); - p /= r; - q /= r; - for (j = na - 1; j < n; j++) { /* row modification */ - z = h[na - 1][j]; - h[na - 1][j] = q * z + p * h[en - 1][j]; - h[en - 1][j] = q * h[en - 1][j] - p * z; - } - for (i = 0; i < en; i++) { /* column modification */ - z = h[i][na - 1]; - h[i][na - 1] = q * z + p * h[i][en - 1]; - h[i][en - 1] = q * h[i][en - 1] - p * z; - } - /* accumulate transformations */ - for (i = low - 1; i < hgh; i++) { - z = zz[i][na - 1]; - zz[i][na - 1] = q * z + p * zz[i][en - 1]; - zz[i][en - 1] = q * zz[i][en - 1] - p * z; - } - } else { /* complex pair */ - wr[na - 1] = x + p; - wr[en - 1] = x + p; - wi[na - 1] = z; - wi[en - 1] = -z; - } - en -= 2; - } /* while en >= low */ - /* backsubstitute to find vectors of upper triangular form */ - if (norm != 0.0) { - for (en = n; en >= 1; en--) { - p = wr[en - 1]; - q = wi[en - 1]; - 
na = en - 1; - if (q == 0.0) {/* real vector */ - m = en; - h[en - 1][en - 1] = 1.0; - if (na != 0) { - for (i = en - 2; i >= 0; i--) { - w = h[i][i] - p; - r = 0.0; - for (j = m - 1; j < en; j++) - r += h[i][j] * h[j][en - 1]; - if (wi[i] < 0.0) { - z = w; - s = r; - } else { - m = i + 1; - if (wi[i] == 0.0) { - t = w; - if (t == 0.0) { - tst1 = norm; - t = tst1; - do { - t = 0.01 * t; - tst2 = norm + t; - } while (tst2 > tst1); - } - h[i][en - 1] = -(r / t); - } else { /* solve real equations */ - x = h[i][i + 1]; - y = h[i + 1][i]; - q = (wr[i] - p) * (wr[i] - p) + wi[i] * wi[i]; - t = (x * s - z * r) / q; - h[i][en - 1] = t; - if (fabs(x) > fabs(z)) - h[i + 1][en - 1] = (-r - w * t) / x; - else - h[i + 1][en - 1] = (-s - y * t) / z; - } - /* overflow control */ - t = fabs(h[i][en - 1]); - if (t != 0.0) { - tst1 = t; - tst2 = tst1 + 1.0 / tst1; - if (tst2 <= tst1) { - for (j = i; j < en; j++) - h[j][en - 1] /= t; - } - } - } - } - } - } else if (q > 0.0) { - m = na; - if (fabs(h[en - 1][na - 1]) > fabs(h[na - 1][en - 1])) { - h[na - 1][na - 1] = q / h[en - 1][na - 1]; - h[na - 1][en - 1] = (p - h[en - 1][en - 1]) / - h[en - 1][na - 1]; - } else - mcdiv(0.0, -h[na - 1][en - 1], h[na - 1][na - 1] - p, q, - &h[na - 1][na - 1], &h[na - 1][en - 1]); - h[en - 1][na - 1] = 0.0; - h[en - 1][en - 1] = 1.0; - if (en != 2) { - for (i = en - 3; i >= 0; i--) { - w = h[i][i] - p; - ra = 0.0; - sa = 0.0; - for (j = m - 1; j < en; j++) { - ra += h[i][j] * h[j][na - 1]; - sa += h[i][j] * h[j][en - 1]; - } - if (wi[i] < 0.0) { - z = w; - r = ra; - s = sa; - } else { - m = i + 1; - if (wi[i] == 0.0) - mcdiv(-ra, -sa, w, q, &h[i][na - 1], - &h[i][en - 1]); - else { /* solve complex equations */ - x = h[i][i + 1]; - y = h[i + 1][i]; - vr = (wr[i] - p) * (wr[i] - p); - vr = vr + wi[i] * wi[i] - q * q; - vi = (wr[i] - p) * 2.0 * q; - if (vr == 0.0 && vi == 0.0) { - tst1 = norm * (fabs(w) + fabs(q) + fabs(x) + - fabs(y) + fabs(z)); - vr = tst1; - do { - vr = 0.01 * vr; - tst2 = tst1 + 
vr; - } while (tst2 > tst1); - } - mcdiv(x * r - z * ra + q * sa, - x * s - z * sa - q * ra, vr, vi, - &h[i][na - 1], &h[i][en - 1]); - if (fabs(x) > fabs(z) + fabs(q)) { - h[i + 1] - [na - 1] = (q * h[i][en - 1] - - w * h[i][na - 1] - ra) / x; - h[i + 1][en - 1] = (-sa - w * h[i][en - 1] - - q * h[i][na - 1]) / x; - } else - mcdiv(-r - y * h[i][na - 1], - -s - y * h[i][en - 1], z, q, - &h[i + 1][na - 1], &h[i + 1][en - 1]); - } - /* overflow control */ - t = (fabs(h[i][na - 1]) > fabs(h[i][en - 1])) ? - fabs(h[i][na - 1]) : fabs(h[i][en - 1]); - if (t != 0.0) { - tst1 = t; - tst2 = tst1 + 1.0 / tst1; - if (tst2 <= tst1) { - for (j = i; j < en; j++) { - h[j][na - 1] /= t; - h[j][en - 1] /= t; - } - } - } - } - } - } - } - } - /* end back substitution. vectors of isolated roots */ - for (i = 0; i < n; i++) { - if (i + 1 < low || i + 1 > hgh) { - for (j = i; j < n; j++) - zz[i][j] = h[i][j]; - } - } - /* multiply by transformation matrix to give vectors of - * original full matrix. */ - for (j = n - 1; j >= low - 1; j--) { - m = ((j + 1) < hgh) ? (j + 1) : hgh; /* min */ - for (i = low - 1; i < hgh; i++) { - z = 0.0; - for (k = low - 1; k < m; k++) - z += zz[i][k] * h[k][j]; - zz[i][j] = z; - } - } - } - return; -} - - -/* make rate matrix with 0.01 expected substitutions per unit time */ -void onepamratematrix(dmatrix a) -{ - int i, j; - double delta, temp, sum; - dvector m; - - for (i = 0; i < tpmradix; i++) - { - for (j = 0; j < tpmradix; j++) - { - a[i][j] = Freqtpm[j]*a[i][j]; - } - } - - m = new_dvector(tpmradix); - for (i = 0, sum = 0.0; i < tpmradix; i++) - { - for (j = 0, temp = 0.0; j < tpmradix; j++) - temp += a[i][j]; - m[i] = temp; /* row sum */ - sum += temp*Freqtpm[i]; /* exp. rate */ - } - delta = 0.01 / sum; /* 0.01 subst. 
per unit time */ - for (i = 0; i < tpmradix; i++) { - for (j = 0; j < tpmradix; j++) { - if (i != j) - a[i][j] = delta * a[i][j]; - else - a[i][j] = delta * (-m[i]); - } - } - free_dvector(m); -} - - -void eigensystem(dvector eval, dmatrix evec) -{ - dvector evali, forg; - dmatrix a, b; - ivector ordr; - int i, j, k, error; - double zero; - - - ordr = new_ivector(tpmradix); - evali = new_dvector(tpmradix); - forg = new_dvector(tpmradix); - a = new_dmatrix(tpmradix,tpmradix); - b = new_dmatrix(tpmradix,tpmradix); - - rtfdata(a, forg); /* get relative transition matrix and frequencies */ - - onepamratematrix(a); /* make 1 PAM rate matrix */ - - /* copy a to b */ - for (i = 0; i < tpmradix; i++) - for (j = 0; j < tpmradix; j++) - b[i][j] = a[i][j]; - - elmhes(a, ordr, tpmradix); /* compute eigenvalues and eigenvectors */ - eltran(a, evec, ordr, tpmradix); - hqr2(tpmradix, 1, tpmradix, a, evec, eval, evali); - - /* check eigenvalue equation */ - error = FALSE; - for (j = 0; j < tpmradix; j++) { - for (i = 0, zero = 0.0; i < tpmradix; i++) { - for (k = 0; k < tpmradix; k++) zero += b[i][k] * evec[k][j]; - zero -= eval[j] * evec[i][j]; - if (fabs(zero) > 1.0e-5) - error = TRUE; - } - } - if (error) - FPRINTF(STDOUTFILE "\nWARNING: Eigensystem doesn't satisfy eigenvalue equation!\n"); - - free_ivector(ordr); - free_dvector(evali); - free_dvector(forg); - free_dmatrix(a); - free_dmatrix(b); -} - - -void luinverse(dmatrix inmat, dmatrix imtrx, int size) -{ - double eps = 1.0e-20; /* ! 
*/ - int i, j, k, l, maxi=0, idx, ix, jx; - double sum, tmp, maxb, aw; - ivector index; - double *wk; - dmatrix omtrx; - - - index = new_ivector(tpmradix); - omtrx = new_dmatrix(tpmradix,tpmradix); - - /* copy inmat to omtrx */ - for (i = 0; i < tpmradix; i++) - for (j = 0; j < tpmradix; j++) - omtrx[i][j] = inmat[i][j]; - - wk = (double *) malloc((unsigned)size * sizeof(double)); - aw = 1.0; - for (i = 0; i < size; i++) { - maxb = 0.0; - for (j = 0; j < size; j++) { - if (fabs(omtrx[i][j]) > maxb) - maxb = fabs(omtrx[i][j]); - } - if (maxb == 0.0) { - /* Singular matrix */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR C TO DEVELOPERS\n\n\n"); - exit(1); - } - wk[i] = 1.0 / maxb; - } - for (j = 0; j < size; j++) { - for (i = 0; i < j; i++) { - sum = omtrx[i][j]; - for (k = 0; k < i; k++) - sum -= omtrx[i][k] * omtrx[k][j]; - omtrx[i][j] = sum; - } - maxb = 0.0; - for (i = j; i < size; i++) { - sum = omtrx[i][j]; - for (k = 0; k < j; k++) - sum -= omtrx[i][k] * omtrx[k][j]; - omtrx[i][j] = sum; - tmp = wk[i] * fabs(sum); - if (tmp >= maxb) { - maxb = tmp; - maxi = i; - } - } - if (j != maxi) { - for (k = 0; k < size; k++) { - tmp = omtrx[maxi][k]; - omtrx[maxi][k] = omtrx[j][k]; - omtrx[j][k] = tmp; - } - aw = -aw; - wk[maxi] = wk[j]; - } - index[j] = maxi; - if (omtrx[j][j] == 0.0) - omtrx[j][j] = eps; - if (j != size - 1) { - tmp = 1.0 / omtrx[j][j]; - for (i = j + 1; i < size; i++) - omtrx[i][j] *= tmp; - } - } - for (jx = 0; jx < size; jx++) { - for (ix = 0; ix < size; ix++) - wk[ix] = 0.0; - wk[jx] = 1.0; - l = -1; - for (i = 0; i < size; i++) { - idx = index[i]; - sum = wk[idx]; - wk[idx] = wk[i]; - if (l != -1) { - for (j = l; j < i; j++) - sum -= omtrx[i][j] * wk[j]; - } else if (sum != 0.0) - l = i; - wk[i] = sum; - } - for (i = size - 1; i >= 0; i--) { - sum = wk[i]; - for (j = i + 1; j < size; j++) - sum -= omtrx[i][j] * wk[j]; - wk[i] = sum / omtrx[i][i]; - } - for (ix = 0; ix < size; ix++) - imtrx[ix][jx] = wk[ix]; - } - free((char *)wk); - wk = 
NULL; - free_ivector(index); - free_dmatrix(omtrx); -} - - -void checkevector(dmatrix evec, dmatrix ivec, int nn) -{ - int i, j, ia, ib, ic, error; - dmatrix matx; - double sum; - - - matx = new_dmatrix(nn, nn); - /* multiply matrix of eigenvectors and its inverse */ - for (ia = 0; ia < nn; ia++) { - for (ic = 0; ic < nn; ic++) { - sum = 0.0; - for (ib = 0; ib < nn; ib++) sum += evec[ia][ib] * ivec[ib][ic]; - matx[ia][ic] = sum; - } - } - /* check whether the unitary matrix is obtained */ - error = FALSE; - for (i = 0; i < nn; i++) { - for (j = 0; j < nn; j++) { - if (i == j) { - if (fabs(matx[i][j] - 1.0) > 1.0e-5) - error = TRUE; - } else { - if (fabs(matx[i][j]) > 1.0e-5) - error = TRUE; - } - } - } - if (error) { - FPRINTF(STDOUTFILE "\nWARNING: Inversion of eigenvector matrix not perfect!\n"); - } - free_dmatrix(matx); -} - - -/***************************** exported functions *****************************/ - - -/* compute 1 PAM rate matrix, its eigensystem, and the inverse matrix thereof */ -void tranprobmat() -{ - eigensystem(Eval, Evec); /* eigensystem of 1 PAM rate matrix */ - luinverse(Evec, Ievc, tpmradix); /* inverse eigenvectors are in Ievc */ - checkevector(Evec, Ievc, tpmradix); /* check whether inversion was OK */ -} - - -/* compute P(t) */ -void tprobmtrx(double arc, dmatrix tpr) -{ - register int i, j, k; - register double temp; - - - for (k = 0; k < tpmradix; k++) { - temp = exp(arc * Eval[k]); - for (j = 0; j < tpmradix; j++) - iexp[k][j] = Ievc[k][j] * temp; - } - for (i = 0; i < tpmradix; i++) { - for (j = 0; j < tpmradix; j++) { - temp = 0.0; - for (k = 0; k < tpmradix; k++) - temp += Evec[i][k] * iexp[k][j]; - tpr[i][j] = fabs(temp); - } - } -} - - -/******************************************************************************/ -/* estimation of maximum likelihood distances */ -/******************************************************************************/ - -/* compute total log-likelihood - input: likelihoods for each site and non-zero 
rate - output: total log-likelihood (incl. zero rate category) */ -double comptotloglkl(dmatrix cdl) -{ - int k, r; - double loglkl, fv, fv2, sitelkl; - - loglkl = 0.0; - fv = 1.0-fracinv; - fv2 = (1.0-fracinv)/(double) numcats; - - if (numcats == 1) { - - for (k = 0; k < Numptrn; k++) { - - /* compute likelihood for pattern k */ - sitelkl = cdl[0][k]*fv; - if (constpat[k] == TRUE) - sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]]; - - /* total log-likelihood */ - loglkl += log(sitelkl)*Weight[k]; - - } - - } else { - - for (k = 0; k < Numptrn; k++) { - - /* this general routine works always but it's better - to run it only when it's really necessary */ - - /* compute likelihood for pattern k */ - sitelkl = 0.0; - for (r = 0; r < numcats; r++) - sitelkl += cdl[r][k]; - sitelkl = fv2*sitelkl; - if (constpat[k] == TRUE) - sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]]; - - /* total log-likelihood */ - loglkl += log(sitelkl)*Weight[k]; - - } - - } - - return loglkl; -} - - -/* computes the site log-likelihoods - input: likelihoods for each site and non-zero rate - output: log-likelihood for each site */ -void allsitelkl(dmatrix cdl, dvector aslkl) -{ - int k, r; - double fv, fv2, sitelkl; - - fv = 1.0-fracinv; - fv2 = (1.0-fracinv)/(double) numcats; - - if (numcats == 1) { - - for (k = 0; k < Numptrn; k++) { - - /* compute likelihood for pattern k */ - sitelkl = cdl[0][k]*fv; - if (constpat[k] == TRUE) - sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]]; - - /* site log-likelihood */ - aslkl[k] = log(sitelkl); - } - - } else { - - for (k = 0; k < Numptrn; k++) { - - /* this general routine works always but it's better - to run it only when it's really necessary */ - - /* compute likelihood for pattern k */ - sitelkl = 0.0; - for (r = 0; r < numcats; r++) - sitelkl += cdl[r][k]; - sitelkl = fv2*sitelkl; - if (constpat[k] == TRUE) - sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]]; - - /* total log-likelihood */ - aslkl[k] = log(sitelkl); - - } - } -} - - 
-/***************************** internal functions *****************************/ - -/* compute negative log-likelihood of distance arc between sequences seqchi/j */ -double pairlkl(double arc) -{ - int k, r, ci, cj; - double loglkl, fv, sitelkl; - - - /* compute tpms */ - for (r = 0; r < numcats; r++) - /* compute tpm for rate category r */ - tprobmtrx(arc*Rates[r], ltprobr[r]); - - loglkl = 0.0; - fv = 1.0-fracinv; - - if (numcats == 1) { - - for (k = 0; k < Numptrn; k++) { - - /* compute likelihood for site k */ - ci = seqchi[k]; - cj = seqchj[k]; - if (ci != tpmradix && cj != tpmradix) - sitelkl = ltprobr[0][ci][cj]*fv; - else - sitelkl = fv; - if (ci == cj && ci != tpmradix) - sitelkl += fracinv*Freqtpm[ci]; - - /* total log-likelihood */ - loglkl += log(sitelkl)*Weight[k]; - - } - - } else { - - for (k = 0; k < Numptrn; k++) { - - /* this general routine works always but it's better - to run it only when it's really necessary */ - - /* compute likelihood for site k */ - ci = seqchi[k]; - cj = seqchj[k]; - if (ci != tpmradix && cj != tpmradix) { - sitelkl = 0.0; - for (r = 0; r < numcats; r++) - sitelkl += ltprobr[r][ci][cj]; - sitelkl = fv*sitelkl/(double) numcats; - } else - sitelkl = fv; - if (ci == cj && ci != tpmradix) - sitelkl += fracinv*Freqtpm[ci]; - - /* total log-likelihood */ - loglkl += log(sitelkl)*Weight[k]; - - } - - } - - /* return negative log-likelihood as we use a minimizing procedure */ - return -loglkl; -} - - -/***************************** exported functions *****************************/ - - - -/******************************************************************************/ - -/* maximum likelihood distance between sequence i and j */ -/* CZ changed 05/16/01 */ -double mldistance( int i ) { - double dist, fx, f2x; - - /* use old distance as start value */ - dist = Distanmat[ i ]; - - if ( dist == 0.0 ) { - return 0.0; - } - - seqchi = Seqpat[ Maxspc - 1 ]; - seqchj = Seqpat[ i ]; - - if (dist <= MINARC) dist = MINARC+1.0; - if (dist >= 
MAXARC) dist = MAXARC-1.0; - - dist = onedimenmin(MINARC, dist, MAXARC, pairlkl, EPSILON, &fx, &f2x); - - return dist; -} - - - -/* initialize distance matrix */ -/* CZ changed 05/16/01 */ -void initdistan() { - int i, k, diff, x, y; - double obs, temp; - - for (i = 0; i < Maxspc - 1 ; i++) { - - seqchi = Seqpat[i]; - seqchj = Seqpat[Maxspc - 1]; - - /* count observed differences */ - diff = 0; - for (k = 0; k < Numptrn; k++) { - x = seqchi[k]; - y = seqchj[k]; - if (x != y && - x != tpmradix && - y != tpmradix) - diff += Weight[k]; - } - if (diff == 0) - Distanmat[i] = 0.0; - else { - /* use generalized JC correction to get first estimate - (for the SH model the observed distance is used) */ - /* observed distance */ - obs = (double) diff / (double) Maxsite; - temp = 1.0 - (double) obs*tpmradix/(tpmradix-1.0); - if (temp > 0.0 && !(data_optn == 0 && SH_optn)) - /* use JC corrected distance */ - Distanmat[i] = -100.0*(tpmradix-1.0)/tpmradix * log(temp); - else - /* use observed distance */ - Distanmat[i] = obs * 100.0; - if (Distanmat[i] < MINARC) Distanmat[i] = MINARC; - if (Distanmat[i] > MAXARC) Distanmat[i] = MAXARC; - } - } - -} - - - - -/* compute distance matrix */ -/* CZ changed 05/16/01 */ -void computedistan() { - int i; - - for ( i = 0; i < Maxspc - 1; i++ ) { - Distanmat[ i ] = mldistance( i ); - } -} - - -/******************************************************************************/ - - - - - -/******************************************************************************/ -/* computation of maximum likelihood edge lengths for a given tree */ -/******************************************************************************/ - - -/***************************** internal functions *****************************/ - - -/* multiply partial likelihoods */ -void productpartials(Node *op) -{ - Node *cp; - int i, j, r; - dcube opc, cpc; - - cp = op; - opc = op->partials; - while (cp->isop->isop != op) { - cp = cp->isop; - cpc = cp->partials; - for (r = 0; r < 
numcats; r++) - for (i = 0; i < Numptrn; i++) - for (j = 0; j < tpmradix; j++) - opc[r][i][j] *= cpc[r][i][j]; - } -} - - -/* compute internal partial likelihoods */ -void partialsinternal(Node *op) -{ - int i, j, k, r; - double sum; - dcube oprob, cprob; - - if (clockmode == 1) { /* clocklike branch lengths */ - for (r = 0; r < numcats; r++) { - tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]); - } - } else { /* non-clocklike branch lengths */ - for (r = 0; r < numcats; r++) { - tprobmtrx((op->length)*Rates[r], ltprobr[r]); - } - } - - oprob = op->partials; - cprob = op->kinp->isop->partials; - for (r = 0; r < numcats; r++) { - for (k = 0; k < Numptrn; k++) { - for (i = 0; i < tpmradix; i++) { - sum = 0.0; - for (j = 0; j < tpmradix; j++) - sum += ltprobr[r][i][j] * cprob[r][k][j]; - oprob[r][k][i] = sum; - } - } - } -} - - -/* compute external partial likelihoods */ -void partialsexternal(Node *op) -{ - int i, j, k, r; - dcube oprob; - cvector dseqi; - - if (clockmode == 1) { /* clocklike branch lengths */ - for (r = 0; r < numcats; r++) { - tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]); - } - } else { /* nonclocklike branch lengths */ - for (r = 0; r < numcats; r++) { - tprobmtrx((op->length)*Rates[r], ltprobr[r]); - } - } - - oprob = op->partials; - dseqi = op->kinp->eprob; - for (r = 0; r < numcats; r++) { - for (k = 0; k < Numptrn; k++) { - if ((j = dseqi[k]) == tpmradix) { - for (i = 0; i < tpmradix; i++) - oprob[r][k][i] = 1.0; - } else { - for (i = 0; i < tpmradix; i++) - oprob[r][k][i] = ltprobr[r][i][j]; - } - } - } -} - - -/* compute all partial likelihoods */ -void initpartials(Tree *tr) -{ - Node *cp, *rp; - - cp = rp = tr->rootp; - do { - cp = cp->isop->kinp; - if (cp->isop == NULL) { /* external node */ - cp = cp->kinp; /* not descen */ - partialsexternal(cp); - } else { /* internal node */ - if (!cp->descen) { - productpartials(cp->kinp->isop); - partialsinternal(cp); - } - } - } while (cp != rp); -} - - -/* compute log-likelihood given internal 
branch with length arc - between partials partiali and partials partialj */ -double intlkl(double arc) -{ - double sumlk, slk; - int r, s, i, j; - dmatrix cdl; - - cdl = Ctree->condlkl; - for (r = 0; r < numcats; r++) { - tprobmtrx(arc*Rates[r], ltprobr[r]); - } - for (r = 0; r < numcats; r++) { - for (s = 0; s < Numptrn; s++) { - sumlk = 0.0; - for (i = 0; i < tpmradix; i++) { - slk = 0.0; - for (j = 0; j < tpmradix; j++) - slk += partialj[r][s][j] * ltprobr[r][i][j]; - sumlk += Freqtpm[i] * partiali[r][s][i] * slk; - } - cdl[r][s] = sumlk; - } - } - - /* compute total log-likelihood for current tree */ - Ctree->lklhd = comptotloglkl(cdl); - - return -(Ctree->lklhd); /* we use a minimizing procedure */ -} - - -/* optimize internal branch */ -void optinternalbranch(Node *op) -{ - double arc, fx, f2x; - - partiali = op->isop->partials; - partialj = op->kinp->isop->partials; - arc = op->length; /* nonclocklike branch lengths */ - if (arc <= MINARC) arc = MINARC+1.0; - if (arc >= MAXARC) arc = MAXARC-1.0; - arc = onedimenmin(MINARC, arc, MAXARC, intlkl, EPSILON, &fx, &f2x); - op->kinp->length = arc; - op->length = arc; - - /* variance of branch length */ - f2x = fabs(f2x); - if (1.0/(MAXARC*MAXARC) < f2x) - op->varlen = 1.0/f2x; - else - op->varlen = MAXARC*MAXARC; -} - - -/* compute log-likelihood given external branch with length arc - between partials partiali and sequence seqchi */ -double extlkl(double arc) -{ - double sumlk; - int r, s, i, j; - dvector opb; - dmatrix cdl; - - cdl = Ctree->condlkl; - for (r = 0; r < numcats; r++) { - tprobmtrx(arc*Rates[r], ltprobr[r]); - } - for (r = 0; r < numcats; r++) { - for (s = 0; s < Numptrn; s++) { - opb = partiali[r][s]; - sumlk = 0.0; - if ((j = seqchi[s]) != tpmradix) { - for (i = 0; i < tpmradix; i++) - sumlk += (Freqtpm[i] * (opb[i] * ltprobr[r][i][j])); - } else { - for (i = 0; i < tpmradix; i++) - sumlk += Freqtpm[i] * opb[i]; - } - cdl[r][s] = sumlk; - } - } - - /* compute total log-likelihood for current tree */ 
- Ctree->lklhd = comptotloglkl(cdl); - - return -(Ctree->lklhd); /* we use a minimizing procedure */ -} - -/* optimize external branch */ -void optexternalbranch(Node *op) -{ - double arc, fx, f2x; - - partiali = op->isop->partials; - seqchi = op->kinp->eprob; - arc = op->length; /* nonclocklike branch lengths */ - if (arc <= MINARC) arc = MINARC+1.0; - if (arc >= MAXARC) arc = MAXARC-1.0; - arc = onedimenmin(MINARC, arc, MAXARC, extlkl, EPSILON, &fx, &f2x); - op->kinp->length = arc; - op->length = arc; - - /* variance of branch length */ - f2x = fabs(f2x); - if (1.0/(MAXARC*MAXARC) < f2x) - op->varlen = 1.0/f2x; - else - op->varlen = MAXARC*MAXARC; -} - - -/* finish likelihoods for each rate and site */ -void finishlkl(Node *op) -{ - int r, k, i, j; - double arc, sumlk, slk; - dmatrix cdl; - - partiali = op->isop->partials; - partialj = op->kinp->isop->partials; - cdl = Ctree->condlkl; - arc = op->length; /* nonclocklike branch lengths */ - for (r = 0; r < numcats; r++) { - tprobmtrx(arc*Rates[r], ltprobr[r]); - } - for (r = 0; r < numcats; r++) { - for (k = 0; k < Numptrn; k++) { - sumlk = 0.0; - for (i = 0; i < tpmradix; i++) { - slk = 0.0; - for (j = 0; j < tpmradix; j++) - slk += partialj[r][k][j] * ltprobr[r][i][j]; - sumlk += Freqtpm[i] * partiali[r][k][i] * slk; - } - cdl[r][k] = sumlk; - } - } -} - - -/***************************** exported functions *****************************/ - - -/* optimize branch lengths to get maximum likelihood (nonclocklike branchs) */ -double optlkl(Tree *tr) -{ - Node *cp, *rp; - int nconv; - double lendiff; - - clockmode = 0; /* nonclocklike branch lengths */ - nconv = 0; - Converg = FALSE; - initpartials(tr); - for (Numit = 1; (Numit <= MAXIT) && (!Converg); Numit++) { - - cp = rp = tr->rootp; - do { - cp = cp->isop->kinp; - productpartials(cp->kinp->isop); - if (cp->isop == NULL) { /* external node */ - cp = cp->kinp; /* not descen */ - - lendiff = cp->length; - optexternalbranch(cp); - lendiff = fabs(lendiff - cp->length); 
- if (lendiff < EPSILON) nconv++; - else nconv = 0; - - partialsexternal(cp); - } else { /* internal node */ - if (cp->descen) { - partialsinternal(cp); - } else { - - lendiff = cp->length; - optinternalbranch(cp); - lendiff = fabs(lendiff - cp->length); - if (lendiff < EPSILON) nconv++; - else nconv = 0; - - /* eventually compute likelihoods for each site */ - if ((cp->number == Numibrnch-1 && lendiff < EPSILON) || - Numit == MAXIT-1) finishlkl(cp); - - partialsinternal(cp); - } - } - if (nconv >= Numbrnch) { /* convergence */ - Converg = TRUE; - cp = rp; /* get out of here */ - } - } while (cp != rp); - } - - /* compute total log-likelihood for current tree */ - return comptotloglkl(tr->condlkl); -} - - -/* compute likelihood of tree for given branch lengths */ -double treelkl(Tree *tr) -{ - int i, k, r; - Node *cp; - dmatrix cdl; - dcube prob1, prob2; - double sumlk; - - /* compute for each site and rate log-likelihoods */ - initpartials(tr); - cp = tr->rootp; - productpartials(cp->isop); - prob1 = cp->partials; - prob2 = cp->isop->partials; - cdl = tr->condlkl; - for (r = 0; r < numcats; r++) { - for (k = 0; k < Numptrn; k++) { - sumlk = 0.0; - for (i = 0; i < tpmradix; i++) - sumlk += Freqtpm[i] * (prob1[r][k][i] * prob2[r][k][i]); - cdl[r][k] = sumlk; - } - } - - /* return total log-likelihood for current tree */ - return comptotloglkl(cdl); -} - - -/******************************************************************************/ -/* least-squares estimate of branch lengths */ -/******************************************************************************/ - - -/***************************** internal functions *****************************/ - - -void luequation(dmatrix amat, dvector yvec, int size) -{ - double eps = 1.0e-20; /* ! 
*/ - int i, j, k, l, maxi=0, idx; - double sum, tmp, maxb, aw; - dvector wk; - ivector index; - - - wk = new_dvector(size); - index = new_ivector(size); - aw = 1.0; - for (i = 0; i < size; i++) { - maxb = 0.0; - for (j = 0; j < size; j++) { - if (fabs(amat[i][j]) > maxb) - maxb = fabs(amat[i][j]); - } - if (maxb == 0.0) { - /* Singular matrix */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR D TO DEVELOPERS\n\n\n"); - exit(1); - } - wk[i] = 1.0 / maxb; - } - for (j = 0; j < size; j++) { - for (i = 0; i < j; i++) { - sum = amat[i][j]; - for (k = 0; k < i; k++) - sum -= amat[i][k] * amat[k][j]; - amat[i][j] = sum; - } - maxb = 0.0; - for (i = j; i < size; i++) { - sum = amat[i][j]; - for (k = 0; k < j; k++) - sum -= amat[i][k] * amat[k][j]; - amat[i][j] = sum; - tmp = wk[i] * fabs(sum); - if (tmp >= maxb) { - maxb = tmp; - maxi = i; - } - } - if (j != maxi) { - for (k = 0; k < size; k++) { - tmp = amat[maxi][k]; - amat[maxi][k] = amat[j][k]; - amat[j][k] = tmp; - } - aw = -aw; - wk[maxi] = wk[j]; - } - index[j] = maxi; - if (amat[j][j] == 0.0) - amat[j][j] = eps; - if (j != size - 1) { - tmp = 1.0 / amat[j][j]; - for (i = j + 1; i < size; i++) - amat[i][j] *= tmp; - } - } - l = -1; - for (i = 0; i < size; i++) { - idx = index[i]; - sum = yvec[idx]; - yvec[idx] = yvec[i]; - if (l != -1) { - for (j = l; j < i; j++) - sum -= amat[i][j] * yvec[j]; - } else if (sum != 0.0) - l = i; - yvec[i] = sum; - } - for (i = size - 1; i >= 0; i--) { - sum = yvec[i]; - for (j = i + 1; j < size; j++) - sum -= amat[i][j] * yvec[j]; - yvec[i] = sum / amat[i][i]; - } - free_ivector(index); - free_dvector(wk); -} - - -/* least square estimation of branch lengths - used for the approximate ML and as starting point - in the calculation of the exact value of the ML */ -void lslength(Tree *tr, dvector distanvec, int numspc, int numibrnch, dvector Brnlength) -{ - int i, i1, j, j1, j2, k, numbrnch, numpair; - double sum, leng, alllen, rss; - ivector pths; - dmatrix atmt, atamt; - Node 
**ebp, **ibp; - - numbrnch = numspc + numibrnch; - numpair = (numspc * (numspc - 1)) / 2; - atmt = new_dmatrix(numbrnch, numpair); - atamt = new_dmatrix(numbrnch, numbrnch); - ebp = tr->ebrnchp; - ibp = tr->ibrnchp; - for (i = 0; i < numspc; i++) { - for (j1 = 1, j = 0; j1 < numspc; j1++) { - if (j1 == i) { - for (j2 = 0; j2 < j1; j2++, j++) { - atmt[i][j] = 1.0; - } - } else { - for (j2 = 0; j2 < j1; j2++, j++) { - if (j2 == i) - atmt[i][j] = 1.0; - else - atmt[i][j] = 0.0; - } - } - } - } - for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) { - pths = ibp[i1]->paths; - for (j1 = 1, j = 0; j1 < numspc; j1++) { - for (j2 = 0; j2 < j1; j2++, j++) { - if (pths[j1] != pths[j2]) - atmt[i][j] = 1.0; - else - atmt[i][j] = 0.0; - } - } - } - for (i = 0; i < numbrnch; i++) { - for (j = 0; j <= i; j++) { - for (k = 0, sum = 0.0; k < numpair; k++) - sum += atmt[i][k] * atmt[j][k]; - atamt[i][j] = sum; - atamt[j][i] = sum; - } - } - for (i = 0; i < numbrnch; i++) { - for (k = 0, sum = 0.0; k < numpair; k++) - sum += atmt[i][k] * distanvec[k]; - Brnlength[i] = sum; - } - luequation(atamt, Brnlength, numbrnch); - for (i = 0, rss = 0.0; i < numpair; i++) { - sum = distanvec[i]; - for (j = 0; j < numbrnch; j++) { - if (atmt[j][i] == 1.0 && Brnlength[j] > 0.0) - sum -= Brnlength[j]; - } - rss += sum * sum; - } - tr->rssleast = sqrt(rss); - alllen = 0.0; - for (i = 0; i < numspc; i++) { - leng = Brnlength[i]; - alllen += leng; - if (leng < MINARC) leng = MINARC; - if (leng > MAXARC) leng = MAXARC; - if (clockmode) { /* clock */ - ebp[i]->lengthc = leng; - ebp[i]->kinp->lengthc = leng; - } else { /* no clock */ - ebp[i]->length = leng; - ebp[i]->kinp->length = leng; - } - Brnlength[i] = leng; - } - for (i = 0, j = numspc; i < numibrnch; i++, j++) { - leng = Brnlength[j]; - alllen += leng; - if (leng < MINARC) leng = MINARC; - if (leng > MAXARC) leng = MAXARC; - if (clockmode) { /* clock */ - ibp[i]->lengthc = leng; - ibp[i]->kinp->lengthc = leng; - } else { /* no clock */ - 
ibp[i]->length = leng; - ibp[i]->kinp->length = leng; - } - Brnlength[j] = leng; - } - free_dmatrix(atmt); - free_dmatrix(atamt); -} diff --git a/forester/archive/RIO/others/puzzle_dqo/src/ml2.c b/forester/archive/RIO/others/puzzle_dqo/src/ml2.c deleted file mode 100644 index 7e1b3db..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/ml2.c +++ /dev/null @@ -1,1637 +0,0 @@ -/* - * ml2.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -#define EXTERN extern - -/* prototypes */ -#include -#include -#include -#include -#include -#include "util.h" -#include "ml.h" - -#define STDOUT stdout -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUTFILE STDOUT, -#endif - -/* prototypes for two functions of puzzle2.c */ -void fputid10(FILE *, int); -int fputid(FILE *, int); - - -/******************************************************************************/ -/* user tree input */ -/******************************************************************************/ - -/* read user tree, drop all blanks, tabs, and newlines. - Drop edgelengths (after :) but keep internal - labels. Check whether all pairs of brackets match. 
*/ -void getusertree(FILE *itfp, cvector tr, int maxlen) -{ - int n, brac, ci; - int comment = 0; - - /* look for opening bracket */ - n = 0; - brac = 0; - do { - ci = fgetc(itfp); - if (ci == EOF) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing start bracket in tree)\n\n\n"); - exit(1); - } - if (ci == '[') comment = 1; - if ((ci == ']') && comment) { - comment = 0; - ci = fgetc(itfp); - } - } while (comment || ((char) ci != '(')); - tr[n] = (char) ci; - brac++; - - do { - /* get next character (skip blanks, newlines, and tabs) */ - do { - ci = fgetc(itfp); - if (ci == EOF) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no more characters in tree)\n\n\n"); - exit(1); - } - if (ci == '[') comment = 1; - if ((ci == ']') && comment) { - comment = 0; - ci = fgetc(itfp); - } - } while (comment || (char) ci == ' ' || (char) ci == '\n' || (char) ci == '\t'); - - if ((char) ci == ':') { /* skip characters until a ,) appears */ - do { - ci = fgetc(itfp); - if (ci == EOF) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing ';' or ',' in tree)\n\n\n"); - exit(1); - } - if (ci == '[') comment = 1; - if ((ci == ']') && comment) { - comment = 0; - ci = fgetc(itfp); - } - } while (comment || ((char) ci != ',' && (char) ci != ')') ); - } - - if ((char) ci == '(') { - brac++; - } - if ((char) ci == ')') { - brac--; - } - - n++; - tr[n] = (char) ci; - - } while (((char) ci != ';') && (n != maxlen-2)); - - if (n == maxlen-2) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (tree description too long)\n\n\n"); - exit(1); - } - - if (brac != 0) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (brackets don't match in tree)\n\n\n"); - exit(1); - } - - n++; - tr[n] = '\0'; -} - - -Node *internalnode(Tree *tr, char **chpp, int *ninode) -{ - Node *xp, *np, *rp; - int i, j, dvg, ff, stop, numc; - char ident[100], idcomp[27]; /*CZ*/ - char *idp; - - (*chpp)++; - if (**chpp == '(') { /* process subgroup */ - - xp = internalnode(tr, chpp, ninode); - xp->isop = xp; - dvg = 1; - 
while (**chpp != ')') { - if (**chpp == '\0') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); - exit(1); - } - dvg++; - /* insert edges around node */ - np = internalnode(tr, chpp, ninode); - np->isop = xp->isop; - xp->isop = np; - xp = np; - } - /* closing bracket reached */ - - (*chpp)++; - if (dvg < 2) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (only one OTU inside pair of brackets)\n\n\n"); - exit(1); - } - - if ((*ninode) >= Maxspc-3) { /* all internal nodes already used */ - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n"); - exit(1); - } - - rp = tr->ibrnchp[*ninode]; - rp->isop = xp->isop; - xp->isop = rp; - - for (j = 0; j < Numspc; j++) - rp->paths[j] = 0; - xp = rp->isop; - while (xp != rp) { - for (j = 0; j < Numspc; j++) { - if (xp->paths[j] == 1) - rp->paths[j] = 1; - } - xp = xp->isop; - } - (*ninode)++; - - if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp; - if ((**chpp) == '\0') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); - exit(1); - } - - /* read internal label into rp->label (max. 
20 characters) */ - rp->label = new_cvector(21); - (rp->label)[0] = **chpp; - (rp->label)[1] = '\0'; - for (numc = 1; numc < 20; numc++) { - (*chpp)++; - if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp; - if ((**chpp) == '\0') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); - exit(1); - } - (rp->label)[numc] = **chpp; - (rp->label)[numc+1] = '\0'; - } - do { /* skip the rest of the internal label */ - (*chpp)++; - if ((**chpp) == '\0') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); - exit(1); - } - } while (((**chpp) != ',' && (**chpp) != ')')); - - return rp->kinp; - - } else { /* process species names */ - /* read species name */ - for (idp = ident; **chpp != ',' && - **chpp != ')' && **chpp != '\0'; (*chpp)++) { - *idp++ = **chpp; - } - *idp = '\0'; - /* look for internal number */ - idcomp[26] = '\0'; /*CZ*/ - - for (i = 0; i < Maxspc; i++) { - ff = 0; - stop = FALSE; - do { - idcomp[ff] = Identif[i][ff]; - ff++; - if (idcomp[ff-1] == ' ') stop = TRUE; - } while (!stop && (ff != 26)); /*CZ*/ - if (stop) idcomp[ff-1] = '\0'; - - if (!strcmp(ident, idcomp)) { - if (usedtaxa[i]) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence '"); - FPRINTF(STDOUTFILE "%s' in tree)\n\n\n", ident); - exit(1); - } - usedtaxa[i] = TRUE; - return tr->ebrnchp[i]->kinp; - } - } - - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unknown sequence '%s' in tree)\n\n\n", ident); - exit(1); - } - return NULL; /* never returned but without some compilers complain */ -} - -/* make tree structure, the tree description may contain internal - labels but no edge lengths */ -void constructtree(Tree *tr, cvector strtree) -{ - char *chp; - int ninode, i; - int dvg, numc; - Node *xp, *np; - - ninode = 0; - chp = strtree; - usedtaxa = new_ivector(Maxspc); - for (i = 0; i < Maxspc; i++) usedtaxa[i] = FALSE; - - xp = internalnode(tr, &chp, &ninode); - xp->isop = xp; - dvg = 1; - while (*chp != ')') { 
/* look for closing bracket */ - if (*chp == '\0') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); - exit(1); - } - dvg++; - /* insert edges around node */ - np = internalnode(tr, &chp, &ninode); - np->isop = xp->isop; - xp->isop = np; - xp = np; - } - - for (i = 0; i < Maxspc; i++) - if (usedtaxa[i] == FALSE) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (sequences missing in tree)\n\n\n"); - exit(1); - } - - /* closing bracket reached */ - if (dvg < 3) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n"); - exit(1); - } - tr->rootp = xp; - Numibrnch = ninode; - Numbrnch = Numspc + ninode; - - chp++; - if (*chp == ';' || *chp == '\0') { - free_ivector(usedtaxa); - return; - } - - /* copy last internal label (max. 20 characters) */ - xp->label = new_cvector(21); - (xp->label)[0] = *chp; - (xp->label)[1] = '\0'; - for (numc = 1; numc < 20; numc++) { - chp++; - if (*chp == ';' || *chp == '\0') { - free_ivector(usedtaxa); - return; - } else { - (xp->label)[numc] = *chp; - (xp->label)[numc+1] = '\0'; - } - } - free_ivector(usedtaxa); - return; -} - - -/* remove possible basal bifurcation */ -void removebasalbif(cvector strtree) -{ - int n, c, brak, cutflag, h; - - /* check how many OTUs on basal level */ - n = 0; - c = 0; - brak = 0; - do { - if (strtree[n] == '(') brak++; - if (strtree[n] == ')') brak--; - - if (strtree[n] == ',' && brak == 1) c++; /* number of commas in outer bracket */ - - n++; - } while (strtree[n] != '\0'); - - /* if only 1 OTU inside outer bracket stop now */ - if (c == 0) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (Only 1 OTU inside outer bracket in tree)\n\n\n"); - exit(1); - } - - /* if only 2 OTUs inside outer bracket delete second pair of - brackets from the right to remove basal bifurcation */ - - if (c == 1) { - - n = 0; - brak = 0; - cutflag = 0; /* not yet cutted */ - h = 0; - do { - if (strtree[n] == '(') brak++; - if (strtree[n] == ')') brak--; - - if (brak == 2 && cutflag 
== 0) cutflag = 1; /* cutting */ - if (brak == 1 && cutflag == 1) { - cutflag = 2; /* cutted */ - /* leave out internal label */ - do { - h++; - } while (strtree[n+h] != ')' && strtree[n+h] != ','); - - } - - if (cutflag == 1) strtree[n] = strtree[n+1]; - if (cutflag == 2) strtree[n-1] = strtree[n+h]; - - n++; - } while (strtree[n] != '\0'); - } -} - - -void makeusertree(FILE *itfp) -{ - cvector strtree; - - strtree = new_cvector(23*Maxspc); /* for treefile */ - getusertree(itfp, strtree, 23*Maxspc); - removebasalbif(strtree); - constructtree(Ctree, strtree); - free_cvector(strtree); -} - - -/******************************************************************************/ -/* memory organisation for maximum likelihood tree */ -/******************************************************************************/ - -/* initialise new tree */ -Tree *new_tree(int maxspc, int numptrn, cmatrix seqconint) -{ - int n, i, maxibrnch; - Tree *tr; - Node *dp, *up; - - maxibrnch = maxspc - 3; - heights = (Node **) malloc((unsigned)(maxspc-2) * sizeof(Node *)); - if (heights == NULL) maerror("heights in new_tree"); - tr = (Tree *) malloc(sizeof(Tree)); - if (tr == NULL) maerror("tr in new_tree"); - tr->ebrnchp = (Node **) malloc((unsigned)maxspc * sizeof(Node *)); - if (tr->ebrnchp == NULL) maerror("ebrnchp in new_tree"); - tr->ibrnchp = (Node **) malloc((unsigned)maxibrnch * sizeof(Node *)); - if (tr->ibrnchp == NULL) maerror("ibrnchp in new_tree"); - tr->condlkl = new_dmatrix(numcats, numptrn); - for (n = 0; n < maxspc; n++) { - dp = (Node *) malloc(sizeof(Node)); - if (dp == NULL) maerror("dp in new_tree"); - up = (Node *) malloc(sizeof(Node)); - if (up == NULL) maerror("up in new_tree"); - dp->isop = NULL; - up->isop = NULL; - dp->kinp = up; - up->kinp = dp; - dp->descen = TRUE; - up->descen = FALSE; - dp->number = n; - up->number = n; - dp->length = 0.0; - up->length = 0.0; - dp->lengthc = 0.0; - up->lengthc = 0.0; - dp->varlen = 0.0; - up->varlen = 0.0; - dp->paths = 
new_ivector(maxspc); - up->paths = dp->paths; - for (i = 0; i < maxspc; i++) dp->paths[i] = 0; - dp->paths[n] = 1; - dp->eprob = seqconint[n]; - up->eprob = NULL; - dp->partials = NULL; - up->partials = new_dcube(numcats, numptrn, tpmradix); - tr->ebrnchp[n] = dp; - up->label = NULL; - dp->label = NULL; - } - for (n = 0; n < maxibrnch; n++) { - dp = (Node *) malloc(sizeof(Node)); - if (dp == NULL) maerror("dp in new_tree"); - up = (Node *) malloc(sizeof(Node)); - if (up == NULL) maerror("up in new_tree"); - dp->isop = NULL; - up->isop = NULL; - dp->kinp = up; - up->kinp = dp; - dp->descen = TRUE; - up->descen = FALSE; - dp->number = n; - up->number = n; - dp->length = 0.0; - up->length = 0.0; - dp->lengthc = 0.0; - up->lengthc = 0.0; - dp->varlen = 0.0; - up->varlen = 0.0; - dp->paths = new_ivector(maxspc); - up->paths = dp->paths; - for (i = 0; i < maxspc; i++) dp->paths[i] = 0; - dp->eprob = NULL; - up->eprob = NULL; - dp->partials = new_dcube(numcats, numptrn, tpmradix); - up->partials = new_dcube(numcats, numptrn, tpmradix); - tr->ibrnchp[n] = dp; - up->label = NULL; - dp->label = NULL; - } - tr->rootp = NULL; - - /* - * reserve memory for lengths of the tree branches - * and for the distance matrix as a vector - * (needed for LS estimation of tree branch lengths) - */ - - Brnlength = new_dvector(2 * maxspc - 3); - Distanvec = new_dvector((maxspc * (maxspc - 1)) / 2); - - return tr; -} - - -/* initialise quartet tree */ -Tree *new_quartet(int numptrn, cmatrix seqconint) -{ - int n, i; - Tree *tr; - Node *dp, *up; - - heights = (Node **) malloc((unsigned)2 * sizeof(Node *)); - if (heights == NULL) maerror("heights in new_quartet"); - /* reserve memory for tree */ - tr = (Tree *) malloc(sizeof(Tree)); - if (tr == NULL) maerror("tr in new_quartet"); - tr->ebrnchp = (Node **) malloc((unsigned) 4 * sizeof(Node *)); - if (tr->ebrnchp == NULL) maerror("ebrnchp in new_quartet"); - tr->ibrnchp = (Node **) malloc((unsigned) sizeof(Node *)); - if (tr->ibrnchp == NULL) 
maerror("ibrnchp in new_quartet"); - tr->condlkl = new_dmatrix(numcats, numptrn); - /* reserve memory for nodes */ - for (n = 0; n < 4; n++) { - dp = (Node *) malloc(sizeof(Node)); - if (dp == NULL) maerror("dp in new_quartet"); - up = (Node *) malloc(sizeof(Node)); - if (up == NULL) maerror("dp in new_quartet"); - dp->isop = NULL; - dp->kinp = up; - up->kinp = dp; - dp->descen = TRUE; - up->descen = FALSE; - dp->number = n; - up->number = n; - dp->length = 0.0; - up->length = 0.0; - dp->lengthc = 0.0; - up->lengthc = 0.0; - dp->varlen = 0.0; - up->varlen = 0.0; - dp->paths = new_ivector(4); - up->paths = dp->paths; - for (i = 0; i < 4; i++) dp->paths[i] = 0; - dp->paths[n] = 1; - dp->eprob = seqconint[n]; /* make quartet (0,1)-(2,3) as default */ - up->eprob = NULL; - dp->partials = NULL; - up->partials = new_dcube(numcats, numptrn, tpmradix); - tr->ebrnchp[n] = dp; - } - - /* reserve memory for internal branch */ - dp = (Node *) malloc(sizeof(Node)); - if (dp == NULL) maerror("dp in new_quartet"); - up = (Node *) malloc(sizeof(Node)); - if (up == NULL) maerror("dp in new_quartet"); - dp->isop = tr->ebrnchp[3]->kinp; /* connect internal branch */ - up->isop = tr->ebrnchp[0]->kinp; - dp->kinp = up; - up->kinp = dp; - dp->descen = TRUE; - up->descen = FALSE; - dp->number = 0; - up->number = 0; - dp->length = 0.0; - up->length = 0.0; - dp->lengthc = 0.0; - up->lengthc = 0.0; - dp->varlen = 0.0; - up->varlen = 0.0; - dp->paths = new_ivector(4); - up->paths = dp->paths; - up->paths[0] = 0; - up->paths[1] = 0; - up->paths[2] = 1; - up->paths[3] = 1; - dp->eprob = NULL; - up->eprob = NULL; - dp->partials = new_dcube(numcats, numptrn, tpmradix); - up->partials = new_dcube(numcats, numptrn, tpmradix); - tr->ibrnchp[0] = dp; - - /* place root */ - tr->rootp = up; - - /* connect external branches */ - tr->ebrnchp[0]->kinp->isop = tr->ebrnchp[1]->kinp; - tr->ebrnchp[1]->kinp->isop = tr->rootp; - tr->ebrnchp[3]->kinp->isop = tr->ebrnchp[2]->kinp; - tr->ebrnchp[2]->kinp->isop = 
tr->rootp->kinp; - - /* - * reserve memory for lengths of the five branches - * of a quartet and for the six possible distances - * (needed for LS estimation of branch lengths) - */ - Brnlength = new_dvector(NUMQBRNCH); - Distanvec = new_dvector(NUMQSPC*(NUMQSPC-1)/2); - - return tr; -} - - -/* free tree memory */ -void free_tree(Tree *tr, int taxa) -{ - int n; - Node *dp, *up; - - free(heights); - free_dmatrix(tr->condlkl); - for (n = 0; n < taxa; n++) { - dp = tr->ebrnchp[n]; - up = dp->kinp; - free_ivector(dp->paths); - free_dcube(up->partials); - free(dp); - free(up); - } - free(tr->ebrnchp); - for (n = 0; n < (taxa-3); n++) { - dp = tr->ibrnchp[n]; - up = dp->kinp; - free_dcube(dp->partials); - free_dcube(up->partials); - free_ivector(dp->paths); - free(dp); - free(up); - } - free(tr->ibrnchp); - free(tr); - free_dvector(Brnlength); /* branch lengths (for LS estimation) */ - free_dvector(Distanvec); /* distances (for LS estimation) */ -} - - -/* make (a,b)-(c,d) quartet - - a ---+ +--- c - +-----+ - b ---+ +--- d - - species numbers range from 0 to Maxspc - 1 */ - -void make_quartet(int a, int b, int c, int d) -{ - /* place sequences */ - /*Ctree->ebrnchp[0]->eprob = Seqpat[a]; - Ctree->ebrnchp[1]->eprob = Seqpat[b]; - Ctree->ebrnchp[2]->eprob = Seqpat[c]; - Ctree->ebrnchp[3]->eprob = Seqpat[d]; - CZ */ - /* make distance vector */ - /*Distanvec[0] = Distanmat[b][a]; - Distanvec[1] = Distanmat[c][a]; - Distanvec[2] = Distanmat[c][b]; - Distanvec[3] = Distanmat[d][a]; - Distanvec[4] = Distanmat[d][b]; - Distanvec[5] = Distanmat[d][c]; - CZ */ -} - -/* write distance matrix as vector */ -void changedistan(dmatrix distanmat, dvector distanvec, int numspc) -{ - int i, j, k; - - for (k = 0, i = 1; i < numspc; i++) { - for (j = 0; j < i; j++, k++) - distanvec[k] = distanmat[i][j]; - } -} - - -/******************************************************************************/ -/* computation of maximum likelihood tree */ 
-/******************************************************************************/ - - -/* compute the likelihood for (a,b)-(c,d) quartet */ -double quartet_lklhd(int a, int b, int c, int d) -{ - /* reserve memory for quartet if necessary */ - if (mlmode != 1) { /* no quartet tree */ - if (Ctree != NULL) - free_tree(Ctree, Numspc); - Ctree = new_quartet(Numptrn, Seqpat); - Numbrnch = NUMQBRNCH; - Numibrnch = NUMQIBRNCH; - Numspc = NUMQSPC; - mlmode = 1; - } - - /* make (a,b)-(c,d) quartet */ - make_quartet(a,b,c,d); - - clockmode = 0; /* nonclocklike branch lengths */ - - /* least square estimate for branch length */ - lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); - - /* compute likelihood */ - Ctree->lklhd = optlkl(Ctree); - - return Ctree->lklhd; -} - - -/* compute the approximate likelihood for (a,b)-(c,d) quartet */ -double quartet_alklhd(int a, int b, int c, int d) -{ - /* reserve memory for quartet if necessary */ - if (mlmode != 1) { /* no quartet tree */ - if (Ctree != NULL) - free_tree(Ctree, Numspc); - Ctree = new_quartet(Numptrn, Seqpat); - Numbrnch = NUMQBRNCH; - Numibrnch = NUMQIBRNCH; - Numspc = NUMQSPC; - mlmode = 1; - } - - /* make (a,b)-(c,d) quartet */ - make_quartet(a,b,c,d); - - clockmode = 0; /* nonclocklike branch lengths */ - - /* least square estimate for branch length */ - lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); - - /* compute likelihood */ - Ctree->lklhd = treelkl(Ctree); - - return Ctree->lklhd; -} - - -/* read usertree from file to memory */ -void readusertree(FILE *ifp) -{ - /* reserve memory for tree if necessary */ - if (mlmode != 2) { /* no tree */ - if (Ctree != NULL) - free_tree(Ctree, Numspc); - Ctree = new_tree(Maxspc, Numptrn, Seqpat); - Numbrnch = 2*Maxspc-3; - Numibrnch = Maxspc-3; - Numspc = Maxspc; - mlmode = 2; - } - - /* read tree */ - makeusertree(ifp); -} - - -/* compute the likelihood of a usertree */ -double usertree_lklhd() -{ - - /* CZ 05/16/01 */ - - return 6.66; -} - - -/* compute the 
approximate likelihood of a usertree */ -double usertree_alklhd() -{ - /* CZ 05/16/01 */ - - return 6.66; -} - - -/* preparation for ML analysis */ -void mlstart() -{ - /* number of states and code length */ - tpmradix = gettpmradix(); - - /* declare variables */ - Eval = new_dvector(tpmradix); - Evec = new_dmatrix(tpmradix,tpmradix); - Ievc = new_dmatrix(tpmradix,tpmradix); - iexp = new_dmatrix(tpmradix,tpmradix); - Alias = new_ivector(Maxsite); - - /* process sequence information */ - evaluateseqs(); - bestrate = new_ivector(Numptrn); - - /* compute transition probability matrix */ - tranprobmat(); - - /* non-zero rate categories */ - Rates = new_dvector(numcats); - updaterates(); - ltprobr = new_dcube(numcats, tpmradix,tpmradix); - - /* compute distance matrix */ - Distanmat = new_dvector( Maxspc - 1 ); - initdistan(); - - /* initialize tree pointer for quartet tree */ - /*mlmode = 1; - Ctree = new_quartet(Numptrn, Seqpat); - Numbrnch = NUMQBRNCH; - Numibrnch = NUMQIBRNCH; - Numspc = NUMQSPC; - CZ, */ - /* computing ML distances */ - computedistan(); -} - - -/* recompute ml distances for quartet only */ -void distupdate(int a, int b, int c, int d) -{ - /* update distance matrix */ - /* consider only entries relevant to quartet */ - /* - Distanmat[a][b] = mldistance(a, b); - Distanmat[b][a] = Distanmat[a][b]; - Distanmat[a][c] = mldistance(a, c); - Distanmat[c][a] = Distanmat[a][c]; - Distanmat[a][d] = mldistance(a, d); - Distanmat[d][a] = Distanmat[a][d]; - Distanmat[b][c] = mldistance(b, c); - Distanmat[c][b] = Distanmat[b][c]; - Distanmat[b][d] = mldistance(b, d); - Distanmat[d][b] = Distanmat[b][d]; - Distanmat[c][d] = mldistance(c, d); - Distanmat[d][c] = Distanmat[c][d]; - CZ */ -} - - -/* cleanup after ML analysis */ -void mlfinish() -{ - if (Ctree != NULL) - free_tree(Ctree, Numspc); - free_ivector(bestrate); - free_ivector(Alias); - free_cmatrix(Seqpat); - free_ivector(constpat); - free_ivector(Weight); - free_dvector(Distanmat); /* CZ */ - 
free_dvector(Eval); - free_dmatrix(Evec); - free_dmatrix(Ievc); - free_dvector(Rates); - free_dcube(ltprobr); - free_dmatrix(iexp); -} - - -/******************************************************************************/ -/* tree output */ -/******************************************************************************/ - - -#define MAXOVER 50 -#define MAXLENG 30 -#define MAXCOLUMN 80 - - -void prbranch(Node *up, int depth, int m, int maxm, - ivector umbrella, ivector column, FILE *outfp) -{ - int i, num, n, maxn, lim; - Node *cp; - char bch; - - if ((int)((clockmode ? up->lengthc : up->length) * Proportion) >= MAXOVER) { - column[depth] = MAXLENG; - bch = '+'; - } else { - column[depth] = (int)((clockmode ? up->lengthc : up->length) * Proportion) + 3; - bch = '-'; - } - - if (up->isop == NULL) { /* external branch */ - num = up->number + 1; /* offset */ - if (m == 1) umbrella[depth - 1] = TRUE; - for (i = 0; i < depth; i++) { - if (umbrella[i]) - fprintf(outfp, "%*c", column[i], ':'); - else - fprintf(outfp, "%*c", column[i], ' '); - } - if (m == maxm) - umbrella[depth - 1] = FALSE; - for (i = 0, lim = column[depth] - 3; i < lim; i++) - fputc(bch, outfp); - fprintf(outfp, "-%d ", num); - - fputid(outfp, up->number); - - - fputc('\n', outfp); - fputc(' ', outfp); - return; - } - - num = up->number + 1 + Numspc; /* offset, internal branch */ - for (cp = up->isop, maxn = 0; cp != up; cp = cp->isop, maxn++) - ; - for (cp = up->isop, n = 1; cp != up; cp = cp->isop, n++) { - prbranch(cp->kinp, depth + 1, n, maxn, umbrella, column, outfp); - if (m == 1 && n == maxn / 2) umbrella[depth - 1] = TRUE; - if (n != maxn) { - for (i = 0; i < depth; i++) { - if (umbrella[i]) - fprintf(outfp, "%*c", column[i], ':'); - else - fprintf(outfp, "%*c", column[i], ' '); - } - if (n == maxn / 2) { /* internal branch */ - for (i = 0, lim = column[depth] - 3; i < lim; i++) - fputc(bch, outfp); - if (num < 10) - fprintf(outfp, "--%d", num); - else if (num < 100) - fprintf(outfp, "-%2d", 
num); - else - fprintf(outfp, "%3d", num); - } else { - if (umbrella[depth]) - fprintf(outfp, "%*c", column[depth], ':'); - else - fprintf(outfp, "%*c", column[depth], ' '); - } - fputc('\n', outfp); - fputc(' ', outfp); - } - if (m == maxm) umbrella[depth - 1] = FALSE; - } - return; -} - - -void getproportion(double *proportion, dvector distanvec, int numspc) -{ - int i, maxpair; - double maxdis; - - maxpair = (numspc*(numspc-1))/2; - - maxdis = 0.0; - for (i = 0; i < maxpair; i++) { - if (distanvec[i] > maxdis) { - maxdis = distanvec[i]; - } - } - *proportion = (double) MAXCOLUMN / (maxdis * 3.0); - if (*proportion > 1.0) *proportion = 1.0; -} - - -void prtopology(FILE *outfp) -{ - int n, maxn, depth; - ivector umbrella; - ivector column; - Node *cp, *rp; - - getproportion(&Proportion, Distanvec, Numspc); - - umbrella = new_ivector(Numspc); - column = new_ivector(Numspc); - - for (n = 0; n < Numspc; n++) { - umbrella[n] = FALSE; - column[n] = 3; - } - column[0] = 1; - - fputc(' ', outfp); - - /* original code: rp = Ctree->rootp */ - /* but we want to print the first group in the - trichotomy as outgroup at the bottom! */ - rp = Ctree->rootp->isop; - - for (maxn = 1, cp = rp->isop; cp != rp; cp = cp->isop, maxn++) - ; - depth = 1; - n = 0; - - cp = rp; - do { - cp = cp->isop; - n++; - prbranch(cp->kinp, depth, n, maxn, umbrella, column, outfp); - if (cp != rp) fprintf(outfp, "%*c\n ", column[0], ':'); - } while (cp != rp); - - free_ivector(umbrella); - free_ivector(column); -} - - -/* print unrooted tree file with branch lengths */ -void fputphylogeny(FILE *fp) -{ - Node *cp, *rp; - int n; - - cp = rp = Ctree->rootp; - putc('(', fp); - n = 1; - do { - cp = cp->isop->kinp; - if (cp->isop == NULL) { /* external node */ - if (n > 60) { - fprintf(fp, "\n"); - n = 2; - } - n += fputid(fp, cp->number); - fprintf(fp, ":%.5f", ((clockmode ? 
cp->lengthc : cp->length))*0.01); - n += 7; - cp = cp->kinp; - } else { /* internal node */ - if (cp->descen) { - if (n > 60) { - fprintf(fp, "\n"); - n = 1; - } - putc('(', fp); - n++; - } else { - putc(')', fp); - n++; - if (n > 60) { - fprintf(fp, "\n"); - n = 1; - } - /* internal label */ - if (cp->kinp->label != NULL) { - fprintf(fp, "%s", cp->kinp->label); - n += strlen(cp->kinp->label); - } - fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01); - n += 7; - } - } - if (!cp->descen && !cp->isop->descen && cp != rp) { - putc(',', fp); /* not last subtree */ - n++; - } - } while (cp != rp); - fprintf(fp, ")"); - /* internal label */ - if (cp->label != NULL) - fprintf(fp, "%s", cp->label); - fprintf(fp, ";\n"); -} - - -void resulttree(FILE *outfp) -{ - int n, ne, closeflag; - Node *ep, *ip; - double blen; - - closeflag = FALSE; - - if (clockmode) { - fprintf(outfp, "\n branch length nc/c"); - fprintf(outfp, " branch length nc/c (= non-clock/clock)\n"); - } else { - fprintf(outfp, "\n branch length S.E."); - fprintf(outfp, " branch length S.E.\n"); - } - for (n = 0; n < Numspc; n++) { - ep = Ctree->ebrnchp[n]; - ne = ep->number; - fputid10(outfp, ne); - fputs(" ", outfp); - fprintf(outfp, "%3d", ne + 1); - blen = (clockmode ? ep->lengthc : ep->length); - fprintf(outfp, "%9.5f", blen*0.01); - if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE; - if (clockmode) - fprintf(outfp, "%9.3f", (ep->length)/(ep->lengthc)); - else - fprintf(outfp, "%9.5f", 0.01*sqrt(ep->kinp->varlen)); - if (n < Numibrnch) { - ip = Ctree->ibrnchp[n]; - fprintf(outfp, "%8d", n + 1 + Numspc); - blen = (clockmode ? 
ip->lengthc : ip->length); - fprintf(outfp, "%9.5f", blen*0.01); - if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE; - if (clockmode) - fprintf(outfp, "%9.3f", (ip->length)/(ip->lengthc)); - else - fprintf(outfp, "%9.5f", 0.01*sqrt(ip->kinp->varlen)); - fputc('\n', outfp); - } else { - if (n == Numspc - 3) { - fputc('\n', outfp); - } else if (n == Numspc - 2) { - if (clockmode) { - if (!Convergc) - fprintf(outfp, " No convergence after %d iterations!\n", Numitc); - else - fprintf(outfp, " %d iterations until convergence\n", Numitc); - } else { - if (!Converg) - fprintf(outfp, " No convergence after %d iterations!\n", Numit); - else - fprintf(outfp, " %d iterations until convergence\n", Numit); - } - } else if (n == Numspc - 1) { - fprintf(outfp, " log L: %.2f\n", (clockmode ? Ctree->lklhdc : Ctree->lklhd)); - } else { - fputc('\n', outfp); - } - } - } - if(closeflag) - fprintf(outfp, "\nWARNING --- at least one branch length is close to an internal boundary!\n"); -} - - -/******************************************************************************/ -/* Neighbor-joining tree */ -/******************************************************************************/ - - -/* compute NJ tree and write to file */ -void njtree(FILE *fp) -{ - /* reserve memory for tree if necessary */ - if (mlmode != 3) { /* no tree */ - if (Ctree != NULL) - free_tree(Ctree, Numspc); - Ctree = new_tree(Maxspc, Numptrn, Seqpat); - Numbrnch = 2*Maxspc-3; - Numibrnch = Maxspc-3; - Numspc = Maxspc; - mlmode = 3; - } - - /* construct NJ tree from distance matrix */ - njdistantree(Ctree); - - fputphylogeny(fp); -} - - -/* construct NJ tree from distance matrix */ -void njdistantree(Tree *tr) -{ - /* removed, CZ, 05/16/01 */ -} - -/******************************************************************************/ -/* find best assignment of rate categories */ -/******************************************************************************/ - -/* find best assignment of rate categories */ 
-void findbestratecombination() -{ - int k, u; - double bestvalue, fv2; - dvector catprob; - dmatrix cdl; - - cdl = Ctree->condlkl; - catprob = new_dvector(numcats+1); - fv2 = (1.0-fracinv)/(double) numcats; - - for (k = 0; k < Numptrn; k++) { - /* zero rate */ - if (constpat[k] == TRUE) - catprob[0] = fracinv*Freqtpm[(int) Seqpat[0][k]]; - else - catprob[0] = 0.0; - /* non-zero-rates */ - for (u = 1; u < numcats+1; u++) - catprob[u] = fv2*cdl[u-1][k]; - /* find best */ - bestvalue = catprob[0]; - bestrate[k] = 0; - for (u = 1; u < numcats+1; u++) - if (catprob[u] >= bestvalue) { - bestvalue = catprob[u]; - bestrate[k] = u; - } - } - free_dvector(catprob); - bestratefound = 1; -} - -/* print best assignment of rate categories */ -void printbestratecombination(FILE *fp) -{ - int s, k; - - for (s = 0; s < Maxsite; s++) { - k = Alias[s]; - fprintf(fp, "%2d", bestrate[k]); - if ((s+1) % 30 == 0) - fprintf(fp, "\n"); - else if ((s+1) % 10 == 0) - fprintf(fp, " "); - } - if (s % 70 != 0) - fprintf(fp, "\n"); -} - - -/******************************************************************************/ -/* computation of clocklike branch lengths */ -/******************************************************************************/ - -/* checks wether e is a valid edge specification */ -int checkedge(int e) -{ - /* there are Numspc external branches: - 0 - Numspc-1 - there are Numibrnch internal branches: - Numspc - Numspc+Numibrnch-1 - */ - - if (e < 0) return FALSE; - if (e < Numspc+Numibrnch) return TRUE; - else return FALSE; -} - -/* print topology of subtree */ -void fputsubstree(FILE *fp, Node *ip) -{ - Node *cp; - - if (ip->isop == NULL) { /* terminal nodes */ - numtc += fputid(fp, ip->number); - } else { - cp = ip; - fprintf(fp, "("); - numtc += 1; - do { - cp = cp->isop->kinp; - if (cp->isop == NULL) { /* external node */ - numtc += fputid(fp, cp->number); - fprintf(fp, ":%.5f", (cp->lengthc)*0.01); - numtc += 7; - cp = cp->kinp; - } else { /* internal node */ - if 
(cp->height > 0.0) { - fprintf(fp, "("); - numtc += 1; - } else if (cp->height < 0.0) { - fprintf(fp, ")"); - numtc += 1; - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - /* internal label */ - if (cp->kinp->label != NULL) { - fprintf(fp, "%s", cp->kinp->label); - numtc += strlen(cp->kinp->label); - } - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - fprintf(fp, ":%.5f", (cp->lengthc)*0.01); - numtc += 6; - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - } - } - if (cp->height <= 0.0 && cp->isop->height <= 0.0 && - cp->isop != ip) { - putc(',', fp); /* not last subtree */ - numtc += 1; - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - } - } while (cp->isop != ip); - fprintf(fp, ")"); - numtc += 1; - } - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - -} - -/* print rooted tree file */ -void fputrooted(FILE *fp, int e) -{ - Node *rootbr; - - /* to be called only after clocklike branch - lengths have been computed */ - - /* pointer to root branch */ - if (e < Numspc) rootbr = Ctree->ebrnchp[e]; - else rootbr = Ctree->ibrnchp[e - Numspc]; - - fprintf(fp, "("); - numtc = 2; - fputsubstree(fp, rootbr); - /* internal label */ - if (rootbr->label != NULL) { - fprintf(fp, "%s", rootbr->label); - numtc += strlen(rootbr->label); - } - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - fprintf(fp, ":%.5f,", (hroot - rootbr->height)*0.01); - numtc += 7; - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - fputsubstree(fp, rootbr->kinp); - /* internal label */ - if (rootbr->kinp->label != NULL) { - fprintf(fp, "%s", rootbr->kinp->label); - numtc += strlen(rootbr->kinp->label); - } - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - fprintf(fp, ":%.5f);\n", (hroot - rootbr->kinp->height)*0.01); -} - -/* finds heights in subtree */ -void findheights(Node *ip) -{ - Node *cp, *rp; - - if (ip->isop != NULL) { /* forget terminal nodes */ - - cp = ip; - - /* initialise node */ - cp->height = 1.0; /* up */ - rp = 
cp; - while (rp->isop != cp) { - rp = rp->isop; - rp->height = -1.0; /* down */ - } - - do { - cp = cp->isop->kinp; - if (cp->isop == NULL) { /* external node */ - cp = cp->kinp; - } else { /* internal node */ - if (cp->height == 0.0) { /* node not yet visited */ - cp->height = 1.0; /* up */ - rp = cp; - while (rp->isop != cp) { - rp = rp->isop; - rp->height = -1.0; /* down */ - } - } else if (cp->kinp->height == 1.0) { - /* cp->kinp is next height pointer */ - heights[Numhts] = cp->kinp; - Numhts++; - } - } - } while (cp->isop != ip); - /* ip is last height pointer */ - heights[Numhts] = ip; - Numhts++; - } -} - - -/* initialise clocklike branch lengths (with root on edge e) */ -void initclock(int e) -{ - /* CZ */ -} - -/* approximate likelihood under the constaining assumption of - clocklike branch lengths (with root on edge e) */ -double clock_alklhd(int e) -{ - initclock(e); - Ctree->lklhdc = treelkl(Ctree); - - return Ctree->lklhdc; -} - -/* log-likelihood given height ht at node pointed to by chep */ -double heightlkl(double ht) -{ - Node *rp; - double len; - - /* adjust branch lengths */ - chep->height = ht; - /* descendent branches */ - rp = chep; - while (rp->isop != chep) { - rp = rp->isop; - len = chep->height - rp->kinp->height; - rp->kinp->lengthc = len; - rp->lengthc = len; - } - /* upward branch */ - if (chep == rootbr || chep->kinp == rootbr) { - len = (hroot - chep->height) + (hroot - chep->kinp->height); - chep->lengthc = len; - chep->kinp->lengthc = len; - } else { - rp = chep->kinp; - while (rp->isop->height <= 0.0) - rp = rp->isop; - chep->lengthc = rp->isop->height - chep->height; - chep->kinp->lengthc = rp->isop->height - chep->height; - } - - /* compute likelihood */ - Ctree->lklhdc = treelkl(Ctree); - - return -(Ctree->lklhdc); /* we use a minimizing procedure */ -} - -/* optimize current height */ -void optheight(void) -{ - double he, fx, f2x, minh, maxh, len; - Node *rp; - - /* current height */ - he = chep->height; - - /* minimum */ - 
minh = 0.0; - rp = chep; - while (rp->isop != chep) { - rp = rp->isop; - if (rp->kinp->height > minh) - minh = rp->kinp->height; - } - minh += MINARC; - - /* maximum */ - if (chep == rootbr || chep->kinp == rootbr) { - maxh = hroot; - } else { - rp = chep->kinp; - while (rp->isop->height <= 0.0) - rp = rp->isop; - maxh = rp->isop->height; - } - maxh -= MINARC; - - /* check borders for height */ - if (he < minh) he = minh; - if (he > maxh) he = maxh; - - /* optimization */ - if (!(he == minh && he == maxh)) - he = onedimenmin(minh, he, maxh, heightlkl, HEPSILON, &fx, &f2x); - - /* variance of height */ - f2x = fabs(f2x); - if (1.0/(maxhroot*maxhroot) < f2x) - chep->varheight = 1.0/f2x; - else - chep->varheight = maxhroot*maxhroot; - - /* adjust branch lengths */ - chep->height = he; - /* descendent branches */ - rp = chep; - while (rp->isop != chep) { - rp = rp->isop; - len = chep->height - rp->kinp->height; - rp->kinp->lengthc = len; - rp->lengthc = len; - } - /* upward branch */ - if (chep == rootbr || chep->kinp == rootbr) { - len = (hroot - chep->height) + (hroot - chep->kinp->height); - chep->lengthc = len; - chep->kinp->lengthc = len; - } else { - rp = chep->kinp; - while (rp->isop->height <= 0.0) - rp = rp->isop; - chep->lengthc = rp->isop->height - chep->height; - chep->kinp->lengthc = rp->isop->height - chep->height; - } -} - -/* log-likelihood given height ht at root */ -double rheightlkl(double ht) -{ - double len; - - /* adjust branch lengths */ - hroot = ht; - len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height); - rootbr->lengthc = len; - rootbr->kinp->lengthc = len; - - /* compute likelihood */ - Ctree->lklhdc = treelkl(Ctree); - - return -(Ctree->lklhdc); /* we use a minimizing procedure */ -} - -/* optimize height of root */ -void optrheight(void) -{ - double he, fx, f2x, minh, len; - - /* current height */ - he = hroot; - - /* minimum */ - if (rootbr->height > rootbr->kinp->height) - minh = rootbr->height; - else - minh = 
rootbr->kinp->height; - minh += MINARC; - - /* check borders for height */ - if (he < minh) he = minh; - if (he > maxhroot) he = maxhroot; - - /* optimization */ - he = onedimenmin(minh, he, maxhroot, rheightlkl, HEPSILON, &fx, &f2x); - - /* variance of height of root */ - f2x = fabs(f2x); - if (1.0/(maxhroot*maxhroot) < f2x) - varhroot = 1.0/f2x; - else - varhroot = maxhroot*maxhroot; - - /* adjust branch lengths */ - hroot = he; - len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height); - rootbr->lengthc = len; - rootbr->kinp->lengthc = len; -} - -/* exact likelihood under the constaining assumption of - clocklike branch lengths (with root on edge e) */ -double clock_lklhd(int e) -{ - int h, nconv; - double old; - - Numitc = 0; - Convergc = FALSE; - - initclock(e); - - do { - - Numitc++; - nconv = 0; - - /* optimize height of root */ - old = hroot; - optrheight(); - if (fabs(old - hroot) < HEPSILON) nconv++; - - /* optimize height of nodes */ - for (h = Numhts-1; h >= 0; h--) { - - /* pointer chep to current height node */ - chep = heights[h]; - - /* store old value */ - old = chep->height; - - /* find better height */ - optheight(); - - /* converged ? 
*/ - if (fabs(old - chep->height) < HEPSILON) nconv++; - } - - if (nconv == Numhts+1) Convergc = TRUE; - - } while (Numitc < MAXIT && !Convergc); - - /* compute final likelihood */ - Ctree->lklhdc = treelkl(Ctree); - - return Ctree->lklhdc; -} - -/* find out the edge containing the root */ -int findrootedge() -{ - int e, ebest; - double logbest, logtest; - - /* compute the likelihood for all edges and take the edge with - best likelihood (using approximate ML) */ - - ebest = 0; - logbest = clock_alklhd(0); - numbestroot = 1; - for (e = 1; e < Numspc+Numibrnch; e++) { - logtest = clock_alklhd(e); - if (logtest > logbest) { - ebest = e; - logbest = logtest; - numbestroot = 1; - } else if (logtest == logbest) { - numbestroot++; - } - } - - return ebest; -} - -/* show heights and corresponding standard errors */ -void resultheights(FILE *fp) -{ - int h, num; - Node *cp; - - fprintf(fp, " height S.E. of node common to branches\n"); - for (h = 0; h < Numhts; h++) { - fprintf(fp, "%.5f %.5f ", (heights[h]->height)*0.01, - sqrt(heights[h]->varheight)*0.01); - cp = heights[h]; - do { - num = (cp->number) + 1; - if (cp->kinp->isop != NULL) num += Numspc; /* internal branch */ - fprintf(fp, "%d ", num); - cp = cp->isop; - } while (cp != heights[h]); - fprintf(fp, "\n"); - - } - fprintf(fp, "%.5f %.5f of root at branch %d\n", - hroot*0.01, sqrt(varhroot)*0.01, locroot+1); -} - diff --git a/forester/archive/RIO/others/puzzle_dqo/src/ml3.c b/forester/archive/RIO/others/puzzle_dqo/src/ml3.c deleted file mode 100644 index a68a054..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/ml3.c +++ /dev/null @@ -1,350 +0,0 @@ -/* - * ml3.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. 
See http://www.opensource.org for details. - */ - - -#define EXTERN extern - - -/* prototypes */ -#include -#include -#include -#include "util.h" -#include "ml.h" -#include "gamma.h" - - - -/******************************************************************************/ -/* discrete Gamma-distribution and related stuff */ -/******************************************************************************/ - -/* compare general base frequencies with frequencies of taxon i with chi square */ -double homogentest(int taxon) -{ - return chi2test(Freqtpm, Basecomp[taxon], gettpmradix(), &chi2fail); -} - - -/* discrete Gamma according to Yang 1994 (JME 39:306-314) */ -void YangDiscreteGamma (double shape, int c, dvector x) -{ - double twoc, mu; - int i; - - twoc = 2.0*c; - mu = 0.0; - for (i = 0; i < c; i++) - { - /* corresponding rates */ - x[i] = icdfGamma ( (2.0*i+1.0)/twoc, shape); - mu += x[i]; - } - mu = mu/c; - - /* rescale for avarage rate of 1.0 */ - for (i = 0; i < c; i++) - { - x[i] /= mu; - } -} - -/* compute rates of each category when rates are Gamma-distributed */ -void updaterates() -{ - int i; - double alpha; - - if (numcats == 1) - { - Rates[0] = 1.0; - return; - } - if (Geta == 0.0) - { - for (i = 0; i < numcats; i++) - Rates[i] = 1.0; - return; - } - alpha = (1.0 - Geta)/Geta; - - YangDiscreteGamma (alpha, numcats, Rates); - - /* if invariable sites are present */ - for (i = 0; i < numcats; i++) - Rates[i] = Rates[i]/(1.0-fracinv); - - /* check for very small rates */ - for (i = 0; i < numcats; i++) - if (Rates[i] < 0.000001) Rates[i] = 0.000001; -} - - - -/******************************************************************************/ -/* parameter estimation */ -/******************************************************************************/ - -/* compute sample mean and standard deviation of sample mean */ -void computestat(double *data, int n, double *mean, double *err) -{ - int i; - double sum; - - sum = 0; - for (i = 0; i < n; i++) sum += data[i]; - 
(*mean) = sum/(double) n; - - sum = 0; - for (i = 0; i < n; i++) sum += (data[i] - (*mean))*(data[i] - (*mean)); - if (n != 1) - (*err) = sqrt(sum)/sqrt((double)(n-1)*n); /* unbiased estimator */ - else - (*err) = 0.0; /* if n == 1 */ -} - -/* compute ML value of quartet (a,b,c,d) */ -double quartetml(int a, int b, int c, int d) -{ - double d1, d2, d3; - - /* compute ML for all topologies */ - if (approxp_optn) { /* approximate parameter mode */ - d1 = quartet_alklhd(a,b,c,d); /* (a,b)-(c,d) */ - d2 = quartet_alklhd(a,c,b,d); /* (a,c)-(b,d) */ - d3 = quartet_alklhd(a,d,b,c); /* (a,d)-(b,c) */ - } else { - d1 = quartet_lklhd(a,b,c,d); /* (a,b)-(c,d) */ - d2 = quartet_lklhd(a,c,b,d); /* (a,c)-(b,d) */ - d3 = quartet_lklhd(a,d,b,c); /* (a,d)-(b,c) */ - } - - /* looking for max(d1, d2, d3) */ - if (d1 < d2) { /* d2 > d1 */ - if (d2 < d3) { /* d3 > d2 > d1 */ - /* d3 maximum */ - return d3; - } else { /* d2 >= d3 > d1 */ - /* d2 maximum */ - return d2; - } - } else { /* d1 >= d2 */ - if (d1 < d3) { /* d3 > d1 >= d2 */ - /* d3 maximum */ - return d3; - } else { /* d1 >= d2 && d1 >= d3 */ - /* d1 maximum */ - return d1; - } - } -} - -/* optimization function TSparam - quartets */ -double opttsq(double x) -{ - if (x < MINTS) TSparam = MINTS; - else if (x > MAXTS) TSparam = MAXTS; - else TSparam = x; - tranprobmat(); - distupdate(qca, qcb, qcc, qcd); - return (-quartetml(qca, qcb, qcc, qcd)); -} - -/* optimization function YRparam - quartets */ -double optyrq(double x) -{ - if (x < MINYR) YRparam = MINYR; - else if (x > MAXYR) YRparam = MAXYR; - else YRparam = x; - tranprobmat(); - distupdate(qca, qcb, qcc, qcd); - return (-quartetml(qca, qcb, qcc, qcd)); -} - -/* estimate substitution process parameters - random quartets */ -void optimseqevolparamsq() -{ - double tsmeanold, yrmeanold; - dvector tslist, yrlist; - int fin; - ivector taxon; - uli minqts, maxqts, n; - - - taxon = new_ivector(4); - - /* number of quartets to be investigated */ - minqts = (uli) floor(0.25 * 
MINPERTAXUM * Maxspc) + 1; - maxqts = (uli) floor(0.25 * MAXPERTAXUM * Maxspc) + 1; - if (Maxspc == 4) { - minqts = (uli) 1; - maxqts = (uli) 1; - } - - tslist = new_dvector(maxqts); - yrlist = new_dvector(maxqts); - - /* initialize averages */ - tsmean = TSparam; - yrmean = YRparam; - - fin = FALSE; - - /* investigate maxqts random quartets */ - for (n = 0; n < maxqts; n++) { - - /* choose random quartet */ - chooser(Maxspc, 4, taxon); - - /* - * optimize parameters on this quartet - */ - - qca = taxon[0]; - qcb = taxon[1]; - qcc = taxon[2]; - qcd = taxon[3]; - - /* initialize start values with average value */ - if ((SH_optn || nuc_optn) && optim_optn && (data_optn == 0)) TSparam = tsmean; - if ((nuc_optn && TN_optn) && optim_optn && (data_optn == 0)) YRparam = yrmean; - - /* estimation */ - twodimenmin(PEPS1, - (SH_optn || nuc_optn) && optim_optn && (data_optn == 0), - MINTS, &TSparam, MAXTS, opttsq, &tserr, - (nuc_optn && TN_optn) && optim_optn && (data_optn == 0), - MINYR, &YRparam, MAXYR, optyrq, &yrerr); - - - tsmeanold = tsmean; - yrmeanold = yrmean; - tslist[n] = TSparam; - yrlist[n] = YRparam; - computestat(tslist, n+1 , &tsmean, &tserr); - computestat(yrlist, n+1 , &yrmean, &yrerr); - - /* check whether the means are converging */ - if (n > minqts-2) { - if ((fabs(tsmean-tsmeanold) < TSDIFF) && - (fabs(yrmean-yrmeanold) < YRDIFF)) - fin = TRUE; - } - - /* investigate at least minqts quartets */ - if (n > minqts-2 && (fin || n > maxqts-2)) break; - } - - /* round estimated numbers to 2 digits after the decimal point */ - if (tserr != 0.0) tsmean = floor(100.0*tsmean+0.5)/100.0; - if (yrerr != 0.0) yrmean = floor(100.0*yrmean+0.5)/100.0; - - /* update ML engine */ - TSparam = tsmean; - YRparam = yrmean; - tranprobmat(); - - free_ivector(taxon); -} - -/* optimization function TSparam - tree */ -double opttst(double x) -{ - double result; - - if (x < MINTS) TSparam = MINTS; - else if (x > MAXTS) TSparam = MAXTS; - else TSparam = x; - tranprobmat(); - 
computedistan(); - if (approxp_optn) result = usertree_alklhd(); - else result = usertree_lklhd(); - - return (-result); -} - -/* optimization function YRparam - tree */ -double optyrt(double x) -{ - double result; - - if (x < MINYR) YRparam = MINYR; - else if (x > MAXYR) YRparam = MAXYR; - else YRparam = x; - tranprobmat(); - computedistan(); - if (approxp_optn) result = usertree_alklhd(); - else result = usertree_lklhd(); - - return (-result); -} - - -/* optimize substitution process parameters - tree */ -void optimseqevolparamst() -{ - twodimenmin(PEPS1, - (SH_optn || nuc_optn) && optim_optn && (data_optn == 0), - MINTS, &TSparam, MAXTS, opttst, &tserr, - (nuc_optn && TN_optn) && optim_optn && (data_optn == 0), - MINYR, &YRparam, MAXYR, optyrt, &yrerr); -} - - -/* optimization function fracinv */ -double optfi(double x) -{ - double result; - - if (x < MINFI) fracinv = MINFI; - else if (x > MAXFI) fracinv = MAXFI; - else fracinv = x; - - computedistan(); - if (approxp_optn) result = usertree_alklhd(); - else result = usertree_lklhd(); - - return (-result); -} - - -/* optimization function Geta */ -double optge(double x) -{ - double result; - - if (x < MINGE) Geta = MINGE; - else if (x > MAXGE) Geta = MAXGE; - else Geta = x; - - updaterates(); - - computedistan(); - if (approxp_optn) result = usertree_alklhd(); - else result = usertree_lklhd(); - - return (-result); -} - - -/* optimize rate heterogeneity parameters */ -void optimrateparams() -{ - twodimenmin(PEPS2, - fracinv_optim, - MINFI, &fracinv, fracconst, optfi, &fierr, - grate_optim, - MINGE, &Geta, MAXGE, optge, &geerr); - -} diff --git a/forester/archive/RIO/others/puzzle_dqo/src/model1.c b/forester/archive/RIO/others/puzzle_dqo/src/model1.c deleted file mode 100644 index 54fb889..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/model1.c +++ /dev/null @@ -1,326 +0,0 @@ -/* - * model1.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. 
Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -/* definitions */ -#define EXTERN extern - -/* prototypes */ -#include -#include "util.h" -#include "ml.h" - -/* number of states of the selected model */ -int gettpmradix() -{ - if (data_optn == 0) { /* nucleotides */ - if (nuc_optn) return 4; - if (SH_optn) return 16; - } else if (data_optn == 1) { /* amino acids */ - return 20; - } else { /* two-state model */ - return 2; - } - return 1; -} - -/* relative transition frequencies */ -void rtfdata(dmatrix q, double *f) -{ - double alp, alpy, alpr; - int i, j; - - if (data_optn == 0) - { /* nucleotides */ - - if (nuc_optn) - { /* 4x4 nucleotides */ - alp = 2.0*TSparam; - alpr = (alp * 2.0) / (YRparam + 1.0); - alpy = YRparam * alpr; - - q[0][1] = 1; q[0][2] = alpr; q[0][3] = 1; - q[1][2] = 1; q[1][3] = alpy; - q[2][3] = 1; - - f[0] = 0.25; f[1] = 0.25; f[2] = 0.25; f[3] = 0.25; - } - - if (SH_optn) - { /* 16x16 nucleotides */ - - alp = 2.0*TSparam; - - q[0][1] = 1; q[0][2] = alp; q[0][3] = 1; q[0][4] = 1; - q[0][5] = 0; q[0][6] = 0; q[0][7] = 0; q[0][8] = alp; - q[0][9] = 0; q[0][10] = 0; q[0][11] = 0; q[0][12] = 1; - q[0][13] = 0; q[0][14] = 0; q[0][15] = 0; - - q[1][2] = 1; q[1][3] = alp; q[1][4] = 0; q[1][5] = 1; - q[1][6] = 0; q[1][7] = 0; q[1][8] = 0; q[1][9] = alp; - q[1][10] = 0; q[1][11] = 0; q[1][12] = 0; q[1][13] = 1; - q[1][14] = 0; q[1][15] = 0; - - q[2][3] = 1; q[2][4] = 0; q[2][5] = 0; q[2][6] = 1; - q[2][7] = 0; q[2][8] = 0; q[2][9] = 0; q[2][10] = alp; - q[2][11] = 0; q[2][12] = 0; q[2][13] = 0; q[2][14] = 1; - q[2][15] = 0; - - q[3][4] = 0; q[3][5] = 0; q[3][6] = 0; q[3][7] = 1; - q[3][8] = 0; q[3][9] = 0; q[3][10] = 0; q[3][11] = alp; - q[3][12] = 0; q[3][13] = 0; q[3][14] = 0; q[3][15] = 1; - - q[4][5] = 1; 
q[4][6] = alp; q[4][7] = 1; q[4][8] = 1; - q[4][9] = 0; q[4][10] = 0; q[4][11] = 0; q[4][12] = alp; - q[4][13] = 0; q[4][14] = 0; q[4][15] = 0; - - q[5][6] = 1; q[5][7] = alp; q[5][8] = 0; q[5][9] = 1; - q[5][10] = 0; q[5][11] = 0; q[5][12] = 0; q[5][13] = alp; - q[5][14] = 0; q[5][15] = 0; - - q[6][7] = 1; q[6][8] = 0; q[6][9] = 0; q[6][10] = 1; - q[6][11] = 0; q[6][12] = 0; q[6][13] = 0; q[6][14] = alp; - q[6][15] = 0; - - q[7][8] = 0; q[7][9] = 0; q[7][10] = 0; q[7][11] = 1; - q[7][12] = 0; q[7][13] = 0; q[7][14] = 0; q[7][15] = alp; - - q[8][9] = 1; q[8][10] = alp; q[8][11] = 1; q[8][12] = 1; - q[8][13] = 0; q[8][14] = 0; q[8][15] = 0; - - q[9][10] = 1; q[9][11] = alp; q[9][12] = 0; q[9][13] = 1; - q[9][14] = 0; q[9][15] = 0; - - q[10][11] = 1; q[10][12] = 0; q[10][13] = 0; q[10][14] = 1; - q[10][15] = 0; - - q[11][12] = 0; q[11][13] = 0; q[11][14] = 0; q[11][15] = 1; - - q[12][13] = 1; q[12][14] = alp; q[12][15] = 1; - - q[13][14] = 1; q[13][15] = alp; - - q[14][15] = 1; - - - for (i = 0; i < 16; i++) f[i] = 0.0625; - } - } - else if (data_optn == 1) - { /* amino acids */ - if (Dayhf_optn) /* Dayhoff model */ - { - dyhfdata(q, f); - } - else if (Jtt_optn) /* JTT model */ - { - jttdata(q, f); - } - else if (blosum62_optn) /* BLOSUM 62 model */ - { - blosum62data(q, f); - } - else if (mtrev_optn) /* mtREV model */ - { - mtrevdata(q, f); - } - else if (cprev_optn) /* cpREV model */ - { - cprev45data(q, f); - } - else if (vtmv_optn) /* VT model */ - { - vtmvdata(q, f); - } - else /* if (wag_optn) */ /* WAG model */ - { - wagdata(q, f); - } - - } - else /* two-state model */ - { - q[0][1] = 1.0; - - f[0] = 0.5; f[1] = 0.5; - } - - /* fill matrix from upper triangle */ - for (i = 0; i < tpmradix; i++) - { - q[i][i] = 0.0; - for (j = i+1; j < tpmradix; j++) - { - q[j][i] = q[i][j]; - } - } -} - -/* transform letter codes to state numbers */ -int code2int(cvector c) -{ if (data_optn == 0) { /* nucleotides */ - if (nuc_optn) { /* 4x4 */ - switch (c[0]) { - case 'A': 
return 0; - case 'C': return 1; - case 'G': return 2; - case 'T': return 3; - case 'U': return 3; - default : return 4; - } - } - if (SH_optn) { /* 16x16 */ - if (c[0] == 'A') { - switch (c[1]) { - case 'A': return 0; /* AA */ - case 'C': return 1; /* AC */ - case 'G': return 2; /* AG */ - case 'T': return 3; /* AT */ - case 'U': return 3; /* AT */ - default: return 16; - } - } - if (c[0] == 'C') { - switch (c[1]) { - case 'A': return 4; /* CA */ - case 'C': return 5; /* CC */ - case 'G': return 6; /* CG */ - case 'T': return 7; /* CT */ - case 'U': return 7; /* CT */ - default: return 16; - } - } - if (c[0] == 'G') { - switch (c[1]) { - case 'A': return 8; /* GA */ - case 'C': return 9; /* GC */ - case 'G': return 10; /* GG */ - case 'T': return 11; /* GT */ - case 'U': return 11; /* GT */ - default: return 16; - } - } - if (c[0] == 'T' || c[0] == 'U') { - switch (c[1]) { - case 'A': return 12; /* TA */ - case 'C': return 13; /* TC */ - case 'G': return 14; /* TG */ - case 'T': return 15; /* TT */ - case 'U': return 15; /* TT */ - default: return 16; - } - } - return 16; - } - } else if (data_optn == 1) { /* amino acids */ - switch (c[0]) { - case 'A': return 0; - case 'C': return 4; - case 'D': return 3; - case 'E': return 6; - case 'F': return 13; - case 'G': return 7; - case 'H': return 8; - case 'I': return 9; - case 'K': return 11; - case 'L': return 10; - case 'M': return 12; - case 'N': return 2; - case 'P': return 14; - case 'Q': return 5; - case 'R': return 1; - case 'S': return 15; - case 'T': return 16; - case 'V': return 19; - case 'W': return 17; - case 'Y': return 18; - default : return 20; - } - } else { /* two-state model */ - switch (c[0]) { - case '0': return 0; - case '1': return 1; - default : return 2; - } - } - return 0; -} - -/* return letter code belonging to state number */ -char *int2code(int s) -{ - if (data_optn == 0) { /* nucleotides */ - if (nuc_optn) { /* 4x4 */ - switch (s) { - case 0: return "A"; - case 1: return "C"; - case 2: 
return "G"; - case 3: return "T"; - default : return "?"; - } - } - if (SH_optn) { /* 16x16 */ - switch (s) { - case 0: return "AA"; - case 1: return "AC"; - case 2: return "AG"; - case 3: return "AT"; - case 4: return "CA"; - case 5: return "CC"; - case 6: return "CG"; - case 7: return "CT"; - case 8: return "GA"; - case 9: return "GC"; - case 10: return "GG"; - case 11: return "GT"; - case 12: return "TA"; - case 13: return "TC"; - case 14: return "TG"; - case 15: return "TT"; - default : return "??"; - } - } - } else if (data_optn == 1) { /* amino acids */ - switch (s) { - case 0: return "A"; - case 1: return "R"; - case 2: return "N"; - case 3: return "D"; - case 4: return "C"; - case 5: return "Q"; - case 6: return "E"; - case 7: return "G"; - case 8: return "H"; - case 9: return "I"; - case 10: return "L"; - case 11: return "K"; - case 12: return "M"; - case 13: return "F"; - case 14: return "P"; - case 15: return "S"; - case 16: return "T"; - case 17: return "W"; - case 18: return "Y"; - case 19: return "V"; - default : return "?"; - } - } else { /* two-state model */ - switch (s) { - case 0: return "0"; - case 1: return "1"; - default : return "?"; - } - } - return "?"; -} diff --git a/forester/archive/RIO/others/puzzle_dqo/src/model2.c b/forester/archive/RIO/others/puzzle_dqo/src/model2.c deleted file mode 100644 index 9e2197f..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/model2.c +++ /dev/null @@ -1,1125 +0,0 @@ -/* - * model2.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - - -/* definitions */ -#define EXTERN extern - -/* prototypes */ -#include -#include "util.h" -#include "ml.h" - - -void jttdata(dmatrix q, double *f) -{ - /* - * JTT model for amino acid evolution - * D.T. Jones, W.R. Taylor, and J.M. Thornton - * "The rapid generation of mutation data matrices from protein sequences" - * CABIOS vol. 8 no. 3 1992 pp. 275-282 - */ - - q[0][1]=3.1628651460584e+00; q[0][2]=3.2804935927860e+00; - q[0][3]=4.8477237048666e+00; q[0][4]=3.4612244897959e+00; - q[0][5]=3.3130910900946e+00; q[0][6]=6.3199473337722e+00; - q[0][7]=1.0440154440154e+01; q[0][8]=1.3061224489796e+00; - q[0][9]=2.1726844583987e+00; q[0][10]=1.8443597219107e+00; - q[0][11]=2.2137668626773e+00; q[0][12]=2.7210884353741e+00; - q[0][13]=8.3265306122449e-01; q[0][14]=1.1537414965986e+01; - q[0][15]=2.2838213546288e+01; q[0][16]=2.7007955724663e+01; - q[0][17]=5.1311953352770e-01; q[0][18]=8.3673469387755e-01; - q[0][19]=1.7474335188621e+01; - - q[1][2]=2.6598918637222e+00; q[1][3]=9.1014867485456e-01; - q[1][4]=6.1624649859944e+00; q[1][5]=1.8036482885837e+01; - q[1][6]=1.8924731182796e+00; q[1][7]=8.1810886516769e+00; - q[1][8]=1.9119717452198e+01; q[1][9]=1.4410687351864e+00; - q[1][10]=2.2211961707760e+00; q[1][11]=3.9239234676922e+01; - q[1][12]=2.5060690943044e+00; q[1][13]=3.9439775910364e-01; - q[1][14]=4.1953094963476e+00; q[1][15]=5.9016766126741e+00; - q[1][16]=3.8437069743152e+00; q[1][17]=7.6766706682673e+00; - q[1][18]=1.4173669467787e+00; q[1][19]=1.0308123249300e+00; - - q[2][3]=3.2226935854843e+01; q[2][4]=1.8710963455150e+00; - q[2][5]=4.5351268130622e+00; q[2][6]=3.3951344979102e+00; - q[2][7]=4.5987249708180e+00; q[2][8]=2.3693774375271e+01; - q[2][9]=2.9235880398671e+00; q[2][10]=8.0960899565551e-01; - q[2][11]=1.5024269384537e+01; q[2][12]=1.9003322259136e+00; - q[2][13]=4.3853820598007e-01; q[2][14]=7.1083317047749e-01; - q[2][15]=2.9456208772690e+01; q[2][16]=1.3735908553410e+01; - q[2][17]=1.6706217370669e-01; q[2][18]=4.1661129568106e+00; 
- q[2][19]=9.7452934662237e-01; - - q[3][4]=6.2857142857143e-01; q[3][5]=3.0662020905923e+00; - q[3][6]=4.5450549450549e+01; q[3][7]=7.5402435402435e+00; - q[3][8]=6.0544672718586e+00; q[3][9]=6.8808114961961e-01; - q[3][10]=3.6130902064968e-01; q[3][11]=1.6718197057180e+00; - q[3][12]=1.0879120879121e+00; q[3][13]=1.9340659340659e-01; - q[3][14]=7.3949579831933e-01; q[3][15]=3.4196528109572e+00; - q[3][16]=2.4749487800335e+00; q[3][17]=3.4536891679749e-01; - q[3][18]=2.6895604395604e+00; q[3][19]=1.8608058608059e+00; - - q[4][5]=5.5191637630662e-01; q[4][6]=3.2442396313364e-01; - q[4][7]=3.3297297297297e+00; q[4][8]=4.3726708074534e+00; - q[4][9]=9.1868131868132e-01; q[4][10]=9.9466248037677e-01; - q[4][11]=2.9830508474576e-01; q[4][12]=2.4095238095238e+00; - q[4][13]=4.1485714285714e+00; q[4][14]=7.3949579831933e-01; - q[4][15]=1.2862939958592e+01; q[4][16]=2.8125907990315e+00; - q[4][17]=6.8244897959184e+00; q[4][18]=1.2885714285714e+01; - q[4][19]=3.7714285714286e+00; - - q[5][6]=2.0316061593796e+01; q[5][7]=1.3922214897825e+00; - q[5][8]=3.3861536130889e+01; q[5][9]=4.7172339855267e-01; - q[5][10]=4.2320327755868e+00; q[5][11]=1.7835941652395e+01; - q[5][12]=2.6573751451800e+00; q[5][13]=2.7595818815331e-01; - q[5][14]=9.4992143198743e+00; q[5][15]=3.2350653941322e+00; - q[5][16]=3.0973838067678e+00; q[5][17]=1.0512692882031e+00; - q[5][18]=1.5331010452962e+00; q[5][19]=1.0778164924506e+00; - - q[6][7]=6.6857641051189e+00; q[6][8]=1.4458024443999e+00; - q[6][9]=6.7068415455512e-01; q[6][10]=5.7932850559579e-01; - q[6][11]=1.0365070686558e+01; q[6][12]=1.0138248847926e+00; - q[6][13]=2.6359447004608e-01; q[6][14]=1.1291226167887e+00; - q[6][15]=1.8337006611901e+00; q[6][16]=1.9520424900414e+00; - q[6][17]=6.9519420671494e-01; q[6][18]=3.8018433179723e-01; - q[6][19]=2.7772657450077e+00; - - q[7][8]=1.2113479939567e+00; q[7][9]=3.2670032670033e-01; - q[7][10]=4.1817641817642e-01; q[7][11]=1.6354950592239e+00; - q[7][12]=7.6447876447876e-01; 
q[7][13]=3.0579150579151e-01; - q[7][14]=1.2391551215081e+00; q[7][15]=1.1138492529797e+01; - q[7][16]=1.8888816176952e+00; q[7][17]=3.3491450634308e+00; - q[7][18]=3.1853281853282e-01; q[7][19]=2.8416988416988e+00; - - q[8][9]=1.0931677018634e+00; q[8][10]=3.2194389461470e+00; - q[8][11]=3.1498052426571e+00; q[8][12]=1.9130434782609e+00; - q[8][13]=2.7329192546584e+00; q[8][14]=6.7304834977469e+00; - q[8][15]=4.3726708074534e+00; q[8][16]=2.8162964522581e+00; - q[8][17]=7.8083407275954e-01; q[8][18]=3.5118012422360e+01; - q[8][19]=7.2877846790890e-01; - - q[9][10]=1.4069798333535e+01; q[9][11]=1.2292791953809e+00; - q[9][12]=2.8366300366300e+01; q[9][13]=4.7384615384615e+00; - q[9][14]=5.8780435251023e-01; q[9][15]=2.4105749323141e+00; - q[9][16]=1.5243062022723e+01; q[9][17]=8.2888540031397e-01; - q[9][18]=1.8434065934066e+00; q[9][19]=5.7699633699634e+01; - - q[10][11]=8.8039805231089e-01; q[10][12]=2.2425954997384e+01; - q[10][13]=1.5099529042386e+01; q[10][14]=6.2626896912611e+00; - q[10][15]=3.4917298022888e+00; q[10][16]=1.6109411169944e+00; - q[10][17]=3.2366001345593e+00; q[10][18]=1.4505494505495e+00; - q[10][19]=1.0557823129252e+01; - - q[11][12]=3.6577885391445e+00; q[11][13]=1.4915254237288e-01; - q[11][14]=1.2868062479229e+00; q[11][15]=2.8162964522581e+00; - q[11][16]=5.7494151926786e+00; q[11][17]=5.4790729851263e-01; - q[11][18]=5.3268765133172e-01; q[11][19]=7.4899112187248e-01; - - q[12][13]=2.5666666666667e+00; q[12][14]=9.4491129785247e-01; - q[12][15]=1.6397515527950e+00; q[12][16]=1.2180790960452e+01; - q[12][17]=1.1972789115646e+00; q[12][18]=1.1130952380952e+00; - q[12][19]=1.7746031746032e+01; - - q[13][14]=8.8739495798319e-01; q[13][15]=5.6298136645963e+00; - q[13][16]=8.3099273607748e-01; q[13][17]=3.3224489795918e+00; - q[13][18]=3.3392857142857e+01; q[13][19]=3.6000000000000e+00; - - q[14][15]=1.6261762676085e+01; q[14][16]=6.8852490148602e+00; - q[14][17]=4.2256902761104e-01; q[14][18]=6.7787114845938e-01; - 
q[14][19]=1.2549019607843e+00; - - q[15][16]=2.7891216619293e+01; q[15][17]=1.8740017746229e+00; - q[15][18]=3.7349896480331e+00; q[15][19]=2.4182194616977e+00; - - q[16][17]=4.8702870978900e-01; q[16][18]=1.1985472154964e+00; - q[16][19]=6.7925746569814e+00; - - q[17][18]=4.6020408163265e+00; q[17][19]=1.4693877551020e+00; - - q[18][19]=1.0000000000000e+00; - - - f[0] = 0.077; f[1] = 0.051; f[2] = 0.043; f[3] = 0.052; - f[4] = 0.02; f[5] = 0.041; f[6] = 0.062; f[7] = 0.074; - f[8] = 0.023; f[9] = 0.052; f[10] = 0.091; f[11] = 0.059; - f[12] = 0.024; f[13] = 0.04; f[14] = 0.051; f[15] = 0.069; - f[16] = 0.059; f[17] = 0.014; f[18] = 0.032; f[19] = 0.066; -} - -void dyhfdata(dmatrix q, double *f) -{ - /* - * Dayhoff model for amino acid evolution - * Dayhoff, M.O., Schwartz, R.M., Orcutt, B.C. (1978) - * "A model of evolutionary change in proteins." - * Dayhoff, M.O. (ed.) Atlas of Protein Sequence Structur., Vol5, Suppl. 3, - * National Biomedical Research Foundation, Washington DC, pp. 345-352. 
- */ - - q[0][1]=9.6472567159749e-01; q[0][2]=3.5927991886410e+00; - q[0][3]=4.3200552414656e+00; q[0][4]=1.3184584178499e+00; - q[0][5]=3.2267534963169e+00; q[0][6]=7.0141987829615e+00; - q[0][7]=8.5773867857875e+00; q[0][8]=8.1434196396611e-01; - q[0][9]=2.3518447453539e+00; q[0][10]=1.4735711728911e+00; - q[0][11]=9.3940162271805e-01; q[0][12]=2.5490196078431e+00; - q[0][13]=6.5922920892495e-01; q[0][14]=8.9189834148670e+00; - q[0][15]=1.4540712836859e+01; q[0][16]=1.3411904595370e+01; - q[0][17]=3.8517964118027e-02; q[0][18]=8.7897227856660e-01; - q[0][19]=7.4036511156187e+00; - - q[1][2]=1.1890243902439e+00; q[1][3]=5.9525626545377e-02; - q[1][4]=8.4778922655537e-01; q[1][5]=8.8348561504191e+00; - q[1][6]=5.5954088952654e-02; q[1][7]=3.1434881434075e-01; - q[1][8]=8.4753987678285e+00; q[1][9]=2.2684090115941e+00; - q[1][10]=5.5954088952654e-01; q[1][11]=1.6681312769010e+01; - q[1][12]=3.1707317073171e+00; q[1][13]=4.8959827833572e-01; - q[1][14]=3.6754156468900e+00; q[1][15]=5.4755072760812e+00; - q[1][16]=9.6472567159749e-01; q[1][17]=7.5538020086083e+00; - q[1][18]=2.7977044476327e-01; q[1][19]=8.6083213773314e-01; - - q[2][3]=3.2459324155194e+01; q[2][4]=7.3852625416383e-02; - q[2][5]=3.7732198142415e+00; q[2][6]=5.3911764705882e+00; - q[2][7]=5.0264375413087e+00; q[2][8]=1.9061418685121e+01; - q[2][9]=2.7901430842607e+00; q[2][10]=1.2482698961938e+00; - q[2][11]=1.1542279411765e+01; q[2][12]=1.9117647058824e-01; - q[2][13]=5.0183823529412e-01; q[2][14]=1.5181660899654e+00; - q[2][15]=1.7697478991597e+01; q[2][16]=8.3557302231237e+00; - q[2][17]=8.6029411764706e-01; q[2][18]=3.4411764705882e+00; - q[2][19]=5.7352941176471e-01; - - q[3][4]=2.5534152404601e-02; q[3][5]=4.8811013767209e+00; - q[3][6]=4.0561952440551e+01; q[3][7]=4.4423506911730e+00; - q[3][8]=3.0865788117500e+00; q[3][9]=8.5749078239692e-01; - q[3][10]=2.5926985518518e-02; q[3][11]=2.5930851063830e+00; - q[3][12]=1.1667143483333e-01; q[3][13]=1.2963492759259e-02; - 
q[3][14]=4.7853935065891e-01; q[3][15]=3.4167709637046e+00; - q[3][16]=2.3984722282163e+00; q[3][17]=3.2408731898147e-02; - q[3][18]=8.1351689612015e-02; q[3][19]=6.3829787234043e-01; - - q[4][5]=2.1864264103535e-02; q[4][6]=1.4770525083277e-02; - q[4][7]=3.9055458751427e-01; q[4][8]=1.0223340673168e+00; - q[4][9]=1.5970515970516e+00; q[4][10]=3.9098448749850e-02; - q[4][11]=8.0776309049169e-03; q[4][12]=1.4155086538140e-01; - q[4][13]=8.6898395721925e-02; q[4][14]=6.8155604487784e-01; - q[4][15]=5.8097784568373e+00; q[4][16]=5.9929928084086e-01; - q[4][17]=3.4759358288770e-01; q[4][18]=3.4759358288770e+00; - q[4][19]=1.7647058823529e+00; - - q[5][6]=2.5476780185759e+01; q[5][7]=1.0174974779977e+00; - q[5][8]=2.1573939173192e+01; q[5][9]=6.5266504894988e-01; - q[5][10]=2.6634492806410e+00; q[5][11]=5.5466331269350e+00; - q[5][12]=4.0247678018576e+00; q[5][13]=1.8038017885416e-02; - q[5][14]=5.5044618466582e+00; q[5][15]=2.0267580716497e+00; - q[5][16]=1.9256432155439e+00; q[5][17]=9.6202762055552e-02; - q[5][18]=1.0061919504644e-01; q[5][19]=1.2538699690402e+00; - - q[6][7]=2.8869795109055e+00; q[6][8]=1.5519031141869e+00; - q[6][9]=2.1701112877583e+00; q[6][10]=4.0484429065744e-01; - q[6][11]=2.9823529411765e+00; q[6][12]=1.0705882352941e+00; - q[6][13]=1.9801735189768e-02; q[6][14]=1.7993079584775e+00; - q[6][15]=2.8184873949580e+00; q[6][16]=1.2261663286004e+00; - q[6][17]=7.3114099162219e-02; q[6][18]=7.6470588235294e-01; - q[6][19]=1.3058823529412e+00; - - q[7][8]=3.7906768788150e-01; q[7][9]=2.3128004846840e-02; - q[7][10]=2.5776602775942e-01; q[7][11]=9.6662260409782e-01; - q[7][12]=6.0145406477198e-01; q[7][13]=5.4775280898876e-01; - q[7][14]=1.2382877804129e+00; q[7][15]=8.2853366065527e+00; - q[7][16]=1.1110604644803e+00; q[7][17]=1.2888301387971e-01; - q[7][18]=1.7114723586662e-02; q[7][19]=1.9233311302049e+00; - - q[8][9]=2.7354343963341e-01; q[8][10]=1.5876246692449e+00; - q[8][11]=9.6993944636678e-01; q[8][12]=1.2544085640577e-01; - 
q[8][13]=1.6868512110727e+00; q[8][14]=3.3075513942601e+00; - q[8][15]=1.2530894710826e+00; q[8][16]=8.1434196396611e-01; - q[8][17]=1.0121107266436e+00; q[8][18]=4.4982698961938e+00; - q[8][19]=1.5570934256055e+00; - - q[9][10]=9.2275320303002e+00; q[9][11]=1.6663354531002e+00; - q[9][12]=1.1780604133545e+01; q[9][13]=6.9753577106518e+00; - q[9][14]=4.2551201720752e-01; q[9][15]=8.8575970928912e-01; - q[9][16]=6.8951811852420e+00; q[9][17]=9.8802836705702e-02; - q[9][18]=1.3434022257552e+00; q[9][19]=3.1526232114467e+01; - - q[10][11]=6.5787197231834e-01; q[10][12]=1.8622837370242e+01; - q[10][13]=5.6340830449827e+00; q[10][14]=1.1377976796255e+00; - q[10][15]=6.1690558576372e-01; q[10][16]=1.2098794893211e+00; - q[10][17]=1.7543252595156e+00; q[10][18]=1.0346020761246e+00; - q[10][19]=6.2906574394464e+00; - - q[11][12]=8.6029411764706e+00; q[11][13]=6.6640454965565e-03; - q[11][14]=1.2089100346021e+00; q[11][15]=3.4411764705882e+00; - q[11][16]=4.9442190669371e+00; q[11][17]=3.4272233982290e-02; - q[11][18]=4.7794117647059e-01; q[11][19]=3.7500000000000e-01; - - q[12][13]=3.2500000000000e+00; q[12][14]=5.9976931949250e-01; - q[12][15]=2.1848739495798e+00; q[12][16]=3.6916835699797e+00; - q[12][17]=1.6247577591604e-01; q[12][18]=1.1508700794053e-01; - q[12][19]=9.0588235294118e+00; - - q[13][14]=3.9359861591695e-01; q[13][15]=1.6386554621849e+00; - q[13][16]=4.9442190669371e-01; q[13][17]=2.8676470588235e+00; - q[13][18]=2.4852941176471e+01; q[13][19]=4.4117647058824e-01; - - q[14][15]=8.6431043005437e+00; q[14][16]=2.8308077795013e+00; - q[14][17]=3.5840244687362e-02; q[14][18]=4.3804743506776e-02; - q[14][19]=1.7301038062284e+00; - - q[15][16]=1.9663865546218e+01; q[15][17]=2.7857142857143e+00; - q[15][18]=1.2016806722689e+00; q[15][19]=1.0840336134454e+00; - - q[16][17]=4.2019597219666e-02; q[16][18]=1.5162271805274e+00; - q[16][19]=5.6592292089249e+00; - - q[17][18]=2.2941176470588e+00; q[17][19]=1.2654363316538e-01; - - q[18][19]=1.0000000000000e+00; - - - 
f[0] = 0.087; f[1] = 0.041; f[2] = 0.040; f[3] = 0.047; - f[4] = 0.033; f[5] = 0.038; f[6] = 0.05; f[7] = 0.089; - f[8] = 0.034; f[9] = 0.037; f[10] = 0.085; f[11] = 0.08; - f[12] = 0.015; f[13] = 0.04; f[14] = 0.051; f[15] = 0.07; - f[16] = 0.058; f[17] = 0.01; f[18] = 0.03; f[19] = 0.065; -} - -void mtrevdata(dmatrix q, double *f) -{ - /* - * mtREV24 model of amino acid evolution - * (complete sequence data of mtDNA from 24 vertebrate species) - * Adachi, J. and Hasegawa, M. (1996) - */ - - q[0][1]=1.2199217606346e+01; q[0][2]=1.4182139942122e+01; - q[0][3]=9.2985091873208e+00; q[0][4]=3.1542792981957e+01; - q[0][5]=1.0025852846688e+00; q[0][6]=5.1418866803338e+00; - q[0][7]=6.3531246495131e+01; q[0][8]=7.3137132861715e+00; - q[0][9]=5.0782382656186e+01; q[0][10]=1.3399741808481e+01; - q[0][11]=4.4021672780560e+00; q[0][12]=7.4673480520104e+01; - q[0][13]=3.3513021631978e+00; q[0][14]=2.8582502221773e+01; - q[0][15]=2.0413623195312e+02; q[0][16]=2.5301305153906e+02; - q[0][17]=1.0000000000000e+00; q[0][18]=3.4084158197615e+00; - q[0][19]=1.0266468401249e+02; - - q[1][2]=6.9661274444534e+00; q[1][3]=1.0000000000000e+00; - q[1][4]=5.4384584796568e+01; q[1][5]=1.1631134513343e+02; - q[1][6]=1.0000000000000e+00; q[1][7]=1.2122831341194e+01; - q[1][8]=8.6961067087353e+01; q[1][9]=1.0000000000000e+00; - q[1][10]=8.1976829394538e+00; q[1][11]=7.4423215395318e+01; - q[1][12]=1.0000000000000e+00; q[1][13]=2.4659158338099e+00; - q[1][14]=1.2439947713615e+01; q[1][15]=3.1791814866372e+00; - q[1][16]=1.0935327216119e+00; q[1][17]=1.1550775790126e+01; - q[1][18]=1.0000000000000e+00; q[1][19]=4.0211417480338e+00; - - q[2][3]=4.1809325468160e+02; q[2][4]=3.1020979842967e+01; - q[2][5]=9.1349622725361e+01; q[2][6]=3.3185663516310e+01; - q[2][7]=2.8052324651124e+01; q[2][8]=2.6112087577885e+02; - q[2][9]=1.4261453863336e+01; q[2][10]=7.9775653461977e+00; - q[2][11]=3.2036829276162e+02; q[2][12]=3.4424354918739e+01; - q[2][13]=7.9996445145608e+00; q[2][14]=3.8586541461044e+01; - 
q[2][15]=2.6020426225852e+02; q[2][16]=1.2550758780474e+02; - q[2][17]=5.6207759736659e+00; q[2][18]=1.0071406219571e+02; - q[2][19]=1.0000000000000e+00; - - q[3][4]=1.0000000000000e+00; q[3][5]=2.9097352675564e+01; - q[3][6]=3.0713149855302e+02; q[3][7]=2.9877072751897e+01; - q[3][8]=5.9995408885817e+01; q[3][9]=2.2827096245105e+00; - q[3][10]=1.0000000000000e+00; q[3][11]=1.2183938185384e+00; - q[3][12]=1.0000000000000e+00; q[3][13]=2.6221929413096e+00; - q[3][14]=7.0708004204733e+00; q[3][15]=3.6327934317139e+01; - q[3][16]=1.4743408713748e+01; q[3][17]=1.0453246057102e+01; - q[3][18]=1.1165627147496e+01; q[3][19]=1.0000000000000e+00; - - q[4][5]=3.9599394038972e+01; q[4][6]=1.0000000000000e+00; - q[4][7]=1.6163581056674e+01; q[4][8]=7.4467985406234e+01; - q[4][9]=3.3018175376623e+01; q[4][10]=1.3500725995091e+01; - q[4][11]=1.0000000000000e+00; q[4][12]=3.2504095376923e+00; - q[4][13]=3.7264767083096e+01; q[4][14]=1.6454136037822e+01; - q[4][15]=1.4581783243113e+02; q[4][16]=9.4720031458442e+01; - q[4][17]=1.7684087896962e+01; q[4][18]=1.3409157685926e+02; - q[4][19]=1.0000000000000e+00; - - q[5][6]=1.6503249008836e+02; q[5][7]=3.5530760735494e+00; - q[5][8]=3.0652523140859e+02; q[5][9]=4.3905393139325e+00; - q[5][10]=2.0895470525345e+01; q[5][11]=2.4504076430724e+02; - q[5][12]=2.4931300477797e+01; q[5][13]=1.0059428264289e+01; - q[5][14]=7.2256314165467e+01; q[5][15]=2.8480937892158e+01; - q[5][16]=4.9962974409828e+01; q[5][17]=1.0000000000000e+00; - q[5][18]=2.0430790980529e+01; q[5][19]=9.9986289000676e+00; - - q[6][7]=1.4884496769963e+01; q[6][8]=2.5853576435567e+01; - q[6][9]=1.7418201388328e+00; q[6][10]=1.0000000000000e+00; - q[6][11]=1.6519126809071e+02; q[6][12]=1.0000000000000e+00; - q[6][13]=1.4067850525292e+00; q[6][14]=6.7547121641947e+00; - q[6][15]=2.8794794140840e+01; q[6][16]=7.8001372062558e+00; - q[6][17]=1.0000000000000e+00; q[6][18]=6.9067239183061e+00; - q[6][19]=1.1127702362585e+01; - - q[7][8]=1.0000000000000e+00; 
q[7][9]=3.1466649021550e+00; - q[7][10]=1.2699794194865e+00; q[7][11]=1.1962111069278e+01; - q[7][12]=1.0000000000000e+00; q[7][13]=1.0000000000000e+00; - q[7][14]=1.0000000000000e+00; q[7][15]=6.6277950574411e+01; - q[7][16]=5.8800079133028e+00; q[7][17]=5.7494182626674e+00; - q[7][18]=1.6887657206208e+00; q[7][19]=1.3320553471351e+00; - - q[8][9]=6.4536986087271e+00; q[8][10]=6.0472584534958e+00; - q[8][11]=6.7197196398961e+01; q[8][12]=6.2977633277779e+00; - q[8][13]=2.5347805183364e+01; q[8][14]=3.2089868698728e+01; - q[8][15]=4.0766987134407e+01; q[8][16]=2.3570850628539e+01; - q[8][17]=3.7286635325194e+00; q[8][18]=3.5270764890474e+02; - q[8][19]=1.0000000000000e+00; - - q[9][10]=1.7320653206333e+02; q[9][11]=1.0298655619743e+01; - q[9][12]=2.7262244199514e+02; q[9][13]=4.4561065036310e+01; - q[9][14]=1.0856482766156e+01; q[9][15]=2.5107659603898e+01; - q[9][16]=1.9391167162525e+02; q[9][17]=1.0000000000000e+00; - q[9][18]=1.3161329199391e+01; q[9][19]=6.4365086389428e+02; - - q[10][11]=7.8314019154706e+00; q[10][12]=2.8290920517725e+02; - q[10][13]=1.1371735519833e+02; q[10][14]=2.1105885757279e+01; - q[10][15]=3.8741359395934e+01; q[10][16]=6.6524559321657e+01; - q[10][17]=1.7071378554833e+01; q[10][18]=2.3234516108847e+01; - q[10][19]=4.8247261078055e+01; - - q[11][12]=4.8092094826036e+01; q[11][13]=3.3887559483420e+00; - q[11][14]=2.6368577564199e+01; q[11][15]=5.5679895711418e+01; - q[11][16]=7.1750284708933e+01; q[11][17]=1.2631893872825e+01; - q[11][18]=2.6932728996777e+01; q[11][19]=1.0000000000000e+00; - - q[12][13]=4.7798798034572e+01; q[12][14]=9.9165053447429e+00; - q[12][15]=5.8505442466161e+01; q[12][16]=2.7798190504760e+02; - q[12][17]=1.1427000119701e+01; q[12][18]=2.1029990530586e+01; - q[12][19]=2.0397078683768e+02; - - q[13][14]=9.1089574817139e+00; q[13][15]=3.3835737720574e+01; - q[13][16]=1.7815549567056e+01; q[13][17]=4.1272404968214e+00; - q[13][18]=2.4504156395152e+02; q[13][19]=3.3435675442163e+00; - - q[14][15]=8.9421193040709e+01; 
q[14][16]=6.7485067008375e+01; - q[14][17]=2.2161693733113e+00; q[14][18]=8.5338209390745e+00; - q[14][19]=4.3342126659660e+00; - - q[15][16]=3.1432036618746e+02; q[15][17]=2.0305343047059e+01; - q[15][18]=3.4167877957799e+01; q[15][19]=1.0000000000000e+00; - - q[16][17]=5.2559565123081e+00; q[16][18]=2.0382362288681e+01; - q[16][19]=1.0765527137500e+02; - - q[17][18]=1.3814733274637e+01; q[17][19]=2.8259139240676e+00; - - q[18][19]=1.0000000000000e+00; - - - /* amino acid frequencies */ - f[0]=0.072; f[1]=0.019; f[2]=0.039; f[3]=0.019; f[4]=0.006; - f[5]=0.025; f[6]=0.024; f[7]=0.056; f[8]=0.028; f[9]=0.088; - f[10]=0.168; f[11]=0.023; f[12]=0.054; f[13]=0.061; f[14]=0.054; - f[15]=0.072; f[16]=0.086; f[17]=0.029; f[18]=0.033; f[19]=0.043; -} - -void blosum62data(dmatrix q, double *f) -{ - /* - * BLOSUM62 model of amino acid evolution - * - * S. Henikoff and J. G. Henikoff. 1992. PNAS USA 89:10915-10919. - * - */ - - q[0][1]=7.3579038969751e-01; q[0][2]=4.8539105546575e-01; - q[0][3]=5.4316182089867e-01; q[0][4]=1.4599953104700e+00; - q[0][5]=1.1997057046020e+00; q[0][6]=1.1709490427999e+00; - q[0][7]=1.9558835749595e+00; q[0][8]=7.1624144499779e-01; - q[0][9]=6.0589900368677e-01; q[0][10]=8.0001653051838e-01; - q[0][11]=1.2952012667833e+00; q[0][12]=1.2537582666635e+00; - q[0][13]=4.9296467974759e-01; q[0][14]=1.1732759009239e+00; - q[0][15]=4.3250926870566e+00; q[0][16]=1.7291780194850e+00; - q[0][17]=4.6583936772479e-01; q[0][18]=7.1820669758623e-01; - q[0][19]=2.1877745220045e+00; - - q[1][2]=1.2974467051337e+00; q[1][3]=5.0096440855513e-01; - q[1][4]=2.2782657420895e-01; q[1][5]=3.0208336100636e+00; - q[1][6]=1.3605741904203e+00; q[1][7]=4.1876330851753e-01; - q[1][8]=1.4561411663360e+00; q[1][9]=2.3203644514174e-01; - q[1][10]=6.2271166969249e-01; q[1][11]=5.4111151414889e+00; - q[1][12]=9.8369298745695e-01; q[1][13]=3.7164469320875e-01; - q[1][14]=4.4813366171831e-01; q[1][15]=1.1227831042096e+00; - q[1][16]=9.1466595456337e-01; 
q[1][17]=4.2638231012175e-01; - q[1][18]=7.2051744121611e-01; q[1][19]=4.3838834377202e-01; - - q[2][3]=3.1801000482161e+00; q[2][4]=3.9735894989702e-01; - q[2][5]=1.8392161469920e+00; q[2][6]=1.2404885086396e+00; - q[2][7]=1.3558723444845e+00; q[2][8]=2.4145014342081e+00; - q[2][9]=2.8301732627800e-01; q[2][10]=2.1188815961519e-01; - q[2][11]=1.5931370434574e+00; q[2][12]=6.4844127878707e-01; - q[2][13]=3.5486124922252e-01; q[2][14]=4.9488704370192e-01; - q[2][15]=2.9041016564560e+00; q[2][16]=1.8981736345332e+00; - q[2][17]=1.9148204624678e-01; q[2][18]=5.3822251903674e-01; - q[2][19]=3.1285879799342e-01; - - q[3][4]=2.4083661480204e-01; q[3][5]=1.1909457033960e+00; - q[3][6]=3.7616252083685e+00; q[3][7]=7.9847324896839e-01; - q[3][8]=7.7814266402188e-01; q[3][9]=4.1855573246161e-01; - q[3][10]=2.1813157759360e-01; q[3][11]=1.0324479249521e+00; - q[3][12]=2.2262189795786e-01; q[3][13]=2.8173069420651e-01; - q[3][14]=7.3062827299842e-01; q[3][15]=1.5827541420653e+00; - q[3][16]=9.3418750943056e-01; q[3][17]=1.4534504627853e-01; - q[3][18]=2.6142220896504e-01; q[3][19]=2.5812928941763e-01; - - q[4][5]=3.2980150463028e-01; q[4][6]=1.4074889181440e-01; - q[4][7]=4.1820319228376e-01; q[4][8]=3.5405810983129e-01; - q[4][9]=7.7489402279418e-01; q[4][10]=8.3184264014158e-01; - q[4][11]=2.8507880090648e-01; q[4][12]=7.6768882347954e-01; - q[4][13]=4.4133747118660e-01; q[4][14]=3.5600849876863e-01; - q[4][15]=1.1971884150942e+00; q[4][16]=1.1198313585160e+00; - q[4][17]=5.2766441887169e-01; q[4][18]=4.7023773369610e-01; - q[4][19]=1.1163524786062e+00; - - q[5][6]=5.5289191779282e+00; q[5][7]=6.0984630538281e-01; - q[5][8]=2.4353411311401e+00; q[5][9]=2.3620245120365e-01; - q[5][10]=5.8073709318144e-01; q[5][11]=3.9452776745146e+00; - q[5][12]=2.4948960771127e+00; q[5][13]=1.4435695975031e-01; - q[5][14]=8.5857057567418e-01; q[5][15]=1.9348709245965e+00; - q[5][16]=1.2774802945956e+00; q[5][17]=7.5865380864172e-01; - q[5][18]=9.5898974285014e-01; 
q[5][19]=5.3078579012486e-01; - - q[6][7]=4.2357999217628e-01; q[6][8]=1.6268910569817e+00; - q[6][9]=1.8684804693170e-01; q[6][10]=3.7262517508685e-01; - q[6][11]=2.8024271516787e+00; q[6][12]=5.5541539747043e-01; - q[6][13]=2.9140908416530e-01; q[6][14]=9.2656393484598e-01; - q[6][15]=1.7698932389373e+00; q[6][16]=1.0710972360073e+00; - q[6][17]=4.0763564893830e-01; q[6][18]=5.9671930034577e-01; - q[6][19]=5.2425384633796e-01; - - q[7][8]=5.3985912495418e-01; q[7][9]=1.8929629237636e-01; - q[7][10]=2.1772115923623e-01; q[7][11]=7.5204244030271e-01; - q[7][12]=4.5943617357855e-01; q[7][13]=3.6816646445253e-01; - q[7][14]=5.0408659952683e-01; q[7][15]=1.5093262532236e+00; - q[7][16]=6.4143601140497e-01; q[7][17]=5.0835892463812e-01; - q[7][18]=3.0805573703500e-01; q[7][19]=2.5334079019018e-01; - - q[8][9]=2.5271844788492e-01; q[8][10]=3.4807220979697e-01; - q[8][11]=1.0225070358890e+00; q[8][12]=9.8431152535870e-01; - q[8][13]=7.1453370392764e-01; q[8][14]=5.2700733915060e-01; - q[8][15]=1.1170297629105e+00; q[8][16]=5.8540709022472e-01; - q[8][17]=3.0124860078016e-01; q[8][18]=4.2189539693890e+00; - q[8][19]=2.0155597175031e-01; - - q[9][10]=3.8909637733035e+00; q[9][11]=4.0619358664202e-01; - q[9][12]=3.3647977631042e+00; q[9][13]=1.5173593259539e+00; - q[9][14]=3.8835540920564e-01; q[9][15]=3.5754441245967e-01; - q[9][16]=1.1790911972601e+00; q[9][17]=3.4198578754023e-01; - q[9][18]=6.7461709322842e-01; q[9][19]=8.3118394054582e+00; - - q[10][11]=4.4557027426059e-01; q[10][12]=6.0305593795716e+00; - q[10][13]=2.0648397032375e+00; q[10][14]=3.7455568747097e-01; - q[10][15]=3.5296918452729e-01; q[10][16]=9.1525985769421e-01; - q[10][17]=6.9147463459998e-01; q[10][18]=8.1124585632307e-01; - q[10][19]=2.2314056889131e+00; - - q[11][12]=1.0730611843319e+00; q[11][13]=2.6692475051102e-01; - q[11][14]=1.0473834507215e+00; q[11][15]=1.7521659178195e+00; - q[11][16]=1.3038752007987e+00; q[11][17]=3.3224304063396e-01; - q[11][18]=7.1799348690032e-01; 
q[11][19]=4.9813847530407e-01; - - q[12][13]=1.7738551688305e+00; q[12][14]=4.5412362510273e-01; - q[12][15]=9.1872341574605e-01; q[12][16]=1.4885480537218e+00; - q[12][17]=8.8810109815193e-01; q[12][18]=9.5168216224591e-01; - q[12][19]=2.5758507553153e+00; - - q[13][14]=2.3359790962888e-01; q[13][15]=5.4002764482413e-01; - q[13][16]=4.8820611879305e-01; q[13][17]=2.0743248934965e+00; - q[13][18]=6.7472604308008e+00; q[13][19]=8.3811961017754e-01; - - q[14][15]=1.1691295777157e+00; q[14][16]=1.0054516831488e+00; - q[14][17]=2.5221483002727e-01; q[14][18]=3.6940531935451e-01; - q[14][19]=4.9690841067567e-01; - - q[15][16]=5.1515562922704e+00; q[15][17]=3.8792562209837e-01; - q[15][18]=7.9675152076106e-01; q[15][19]=5.6192545744165e-01; - - q[16][17]=5.1312812689059e-01; q[16][18]=8.0101024319939e-01; - q[16][19]=2.2530740511763e+00; - - q[17][18]=4.0544190065580e+00; q[17][19]=2.6650873142646e-01; - - q[18][19]=1.0000000000000e+00; - - - f[0]=0.074; f[1]=0.052; f[2]=0.045; f[3]=0.054; - f[4]=0.025; f[5]=0.034; f[6]=0.054; f[7]=0.074; - f[8]=0.026; f[9]=0.068; f[10]=0.099; f[11]=0.058; - f[12]=0.025; f[13]=0.047; f[14]=0.039; f[15]=0.057; - f[16]=0.051; f[17]=0.013; f[18]=0.032; f[19]=0.073; -} - - - -void vtmvdata(dmatrix q, double *f) -{ - /* - * variable time (VT) model for amino acid evolution - * Mueller, T. and Vingron, M. (1999) - * "Modeling Amino Acid Replacement" - * Journal of Comp. 
Biology - */ - -/* amino acid frequencies */ - -f[0]=0.078837 ; -f[1]=0.051238 ; -f[2]=0.042313 ; -f[3]=0.053066 ; -f[4]=0.015175 ; -f[5]=0.036713 ; -f[6]=0.061924 ; -f[7]=0.070852 ; -f[8]=0.023082 ; -f[9]=0.062056 ; -f[10]=0.096371 ; -f[11]=0.057324 ; -f[12]=0.023771 ; -f[13]=0.043296 ; -f[14]=0.043911 ; -f[15]=0.063403 ; -f[16]=0.055897 ; -f[17]=0.013272 ; -f[18]=0.034399 ; -f[19]=0.073101 ; - - -q[0][1] = 0.233108 ; -q[0][2] = 0.199097 ; -q[0][3] = 0.265145 ; -q[0][4] = 0.227333 ; -q[0][5] = 0.310084 ; -q[0][6] = 0.567957 ; -q[0][7] = 0.876213 ; -q[0][8] = 0.078692 ; -q[0][9] = 0.222972 ; -q[0][10] = 0.424630 ; -q[0][11] = 0.393245 ; -q[0][12] = 0.211550 ; -q[0][13] = 0.116646 ; -q[0][14] = 0.399143 ; -q[0][15] = 1.817198 ; -q[0][16] = 0.877877 ; -q[0][17] = 0.030309 ; -q[0][18] = 0.087061 ; -q[0][19] = 1.230985 ; - -q[1][2] = 0.210797 ; -q[1][3] = 0.105191 ; -q[1][4] = 0.031726 ; -q[1][5] = 0.493763 ; -q[1][6] = 0.255240 ; -q[1][7] = 0.156945 ; -q[1][8] = 0.213164 ; -q[1][9] = 0.081510 ; -q[1][10] = 0.192364 ; -q[1][11] = 1.755838 ; -q[1][12] = 0.087930 ; -q[1][13] = 0.042569 ; -q[1][14] = 0.128480 ; -q[1][15] = 0.292327 ; -q[1][16] = 0.204109 ; -q[1][17] = 0.046417 ; -q[1][18] = 0.097010 ; -q[1][19] = 0.113146 ; - -q[2][3] = 0.883422 ; -q[2][4] = 0.027495 ; -q[2][5] = 0.275700 ; -q[2][6] = 0.270417 ; -q[2][7] = 0.362028 ; -q[2][8] = 0.290006 ; -q[2][9] = 0.087225 ; -q[2][10] = 0.069245 ; -q[2][11] = 0.503060 ; -q[2][12] = 0.057420 ; -q[2][13] = 0.039769 ; -q[2][14] = 0.083956 ; -q[2][15] = 0.847049 ; -q[2][16] = 0.471268 ; -q[2][17] = 0.010459 ; -q[2][18] = 0.093268 ; -q[2][19] = 0.049824 ; - -q[3][4] = 0.010313 ; -q[3][5] = 0.205842 ; -q[3][6] = 1.599461 ; -q[3][7] = 0.311718 ; -q[3][8] = 0.134252 ; -q[3][9] = 0.011720 ; -q[3][10] = 0.060863 ; -q[3][11] = 0.261101 ; -q[3][12] = 0.012182 ; -q[3][13] = 0.016577 ; -q[3][14] = 0.160063 ; -q[3][15] = 0.461519 ; -q[3][16] = 0.178197 ; -q[3][17] = 0.011393 ; -q[3][18] = 0.051664 ; -q[3][19] = 0.048769 ; - -q[4][5] = 
0.004315 ; -q[4][6] = 0.005321 ; -q[4][7] = 0.050876 ; -q[4][8] = 0.016695 ; -q[4][9] = 0.046398 ; -q[4][10] = 0.091709 ; -q[4][11] = 0.004067 ; -q[4][12] = 0.023690 ; -q[4][13] = 0.051127 ; -q[4][14] = 0.011137 ; -q[4][15] = 0.175270 ; -q[4][16] = 0.079511 ; -q[4][17] = 0.007732 ; -q[4][18] = 0.042823 ; -q[4][19] = 0.163831 ; - -q[5][6] = 0.960976 ; -q[5][7] = 0.128660 ; -q[5][8] = 0.315521 ; -q[5][9] = 0.054602 ; -q[5][10] = 0.243530 ; -q[5][11] = 0.738208 ; -q[5][12] = 0.120801 ; -q[5][13] = 0.026235 ; -q[5][14] = 0.156570 ; -q[5][15] = 0.358017 ; -q[5][16] = 0.248992 ; -q[5][17] = 0.021248 ; -q[5][18] = 0.062544 ; -q[5][19] = 0.112027 ; - -q[6][7] = 0.250447 ; -q[6][8] = 0.104458 ; -q[6][9] = 0.046589 ; -q[6][10] = 0.151924 ; -q[6][11] = 0.888630 ; -q[6][12] = 0.058643 ; -q[6][13] = 0.028168 ; -q[6][14] = 0.205134 ; -q[6][15] = 0.406035 ; -q[6][16] = 0.321028 ; -q[6][17] = 0.018844 ; -q[6][18] = 0.055200 ; -q[6][19] = 0.205868 ; - -q[7][8] = 0.058131 ; -q[7][9] = 0.051089 ; -q[7][10] = 0.087056 ; -q[7][11] = 0.193243 ; -q[7][12] = 0.046560 ; -q[7][13] = 0.050143 ; -q[7][14] = 0.124492 ; -q[7][15] = 0.612843 ; -q[7][16] = 0.136266 ; -q[7][17] = 0.023990 ; -q[7][18] = 0.037568 ; -q[7][19] = 0.082579 ; - -q[8][9] = 0.020039 ; -q[8][10] = 0.103552 ; -q[8][11] = 0.153323 ; -q[8][12] = 0.021157 ; -q[8][13] = 0.079807 ; -q[8][14] = 0.078892 ; -q[8][15] = 0.167406 ; -q[8][16] = 0.101117 ; -q[8][17] = 0.020009 ; -q[8][18] = 0.286027 ; -q[8][19] = 0.068575 ; - -q[9][10] = 2.089890 ; -q[9][11] = 0.093181 ; -q[9][12] = 0.493845 ; -q[9][13] = 0.321020 ; -q[9][14] = 0.054797 ; -q[9][15] = 0.081567 ; -q[9][16] = 0.376588 ; -q[9][17] = 0.034954 ; -q[9][18] = 0.086237 ; -q[9][19] = 3.654430 ; - -q[10][11] = 0.201204 ; -q[10][12] = 1.105667 ; -q[10][13] = 0.946499 ; -q[10][14] = 0.169784 ; -q[10][15] = 0.214977 ; -q[10][16] = 0.243227 ; -q[10][17] = 0.083439 ; -q[10][18] = 0.189842 ; -q[10][19] = 1.337571 ; - -q[11][12] = 0.096474 ; -q[11][13] = 0.038261 ; -q[11][14] = 0.212302 
; -q[11][15] = 0.400072 ; -q[11][16] = 0.446646 ; -q[11][17] = 0.023321 ; -q[11][18] = 0.068689 ; -q[11][19] = 0.144587 ; - -q[12][13] = 0.173052 ; -q[12][14] = 0.010363 ; -q[12][15] = 0.090515 ; -q[12][16] = 0.184609 ; -q[12][17] = 0.022019 ; -q[12][18] = 0.073223 ; -q[12][19] = 0.307309 ; - -q[13][14] = 0.042564 ; -q[13][15] = 0.138119 ; -q[13][16] = 0.085870 ; -q[13][17] = 0.128050 ; -q[13][18] = 0.898663 ; -q[13][19] = 0.247329 ; - -q[14][15] = 0.430431 ; -q[14][16] = 0.207143 ; -q[14][17] = 0.014584 ; -q[14][18] = 0.032043 ; -q[14][19] = 0.129315 ; - -q[15][16] = 1.767766 ; -q[15][17] = 0.035933 ; -q[15][18] = 0.121979 ; -q[15][19] = 0.127700 ; - -q[16][17] = 0.020437 ; -q[16][18] = 0.094617 ; -q[16][19] = 0.740372 ; - -q[17][18] = 0.124746 ; -q[17][19] = 0.022134 ; - -q[18][19] = 0.125733 ; - -} - - -/* - * WAG matrix: Simon Whelan and Nick Goldman - * - */ - -void wagdata(dmatrix q, double *f) -{ - /* - * WAG model of amino acid evolution - * - * S. Whelan and N. Goldman. 2000. In prep. - * - * presented at the MASAMB-X workshop in Cambridge - * - * Whelan, S., and N. Goldman. 2000. - * The WAG amino acid rate matrix. - * Manuscript in prep. 
- */ - - /* Q matrix */ - q[0][1] = 0.610810; q[0][2] = 0.569079; - q[0][3] = 0.821500; q[0][4] = 1.141050; - q[0][5] = 1.011980; q[0][6] = 1.756410; - q[0][7] = 1.572160; q[0][8] = 0.354813; - q[0][9] = 0.219023; q[0][10] = 0.443935; - q[0][11] = 1.005440; q[0][12] = 0.989475; - q[0][13] = 0.233492; q[0][14] = 1.594890; - q[0][15] = 3.733380; q[0][16] = 2.349220; - q[0][17] = 0.125227; q[0][18] = 0.268987; - q[0][19] = 2.221870; - - q[1][2] = 0.711690; q[1][3] = 0.165074; - q[1][4] = 0.585809; q[1][5] = 3.360330; - q[1][6] = 0.488649; q[1][7] = 0.650469; - q[1][8] = 2.362040; q[1][9] = 0.206722; - q[1][10] = 0.551450; q[1][11] = 5.925170; - q[1][12] = 0.758446; q[1][13] = 0.116821; - q[1][14] = 0.753467; q[1][15] = 1.357640; - q[1][16] = 0.613776; q[1][17] = 1.294610; - q[1][18] = 0.423612; q[1][19] = 0.280336; - - q[2][3] = 6.013660; q[2][4] = 0.296524; - q[2][5] = 1.716740; q[2][6] = 1.056790; - q[2][7] = 1.253910; q[2][8] = 4.378930; - q[2][9] = 0.615636; q[2][10] = 0.147156; - q[2][11] = 3.334390; q[2][12] = 0.224747; - q[2][13] = 0.110793; q[2][14] = 0.217538; - q[2][15] = 4.394450; q[2][16] = 2.257930; - q[2][17] = 0.078463; q[2][18] = 1.208560; - q[2][19] = 0.221176; - - q[3][4] = 0.033379; q[3][5] = 0.691268; - q[3][6] = 6.833400; q[3][7] = 0.961142; - q[3][8] = 1.032910; q[3][9] = 0.043523; - q[3][10] = 0.093930; q[3][11] = 0.533362; - q[3][12] = 0.116813; q[3][13] = 0.052004; - q[3][14] = 0.472601; q[3][15] = 1.192810; - q[3][16] = 0.417372; q[3][17] = 0.146348; - q[3][18] = 0.363243; q[3][19] = 0.169417; - - q[4][5] = 0.109261; q[4][6] = 0.023920; - q[4][7] = 0.341086; q[4][8] = 0.275403; - q[4][9] = 0.189890; q[4][10] = 0.428414; - q[4][11] = 0.083649; q[4][12] = 0.437393; - q[4][13] = 0.441300; q[4][14] = 0.122303; - q[4][15] = 1.560590; q[4][16] = 0.570186; - q[4][17] = 0.795736; q[4][18] = 0.604634; - q[4][19] = 1.114570; - - q[5][6] = 6.048790; q[5][7] = 0.366510; - q[5][8] = 4.749460; q[5][9] = 0.131046; - q[5][10] = 0.964886; q[5][11] = 4.308310; 
- q[5][12] = 1.705070; q[5][13] = 0.110744; - q[5][14] = 1.036370; q[5][15] = 1.141210; - q[5][16] = 0.954144; q[5][17] = 0.243615; - q[5][18] = 0.252457; q[5][19] = 0.333890; - - q[6][7] = 0.630832; q[6][8] = 0.635025; - q[6][9] = 0.141320; q[6][10] = 0.172579; - q[6][11] = 2.867580; q[6][12] = 0.353912; - q[6][13] = 0.092310; q[6][14] = 0.755791; - q[6][15] = 0.782467; q[6][16] = 0.914814; - q[6][17] = 0.172682; q[6][18] = 0.217549; - q[6][19] = 0.655045; - - q[7][8] = 0.276379; q[7][9] = 0.034151; - q[7][10] = 0.068651; q[7][11] = 0.415992; - q[7][12] = 0.194220; q[7][13] = 0.055288; - q[7][14] = 0.273149; q[7][15] = 1.486700; - q[7][16] = 0.251477; q[7][17] = 0.374321; - q[7][18] = 0.114187; q[7][19] = 0.209108; - - q[8][9] = 0.152215; q[8][10] = 0.555096; - q[8][11] = 0.992083; q[8][12] = 0.450867; - q[8][13] = 0.756080; q[8][14] = 0.771387; - q[8][15] = 0.822459; q[8][16] = 0.525511; - q[8][17] = 0.289998; q[8][18] = 4.290350; - q[8][19] = 0.131869; - - q[9][10] = 3.517820; q[9][11] = 0.360574; - q[9][12] = 4.714220; q[9][13] = 1.177640; - q[9][14] = 0.111502; q[9][15] = 0.353443; - q[9][16] = 1.615050; q[9][17] = 0.234326; - q[9][18] = 0.468951; q[9][19] = 8.659740; - - q[10][11] = 0.287583; q[10][12] = 5.375250; - q[10][13] = 2.348200; q[10][14] = 0.462018; - q[10][15] = 0.382421; q[10][16] = 0.364222; - q[10][17] = 0.740259; q[10][18] = 0.443205; - q[10][19] = 1.997370; - - q[11][12] = 1.032220; q[11][13] = 0.098843; - q[11][14] = 0.619503; q[11][15] = 1.073780; - q[11][16] = 1.537920; q[11][17] = 0.152232; - q[11][18] = 0.147411; q[11][19] = 0.342012; - - q[12][13] = 1.320870; q[12][14] = 0.194864; - q[12][15] = 0.556353; q[12][16] = 1.681970; - q[12][17] = 0.570369; q[12][18] = 0.473810; - q[12][19] = 2.282020; - - q[13][14] = 0.179896; q[13][15] = 0.606814; - q[13][16] = 0.191467; q[13][17] = 1.699780; - q[13][18] = 7.154480; q[13][19] = 0.725096; - - q[14][15] = 1.786490; q[14][16] = 0.885349; - q[14][17] = 0.156619; q[14][18] = 0.239607; - q[14][19] = 
0.351250; - - q[15][16] = 4.847130; q[15][17] = 0.578784; - q[15][18] = 0.872519; q[15][19] = 0.258861; - - q[16][17] = 0.126678; q[16][18] = 0.325490; - q[16][19] = 1.547670; - - q[17][18] = 2.763540; q[17][19] = 0.409817; - - q[18][19] = 0.347826; - - /* original frequencies */ - f[ 0] = 0.0866; - f[ 1] = 0.0440; - f[ 2] = 0.0391; - f[ 3] = 0.0570; - f[ 4] = 0.0193; - f[ 5] = 0.0367; - f[ 6] = 0.0581; - f[ 7] = 0.0833; - f[ 8] = 0.0244; - f[ 9] = 0.0485; - f[10] = 0.0862; - f[11] = 0.0620; - f[12] = 0.0195; - f[13] = 0.0384; - f[14] = 0.0458; - f[15] = 0.0695; - f[16] = 0.0610; - f[17] = 0.0144; - f[18] = 0.0353; - f[19] = 0.0709; -} - -void cprev45data(dmatrix q, double *f) -{ - /* cpREV45 model of amino acid evolution - * Adachi, J., P.J. Waddell, W. Martin, and M. Hasegawa. 2000. - * J. Mol. Evol. 50:348-358 - * (reconstructed from 45 chloroplast genomes) - */ - q[0][1] = 105; q[0][2] = 227; - q[0][3] = 175; q[0][4] = 669; - q[0][5] = 157; q[0][6] = 499; - q[0][7] = 665; q[0][8] = 66; - q[0][9] = 145; q[0][10] = 197; - q[0][11] = 236; q[0][12] = 185; - q[0][13] = 68; q[0][14] = 490; - q[0][15] = 2440; q[0][16] = 1340; - q[0][17] = 14; q[0][18] = 56; - q[0][19] = 968; - - q[1][2] = 357; q[1][3] = 43; - q[1][4] = 823; q[1][5] = 1745; - q[1][6] = 152; q[1][7] = 243; - q[1][8] = 715; q[1][9] = 136; - q[1][10] = 203; q[1][11] = 4482; - q[1][12] = 125; q[1][13] = 53; - q[1][14] = 87; q[1][15] = 385; - q[1][16] = 314; q[1][17] = 230; - q[1][18] = 323; q[1][19] = 92; - - q[2][3] = 4435; q[2][4] = 538; - q[2][5] = 768; q[2][6] = 1055; - q[2][7] = 653; q[2][8] = 1405; - q[2][9] = 168; q[2][10] = 113; - q[2][11] = 2430; q[2][12] = 61; - q[2][13] = 97; q[2][14] = 173; - q[2][15] = 2085; q[2][16] = 1393; - q[2][17] = 40; q[2][18] = 754; - q[2][19] = 83; - - q[3][4] = 10; q[3][5] = 400; - q[3][6] = 3691; q[3][7] = 431; - q[3][8] = 331; q[3][9] = 10; - q[3][10] = 10; q[3][11] = 412; - q[3][12] = 47; q[3][13] = 22; - q[3][14] = 170; q[3][15] = 590; - q[3][16] = 266; q[3][17] 
= 18; - q[3][18] = 281; q[3][19] = 75; - - q[4][5] = 10; q[4][6] = 10; - q[4][7] = 303; q[4][8] = 441; - q[4][9] = 280; q[4][10] = 396; - q[4][11] = 48; q[4][12] = 159; - q[4][13] = 726; q[4][14] = 285; - q[4][15] = 2331; q[4][16] = 576; - q[4][17] = 435; q[4][18] = 1466; - q[4][19] = 592; - - q[5][6] = 3122; q[5][7] = 133; - q[5][8] = 1269; q[5][9] = 92; - q[5][10] = 286; q[5][11] = 3313; - q[5][12] = 202; q[5][13] = 10; - q[5][14] = 323; q[5][15] = 396; - q[5][16] = 241; q[5][17] = 53; - q[5][18] = 391; q[5][19] = 54; - - q[6][7] = 379; q[6][8] = 162; - q[6][9] = 148; q[6][10] = 82; - q[6][11] = 2629; q[6][12] = 113; - q[6][13] = 145; q[6][14] = 185; - q[6][15] = 568; q[6][16] = 369; - q[6][17] = 63; q[6][18] = 142; - q[6][19] = 200; - - q[7][8] = 19; q[7][9] = 40; - q[7][10] = 20; q[7][11] = 263; - q[7][12] = 21; q[7][13] = 25; - q[7][14] = 28; q[7][15] = 691; - q[7][16] = 92; q[7][17] = 82; - q[7][18] = 10; q[7][19] = 91; - - q[8][9] = 29; q[8][10] = 66; - q[8][11] = 305; q[8][12] = 10; - q[8][13] = 127; q[8][14] = 152; - q[8][15] = 303; q[8][16] = 32; - q[8][17] = 69; q[8][18] = 1971; - q[8][19] = 25; - - q[9][10] = 1745; q[9][11] = 345; - q[9][12] = 1772; q[9][13] = 454; - q[9][14] = 117; q[9][15] = 216; - q[9][16] = 1040; q[9][17] = 42; - q[9][18] = 89; q[9][19] = 4797; - - q[10][11] = 218; q[10][12] = 1351; - q[10][13] = 1268; q[10][14] = 219; - q[10][15] = 516; q[10][16] = 156; - q[10][17] = 159; q[10][18] = 189; - q[10][19] = 865; - - q[11][12] = 193; q[11][13] = 72; - q[11][14] = 302; q[11][15] = 868; - q[11][16] = 918; q[11][17] = 10; - q[11][18] = 247; q[11][19] = 249; - - q[12][13] = 327; q[12][14] = 100; - q[12][15] = 93; q[12][16] = 645; - q[12][17] = 86; q[12][18] = 215; - q[12][19] = 475; - - q[13][14] = 43; q[13][15] = 487; - q[13][16] = 148; q[13][17] = 468; - q[13][18] = 2370; q[13][19] = 317; - - q[14][15] = 1202; q[14][16] = 260; - q[14][17] = 49; q[14][18] = 97; - q[14][19] = 122; - - q[15][16] = 2151; q[15][17] = 73; - q[15][18] = 522; 
q[15][19] = 167; - - q[16][17] = 29; q[16][18] = 71; - q[16][19] = 760; - - q[17][18] = 346; q[17][19] = 10; - - q[18][19] = 119; - - f[0] = 0.076; - f[1] = 0.062; - f[2] = 0.041; - f[3] = 0.037; - f[4] = 0.009; - f[5] = 0.038; - f[6] = 0.049; - f[7] = 0.084; - f[8] = 0.025; - f[9] = 0.081; - f[10] = 0.101; - f[11] = 0.050; - f[12] = 0.022; - f[13] = 0.051; - f[14] = 0.043; - f[15] = 0.062; - f[16] = 0.054; - f[17] = 0.018; - f[18] = 0.031; - f[19] = 0.066; -} - diff --git a/forester/archive/RIO/others/puzzle_dqo/src/outdist b/forester/archive/RIO/others/puzzle_dqo/src/outdist deleted file mode 100644 index f5728a1..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/outdist +++ /dev/null @@ -1,4 +0,0 @@ -RECA_NEIMU 0.01095 -O86384/1-2 8.99866 -RECA_NEIPH 0.02202 - diff --git a/forester/archive/RIO/others/puzzle_dqo/src/ppuzzle.h b/forester/archive/RIO/others/puzzle_dqo/src/ppuzzle.h deleted file mode 100644 index 2007467..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/ppuzzle.h +++ /dev/null @@ -1,274 +0,0 @@ -/* - * ppuzzle.h - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - - -#ifndef _PPUZZLE_ -#define _PPUZZLE_ - -#include "puzzle.h" -#include "util.h" -#include "ml.h" -#include "sched.h" - -extern int PP_IamSlave; -extern int PP_IamMaster; - -#ifdef PARALLEL -# ifdef SEQUENTIAL -# undef SEQUENTIAL -# endif -# define SEQUENTIAL 0 -# undef PARALLEL -# define PARALLEL 1 -# include "mpi.h" -#else -# ifdef SEQUENTIAL -# undef SEQUENTIAL -# endif -# define SEQUENTIAL 1 -# define PARALLEL 0 -# undef PVERBOSE -# undef PVERBOSE1 -# undef PVERBOSE2 -# undef PVERBOSE3 -#endif - -/* PVERBOSE3 includes PVERBOSE2 includes PVERBOSE1 */ -/* PVERBOSE1 is default (PVERBOSE) */ - -#ifdef PVERBOSE -# undef PVERBOSE1 -# define PVERBOSE1 -#endif -#ifdef PVERBOSE3 -# undef PVERBOSE2 -# define PVERBOSE2 -#endif -#ifdef PVERBOSE2 -# undef PVERBOSE1 -# define PVERBOSE1 -#endif - -#if PARALLEL -# define PP_DONE 0 /* Finished M->S */ -# define PP_SIZES 1 /* Array sizes needed M->S */ -# define PP_DATA 2 /* Data Arrays M->S */ - -# define PP_ALLQUARTS 3 /* All Quartets M->S */ - -# define PP_DOQUART 4 /* do 4Specs M->S */ -# define PP_DOQUARTX2 5 /* do 4Specs + X^2 M->S */ -# define PP_QUART 6 /* quartet back S->M */ -# define PP_QUARTX2 7 /* quartet + X^2 back S->M */ - -# define PP_DOQUARTBLOCKSPECS 8 /* do block Specs M->S */ -# define PP_DOQUARTBLOCK 9 /* do block of Quarts M->S */ -# define PP_QUARTBLOCKSPECS 10 /* block Specs S->M */ -# define PP_QUARTBLOCK 11 /* block of Quarts S->M */ - -# define PP_DOPUZZLE 12 /* do Puzzling step M->S */ -# define PP_PUZZLE 13 /* Puzzling tree back S->M */ -# define PP_DOPUZZLEBLOCK 14 /* do Puzzling block M->S */ -# define PP_DOPUZZLEBLOCKSPECS 15 /* do Puzzling block M->S */ -# define PP_PUZZLEBLOCK 16 /* Puzzling block S->M */ -# define PP_PUZZLEBLOCKSPECS 17 /* Puzzling block S->M */ - -# define PP_STATS 18 /* Slave Statistics S->M */ - -# define PP_WAIT 18 /* waiting for work S->M */ -# define PP_TEST 100 /* testing */ - -# define PERMUTQUEUESIZE 100 -# define QUARTQUEUESIZE 100 - - extern int 
PP_IamMaster; - extern int PP_IamSlave; - extern int PP_Myid; - extern int PP_MyMaster; - extern int PP_NumProcs; - extern MPI_Comm PP_Comm; -#endif /* PARALLEL */ - -extern int *permutsent, - *permutrecved, - *quartsent, - *quartrecved, - *doquartsent, - *doquartrecved, - *splitsent, - *splitrecved, - *permutsentn, - *permutrecvedn, - *quartsentn, - *quartrecvedn, - *doquartsentn, - *doquartrecvedn, - *splitsentn, - *splitrecvedn; -extern double *walltimes, - *cputimes; -extern double *fullwalltimes, - *fullcputimes; -extern double *altwalltimes, - *altcputimes; - -extern int PP_permutsent, - PP_permutrecved, - PP_quartsent, - PP_quartrecved, - PP_doquartsent, - PP_doquartrecved, - PP_splitsent, - PP_splitrecved, - PP_permutsentn, - PP_permutrecvedn, - PP_quartsentn, - PP_quartrecvedn, - PP_doquartsentn, - PP_doquartrecvedn, - PP_splitsentn, - PP_splitrecvedn; - -extern double PP_starttime, - PP_stoptime, - PP_inittime, - PP_paramcomptime, - PP_paramsendtime, - PP_quartcomptime, - PP_quartsendtime, - PP_puzzletime, - PP_treetime; - -void num2quart(uli qnum, int *a, int *b, int *c, int *d); -uli numquarts(int maxspc); -uli quart2num (int a, int b, int c, int d); - -int slave_main(int argc, char *argv[]); -void PP_Init(int *argc, char **argv[]); -void PP_Finalize(); -void PP_Printerror(FILE *of, int id, int err); -void PP_do_puzzling(ivector trueID); - -void PP_RecvDoQuart(int *a, - int *b, - int *c, - int *d, - int *approx); -void PP_SendDoQuart(int dest, - int a, - int b, - int c, - int d, - int approx); -void PP_RecvQuart(int *a, - int *b, - int *c, - int *d, - double *d1, - double *d2, - double *d3, - int *approx); -void PP_SendQuart(int a, - int b, - int c, - int d, - double d1, - double d2, - double d3, - int approx); -void PP_SendSizes(int mspc, - int msite, - int ncats, - int nptrn, - int rad, - int outgr, - double frconst, - int rseed); -void PP_RecvSizes(int *mspc, - int *msite, - int *ncats, - int *nptrn, - int *rad, - int *outgr, - double *frconst, - int 
*rseed); -void PP_RecvData( - cmatrix Seqpat, /* cmatrix (Maxspc x Numptrn) */ - ivector Alias, /* ivector (Maxsite) */ - ivector Weight, /* ivector (Numptrn) */ - ivector constpat, - dvector Rates, /* dvector (numcats) */ - dvector Eval, /* dvector (tpmradix) */ - dvector Freqtpm, - dmatrix Evec, /* dmatrix (tpmradix x tpmradix) */ - dmatrix Ievc, - dmatrix iexp, - dmatrix Distanmat, /* dmatrix (Maxspc x Maxspc) */ - dcube ltprobr); /* dcube (numcats x tpmradix x tpmradix) */ -void PP_SendData( - cmatrix Seqpat, /* cmatrix (Maxspc x Numptrn) */ - ivector Alias, /* ivector (Maxsite) */ - ivector Weight, /* ivector (Numptrn) */ - ivector constpat, - dvector Rates, /* dvector (numcats) */ - dvector Eval, /* dvector (tpmradix) */ - dvector Freqtpm, - dmatrix Evec, /* dmatrix (tpmradix x tpmradix) */ - dmatrix Ievc, - dmatrix iexp, - dmatrix Distanmat, /* dmatrix (Maxspc x Maxspc) */ - dcube ltprobr); /* dcube (numcats x tpmradix x tpmradix) */ -void PP_SendAllQuarts(unsigned long Numquartets, - unsigned char *quartetinfo); -void PP_RecvAllQuarts(int taxa, - unsigned long *Numquartets, - unsigned char *quartetinfo); - -void PP_SendDoQuartBlock(int dest, uli firstq, uli amount, int approx); -void PP_RecvDoQuartBlock(uli *firstq, uli *amount, uli **bq, int *approx); -void PP_SendQuartBlock(uli startq, - uli numofq, - unsigned char *quartetinfo, - uli numofbq, - uli *bq, - int approx); -void PP_RecvQuartBlock(int slave, - uli *startq, - uli *numofq, - unsigned char *quartetinfo, - int *approx); - -void PP_SendPermut(int dest, - int taxa, - ivector permut); -void PP_RecvPermut(int taxa, - ivector permut); -void PP_SendDoPermutBlock(uli puzzlings); -void PP_RecvDoPermutBlock(uli *taxa); - -void PP_SendSplits(int taxa, - cmatrix biparts); -void PP_RecvSplits(int taxa, - cmatrix biparts); -void PP_SendDone(); -void PP_RecvDone(); - -int PP_emptyslave(); -void PP_putslave(int sl); -int PP_getslave(); - -void PP_cmpd(int rank, double a, double b); -void PP_cmpi(int rank, int a, 
int b); - -#endif /* _PPUZZLE_ */ diff --git a/forester/archive/RIO/others/puzzle_dqo/src/puzzle.h b/forester/archive/RIO/others/puzzle_dqo/src/puzzle.h deleted file mode 100644 index 8165b1a..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/puzzle.h +++ /dev/null @@ -1,493 +0,0 @@ -/* - * puzzle.h - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -#ifndef _PUZZLE_ -#define _PUZZLE_ - -#ifndef PACKAGE -# define PACKAGE "tree-puzzle" -#endif -#ifndef VERSION -# define VERSION "5.0" -#endif -#define DATE "October 2000" - -/* prototypes */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "util.h" -#include "ml.h" -#ifdef PARALLEL -# include "ppuzzle.h" -#endif - -#define STDOUT stdout -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUTFILE STDOUT, -#endif - -/* filenames */ -# define FILENAMELENTH 2048 - - -# define INFILEDEFAULT "infile" -# define OUTFILEDEFAULT "outfile" -# define TREEFILEDEFAULT "outtree" -# define INTREEDEFAULT "intree" -# define DISTANCESDEFAULT "outdist" -# define TRIANGLEDEFAULT "outlm.eps" -# define UNRESOLVEDDEFAULT "outqlist" -# define ALLQUARTDEFAULT "outallquart" -# define ALLQUARTLHDEFAULT "outallquartlh" -# define OUTPTLISTDEFAULT "outpstep" -# define OUTPTORDERDEFAULT "outptorder" - -# define INFILE infilename -# define OUTFILE outfilename -# define TREEFILE outtreename -# define INTREE intreename -# define DISTANCES outdistname -# define TRIANGLE outlmname -# define UNRESOLVED outqlistname -# define 
ALLQUART outallquartname -# define ALLQUARTLH outallquartlhname -# define OUTPTLIST outpstepname -# define OUTPTORDER outptordername - -EXTERN char infilename [FILENAMELENTH]; -EXTERN char outfilename [FILENAMELENTH]; -EXTERN char outtreename [FILENAMELENTH]; -EXTERN char intreename [FILENAMELENTH]; -EXTERN char outdistname [FILENAMELENTH]; -EXTERN char outlmname [FILENAMELENTH]; -EXTERN char outqlistname [FILENAMELENTH]; -EXTERN char outallquartname [FILENAMELENTH]; -EXTERN char outallquartlhname [FILENAMELENTH]; -EXTERN char outpstepname [FILENAMELENTH]; -EXTERN char outptordername [FILENAMELENTH]; - -#define OUTFILEEXT "puzzle" -#define TREEFILEEXT "tree" -#define DISTANCESEXT "dist" -#define TRIANGLEEXT "eps" -#define UNRESOLVEDEXT "qlist" -#define ALLQUARTEXT "allquart" -#define ALLQUARTLHEXT "allquartlh" -#define OUTPTLISTEXT "pstep" -#define OUTPTORDEREXT "ptorder" - -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUT stdout -# define STDOUTFILE STDOUT, -#endif - - -/* auto_aamodel/auto_datatype values (xxx) */ -#define AUTO_OFF 0 -#define AUTO_GUESS 1 -#define AUTO_DEFAULT 2 - - -/* qptlist values (xxx) */ -#define PSTOUT_NONE 0 -#define PSTOUT_ORDER 1 -#define PSTOUT_LISTORDER 2 -#define PSTOUT_LIST 3 - -/* dtat_optn values (xxx) */ -#define NUCLEOTIDE 0 -#define AMINOACID 1 -#define BINARY 2 - -/* typ_optn values (xxx) */ -#define LIKMAPING_OPTN 1 -#define TREERECON_OPTN 0 - -/* puzzlemodes (xxx) */ -#define QUARTPUZ 0 -#define USERTREE 1 -#define PAIRDIST 2 - -/* rhetmodes (xxx) Modes of rate heterogeneity */ -#define UNIFORMRATE 0 -#define GAMMARATE 1 -#define TWORATE 2 -#define MIXEDRATE 3 - -/* defines for types of quartet likelihood computation (xxx) */ -#define EXACT 0 -#define APPROX 1 - -/* tree structure */ -typedef struct oneedge { - /* pointer to other three edges */ - 
struct oneedge *up; - struct oneedge *downleft; - struct oneedge *downright; - int numedge; /* number of edge */ - uli edgeinfo; /* value of this edge */ - int *edgemap; /* pointer to the local edgemap */ -} ONEEDGE; - - -/* variables */ -EXTERN cmatrix biparts; /* bipartitions of tree of current puzzling step */ -EXTERN cmatrix consbiparts; /* bipartitions of majority rule consensus tree */ -EXTERN cmatrix seqchars; /* characters contained in data set */ -EXTERN cmatrix treepict; /* picture of consensus tree */ -EXTERN double minscore; /* value of edgescore on minedge */ -EXTERN double tstvf84; /* F84 transition/transversion ratio */ -EXTERN double tstvratio; /* expected transition/transversion ratio */ -EXTERN double yrtsratio; /* expected pyrimidine/purine transition ratio */ -EXTERN dvector ulkl; /* log L of user trees */ -EXTERN dmatrix allsites; /* log L per sites of user trees */ -EXTERN dvector ulklc; /* log L of user trees (clock) */ -EXTERN dmatrix allsitesc; /* log L per sites of user trees (clock) */ -EXTERN FILE *utfp; /* pointer to user tree file */ -EXTERN FILE *ofp; /* pointer to output file */ -EXTERN FILE *seqfp; /* pointer to sequence input file */ -EXTERN FILE *tfp; /* pointer to tree file */ -EXTERN FILE *dfp; /* pointer to distance file */ -EXTERN FILE *trifp; /* pointer to triangle file */ -EXTERN FILE *unresfp; /* pointer to file with unresolved quartets */ -EXTERN FILE *tmpfp; /* pointer to temporary file */ -EXTERN FILE *qptlist; /* pointer to file with puzzling step trees */ -EXTERN FILE *qptorder; /* pointer to file with unique puzzling step trees */ -EXTERN int SHcodon; /* whether SH should be applied to 1st, 2nd codon positions */ -EXTERN int utree_optn; /* use first user tree for estimation */ -EXTERN int listqptrees; /* list puzzling step trees */ -EXTERN int approxqp; /* approximate QP quartets */ -EXTERN int *edgeofleaf; /* vector with edge number of all leaves */ -EXTERN int codon_optn; /* declares what positions in a codon should 
be used */ -EXTERN int compclock; /* computation of clocklike branch lengths */ -EXTERN int chooseA; /* leaf variable */ -EXTERN int chooseB; /* leaf variable */ -EXTERN int clustA, clustB, clustC, clustD; /* number of members of LM clusters */ -EXTERN int column; /* used for breaking lines (writing tree to treefile) */ -EXTERN int Frequ_optn; /* use empirical base frequencies */ -EXTERN int Maxbrnch; /* 2*Maxspc - 3 */ -EXTERN int Maxseqc; /* number of sequence characters per taxum */ -EXTERN int mflag; /* flag used for correct printing of runtime messages */ -EXTERN int minedge; /* edge with minimum edgeinfo */ -EXTERN int nextedge; /* number of edges in the current tree */ -EXTERN int nextleaf; /* next leaf to add to tree */ -EXTERN int numclust; /* number of clusters in LM analysis */ -EXTERN int outgroup; /* outgroup */ -EXTERN int puzzlemode; /* computation of QP tree and/or ML distances */ -EXTERN int rootsearch; /* how location of root is found */ -EXTERN int rhetmode; /* model of rate heterogeneity */ -EXTERN int splitlength; /* length of one entry in splitpatterns */ -EXTERN int *splitsizes; /* size of all different splits of all trees */ -EXTERN int usebestq_optn; /* use only best quartet topology, no bayesian weights */ -EXTERN int show_optn; /* show unresolved quartets */ -EXTERN int savequart_optn; /* save memory block which quartets to file */ -EXTERN int savequartlh_optn; /* save quartet likelihoods to file */ -EXTERN int saveqlhbin_optn; /* save quartet likelihoods binary */ -EXTERN int readquart_optn; /* read memory block which quartets from file */ -EXTERN int sym_optn; /* symmetrize doublet frequencies */ -EXTERN int xsize; /* depth of consensus tree picture */ -EXTERN int ytaxcounter; /* counter for establishing y-coordinates of all taxa */ -EXTERN int numutrees; /* number of users trees in input tree file */ -EXTERN ivector clusterA, clusterB, clusterC, clusterD; /* clusters for LM analysis */ -EXTERN ivector consconfid; /* confidence values 
of majority rule consensus tree */ -EXTERN ivector conssizes; /* partition sizes of majority rule consensus tree */ -EXTERN ivector trueID; /* leaf -> taxon on this leaf */ -EXTERN ivector xcor; /* x-coordinates of consensus tree nodes */ -EXTERN ivector ycor; /* y-coordinates of consensus tree nodes */ -EXTERN ivector ycormax; /* maximal y-coordinates of consensus tree nodes */ -EXTERN ivector ycormin; /* minimal y-coordinates of consensus tree nodes */ -EXTERN ivector ycortax; /* y-coordinates of all taxa */ -EXTERN ONEEDGE *edge; /* vector with all the edges of the tree */ -EXTERN uli *splitcomp; /* bipartition storage */ -EXTERN uli *splitfreqs; /* frequencies of all different splits of all trees */ -EXTERN uli *splitpatterns; /* all different splits of all trees */ -EXTERN uli badqs; /* number of bad quartets */ -EXTERN uli consincluded; /* number of included biparts in the consensus tree */ -EXTERN uli Currtrial; /* counter for puzzling steps */ -EXTERN uli maxbiparts; /* space is reserved for that many bipartitions */ -EXTERN uli mininfo; /* value of edgeinfo on minedge */ -EXTERN uli numbiparts; /* number of different bipartitions */ -EXTERN uli Numquartets; /* number of quartets */ -EXTERN uli Numtrial; /* number of puzzling steps */ -EXTERN uli lmqts; /* quartets investigated in LM analysis (0 = ALL) */ - -EXTERN int auto_datatype; /* guess datatype ? */ -EXTERN int guessdata_optn; /* guessed datatype */ - -EXTERN int auto_aamodel; /* guess amino acid modell ? */ -EXTERN int guessauto_aamodel; /* guessed amino acid modell ? 
*/ -EXTERN int guessDayhf_optn; /* guessed Dayhoff model option */ -EXTERN int guessJtt_optn; /* guessed JTT model option */ -EXTERN int guessblosum62_optn; /* guessed BLOSUM 62 model option */ -EXTERN int guessmtrev_optn; /* guessed mtREV model option */ -EXTERN int guesscprev_optn; /* guessed cpREV model option */ -EXTERN int guessvtmv_optn; /* guessed VT model option */ -EXTERN int guesswag_optn; /* guessed WAG model option */ - -/* counter variables needed in likelihood mapping analysis */ -EXTERN uli ar1, ar2, ar3; -EXTERN uli reg1, reg2, reg3, reg4, reg5, reg6, reg7; -EXTERN uli reg1l, reg1r, reg2u, reg2d, reg3u, reg3d, - reg4u, reg4d, reg5l, reg5r, reg6u, reg6d; -EXTERN unsigned char *quartetinfo; /* place where quartets are stored */ -EXTERN dvector qweight; /* for use in QP and LM analysis */ -EXTERN dvector sqdiff; -EXTERN ivector qworder; -EXTERN ivector sqorder; - -EXTERN int randseed; -EXTERN int psteptreestrlen; - -typedef struct treelistitemtypedummy { - struct treelistitemtypedummy *pred; - struct treelistitemtypedummy *succ; - struct treelistitemtypedummy *sortnext; - struct treelistitemtypedummy *sortlast; - char *tree; - int count; - int id; - int idx; -} treelistitemtype; - -EXTERN treelistitemtype *psteptreelist; -EXTERN treelistitemtype *psteptreesortlist; -EXTERN int psteptreenum; -EXTERN int psteptreesum; - - -/* prototypes */ -void makeF84model(void); -void compnumqts(void); -void setoptions(void); -void openfiletoread(FILE **, char[], char[]); -void openfiletowrite(FILE **, char[], char[]); -void openfiletoappend(FILE **, char[], char[]); -void closefile(FILE *); -void symdoublets(void); -void computeexpectations(void); -void putdistance(FILE *); -void findidenticals(FILE *); -double averagedist(void); -void initps(FILE *); -void plotlmpoint(FILE *, double, double); -void finishps(FILE *); -void makelmpoint(FILE *, double, double, double); -void printtreestats(FILE *); -void timestamp(FILE *); -void writeoutputfile(FILE *, int); - -/* 
definitions for writing output */ -#define WRITEALL 0 -#define WRITEPARAMS 1 -#define WRITEREST 2 - -void writetimesstat(FILE *ofp); -void writecutree(FILE *, int); -void starttimer(void); -void checktimer(uli); -void estimateparametersnotree(void); -void estimateparameterstree(void); -int main(int, char *[]); -int ulicmp(const void *, const void *); -int intcmp(const void *, const void *); - -void readid(FILE *, int); -char readnextcharacter(FILE *, int, int); -void skiprestofline(FILE *, int, int); -void skipcntrl(FILE *, int, int); -void getseqs(FILE *); -void initid(int); -void fputid10(FILE *, int); -int fputid(FILE *, int); -void getsizesites(FILE *); -void getdataset(FILE *); -int guessdatatype(void); -void translatedataset(void); -void estimatebasefreqs(void); -void guessmodel(void); -void inittree(void); -void addnextleaf(int); -void freetree(void); -void writeOTU(FILE *, int); -void writetree(FILE *); -int *initctree(); -void copytree(int *ctree); -void freectree(int **snodes); -void printctree(int *ctree); -char *sprintfctree(int *ctree, int strlen); -void fprintffullpstree(FILE *outf, char *treestr); -int printfsortctree(int *ctree); -int sortctree(int *ctree); -int ct_1stedge(int node); -int ct_2ndedge(int node); -int ct_3rdedge(int node); - -void printfpstrees(treelistitemtype *list); -void printfsortedpstrees(treelistitemtype *list); -void fprintfsortedpstrees(FILE *output, treelistitemtype *list, int itemnum, int itemsum, int comment, float cutoff); - -void sortbynum(treelistitemtype *list, treelistitemtype **sortlist); -treelistitemtype *addtree2list(char **tree, - int numtrees, - treelistitemtype **list, - int *numitems, - int *numsum); -void freetreelist(treelistitemtype **list, - int *numitems, - int *numsum); -void resetedgeinfo(void); -void incrementedgeinfo(int, int); -void minimumedgeinfo(void); -void initconsensus(void); -void makepart(int, int); -void computebiparts(void); -void printsplit(FILE *, uli); -void makenewsplitentries(void); 
-void copysplit(uli, int); -void makeconsensus(void); -void writenode(FILE *, int); -void writeconsensustree(FILE *); -void nodecoordinates(int); -void drawnode(int, int); -void plotconsensustree(FILE *); -unsigned char *mallocquartets(int); -void freequartets(void); -unsigned char readquartet(int, int, int, int); -void writequartet(int, int, int, int, unsigned char); -void sort3doubles(dvector, ivector); -void computeallquartets(void); -void checkquartet(int, int, int, int); -void num2quart(uli qnum, int *a, int *b, int *c, int *d); -uli numquarts(int maxspc); -uli quart2num (int a, int b, int c, int d); - -void writetpqfheader(int nspec, FILE *ofp, int flag); - - -/* extracted from main (xxx) */ -void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx); - - -/* definitions for timing */ - -#define OVERALL 0 -#define GENERAL 1 -#define OPTIONS 2 -#define PARAMEST 3 -#define QUARTETS 4 -#define PUZZLING 5 -#define TREEEVAL 6 - -typedef struct { - int currentjob; - clock_t tempcpu; - clock_t tempfullcpu; - clock_t tempcpustart; - time_t temptime; - time_t tempfulltime; - time_t temptimestart; - - clock_t maxcpu; - clock_t mincpu; - time_t maxtime; - time_t mintime; - - double maxcpublock; - double mincpublock; - double mincputick; - double mincputicktime; - double maxtimeblock; - double mintimeblock; - - double generalcpu; - double optionscpu; - double paramestcpu; - double quartcpu; - double quartblockcpu; - double quartmaxcpu; - double quartmincpu; - double puzzcpu; - double puzzblockcpu; - double puzzmaxcpu; - double puzzmincpu; - double treecpu; - double treeblockcpu; - double treemaxcpu; - double treemincpu; - double cpu; - double fullcpu; - - double generaltime; - double optionstime; - double paramesttime; - double quarttime; - double quartblocktime; - double quartmaxtime; - double quartmintime; - double puzztime; - double puzzblocktime; - double puzzmaxtime; - double puzzmintime; - double treetime; - double 
treeblocktime; - double treemaxtime; - double treemintime; - double time; - double fulltime; -} timearray_t; - -EXTERN double cputime, walltime; -EXTERN double fullcpu, fulltime; -EXTERN double fullcputime, fullwalltime; -EXTERN double altcputime, altwalltime; -EXTERN clock_t cputimestart, cputimestop, cputimedummy; -EXTERN time_t walltimestart, walltimestop, walltimedummy; -EXTERN clock_t Startcpu; /* start cpu time */ -EXTERN clock_t Stopcpu; /* stop cpu time */ -EXTERN time_t Starttime; /* start time */ -EXTERN time_t Stoptime; /* stop time */ -EXTERN time_t time0; /* timer variable */ -EXTERN time_t time1; /* yet another timer */ -EXTERN time_t time2; /* yet another timer */ -EXTERN timearray_t tarr; - -void resetqblocktime(timearray_t *ta); -void resetpblocktime(timearray_t *ta); -void inittimearr(timearray_t *ta); -void addtimes(int jobtype, timearray_t *ta); -#ifdef TIMEDEBUG - void printtimearr(timearray_t *ta); -#endif /* TIMEDEBUG */ - -#endif /* _PUZZLE_ */ - diff --git a/forester/archive/RIO/others/puzzle_dqo/src/puzzle1.c b/forester/archive/RIO/others/puzzle_dqo/src/puzzle1.c deleted file mode 100644 index a012cb4..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/puzzle1.c +++ /dev/null @@ -1,2864 +0,0 @@ -/* - * puzzle1.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - - -#define EXTERN - -#include "puzzle.h" -#include "gamma.h" - -void num2quart(uli qnum, int *a, int *b, int *c, int *d) -{ - double temp; - uli aa, bb, cc, dd; - uli lowval=0, highval=0; - - aa=0; bb=1; cc=2; dd=3; - - temp = (double)(24 * qnum); - temp = sqrt(temp); - temp = sqrt(temp); - /* temp = pow(temp, (double)(1/4)); */ - dd = (uli) floor(temp) + 1; - if (dd < 3) dd = 3; - lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24; - highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24; - if (lowval >= qnum) - while ((lowval > qnum)) { - dd -= 1; lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24; - } - else { - while (highval <= qnum) { - dd += 1; highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24; - } - lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24; - } - qnum -= lowval; - if (qnum > 0) { - temp = (double)(6 * qnum); - temp = pow(temp, (double)(1/3)); - cc = (uli) floor(temp); - if (cc < 2) cc= 2; - lowval = (uli) cc*(cc-1)*(cc-2)/6; - highval = (uli) (cc+1)*cc*(cc-1)/6; - if (lowval >= qnum) - while ((lowval > qnum)) { - cc -= 1; lowval = (uli) cc*(cc-1)*(cc-2)/6; - } - else { - while (highval <= qnum) { - cc += 1; highval = (uli) (cc+1)*cc*(cc-1)/6; - } - lowval = (uli) cc*(cc-1)*(cc-2)/6; - } - qnum -= lowval; - if (qnum > 0) { - temp = (double)(2 * qnum); - temp = sqrt(temp); - bb = (uli) floor(temp); - if (bb < 1) bb= 1; - lowval = (uli) bb*(bb-1)/2; - highval = (uli) (bb+1)*bb/2; - if (lowval >= qnum) - while ((lowval > qnum)) { - bb -= 1; lowval = (uli) bb*(bb-1)/2; - } - else { - while (highval <= qnum) { - bb += 1; highval = (uli) (bb+1)*bb/2; - } - lowval = (uli) bb*(bb-1)/2; - } - qnum -= lowval; - if (qnum > 0) { - aa = (uli) qnum; - if (aa < 0) aa= 0; - } - } - } - *d = (int)dd; - *c = (int)cc; - *b = (int)bb; - *a = (int)aa; -} /* num2quart */ - -/******************/ - -uli numquarts(int maxspc) -{ - uli tmp; - int a, b, c, d; - - if (maxspc < 4) - return (uli)0; - else { - maxspc--; - a = maxspc-3; - b = maxspc-2; - c = maxspc-1; - d = maxspc; - - tmp = (uli) 1 + a + - (uli) b * 
(b-1) / 2 + - (uli) c * (c-1) * (c-2) / 6 + - (uli) d * (d-1) * (d-2) * (d-3) / 24; - return (tmp); - } -} /* numquarts */ - -/******************/ - -uli quart2num (int a, int b, int c, int d) -{ - uli tmp; - if ((a>b) || (b>c) || (c>d)) { - fprintf(stderr, "Error PP5 not (%d <= %d <= %d <= %d) !!!\n", a, b, c, -d); - exit (1); - } - tmp = (uli) a + - (uli) b * (b-1) / 2 + - (uli) c * (c-1) * (c-2) / 6 + - (uli) d * (d-1) * (d-2) * (d-3) / 24; - return (tmp); -} /* quart2num */ - -/******************/ - - - -/* flag=0 old allquart binary */ -/* flag=1 allquart binary */ -/* flag=2 allquart ACSII */ -/* flag=3 quartlh binary */ -/* flag=4 quartlh ASCII */ - -void writetpqfheader(int nspec, - FILE *ofp, - int flag) -{ int currspec; - - if (flag == 0) { - unsigned long nquart; - unsigned long blocklen; - - nquart = numquarts(nspec); - /* compute number of bytes */ - if (nquart % 2 == 0) { /* even number */ - blocklen = (nquart)/2; - } else { /* odd number */ - blocklen = (nquart + 1)/2; - } - /* FPRINTF(STDOUTFILE "Writing quartet file: %s\n", filename); */ - fprintf(ofp, "TREE-PUZZLE\n%s\n\n", VERSION); - fprintf(ofp, "species: %d\n", nspec); - fprintf(ofp, "quartets: %lu\n", nquart); - fprintf(ofp, "bytes: %lu\n\n", blocklen); - - - /* fwrite(&(quartetinfo[0]), sizeof(char), blocklen, ofp); */ - } - - if (flag == 1) fprintf(ofp, "##TPQF-BB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec); - if (flag == 2) fprintf(ofp, "##TPQF-BA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec); - if (flag == 3) fprintf(ofp, "##TPQF-LB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec); - if (flag == 4) fprintf(ofp, "##TPQF-LA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec); - - for (currspec=0; currspec MAXTS) { - FPRINTF(STDOUTFILE "\n\n\nF84 model not possible "); - FPRINTF(STDOUTFILE "(bad Ts/Tv parameter)\n"); - tstvf84 = 0.0; - return; - } - if (yr < MINYR || yr > MAXYR) { - FPRINTF(STDOUTFILE "\n\n\nF84 model not possible "); - FPRINTF(STDOUTFILE "(bad Y/R transition parameter)\n"); - tstvf84 = 0.0; - return; 
- } - TSparam = ts; - YRparam = yr; - optim_optn = FALSE; -} - -/* compute number of quartets used in LM analysis */ -void compnumqts() -{ - if (lmqts == 0) { - if (numclust == 4) - Numquartets = (uli) clustA*clustB*clustC*clustD; - if (numclust == 3) - Numquartets = (uli) clustA*clustB*clustC*(clustC-1)/2; - if (numclust == 2) - Numquartets = (uli) clustA*(clustA-1)/2 * clustB*(clustB-1)/2; - if (numclust == 1) - Numquartets = (uli) Maxspc*(Maxspc-1)*(Maxspc-2)*(Maxspc-3)/24; - } else { - Numquartets = lmqts; - } -} - -/* set options interactively */ -void setoptions() -{ - int i, valid; - double sumfreq; - char ch; - - puzzlemode = PAIRDIST; /*Only do pairwise dist. CZ, 05/16/01*/ - - /* defaults */ - rhetmode = UNIFORMRATE; /* assume rate homogeneity */ - numcats = 1; - Geta = 0.05; - grate_optim = FALSE; - fracinv = 0.0; - fracinv_optim = FALSE; - - compclock = FALSE; /* compute clocklike branch lengths */ - locroot = -1; /* search for optimal place of root */ - qcalg_optn = FALSE; /* don't use sampling of quartets */ - approxp_optn = TRUE; /* approximate parameter estimates */ - listqptrees = PSTOUT_NONE; /* list puzzling step trees */ - - /* approximate QP quartets? 
*/ - if (Maxspc <= 6) approxqp = FALSE; - else approxqp = TRUE; - - codon_optn = 0; /* use all positions in a codon */ - - /* number of puzzling steps */ - if (Maxspc <= 25) Numtrial = 1000; - else if (Maxspc <= 50) Numtrial = 10000; - else if (Maxspc <= 75) Numtrial = 25000; - else Numtrial = 50000; - - utree_optn = TRUE; /* use first user tree for estimation */ - outgroup = 0; /* use first taxon as outgroup */ - sym_optn = FALSE; /* symmetrize doublet frequencies */ - tstvf84 = 0.0; /* disable F84 model */ - show_optn = FALSE; /* show unresolved quartets */ - typ_optn = TREERECON_OPTN; /* tree reconstruction */ - numclust = 1; /* one clusters in LM analysis */ - lmqts = 0; /* all quartets in LM analysis */ - compnumqts(); - if (Numquartets > 10000) { - lmqts = 10000; /* 10000 quartets in LM analysis */ - compnumqts(); - } - - do { - FPRINTF(STDOUTFILE "\n\n\nGENERAL OPTIONS\n"); - FPRINTF(STDOUTFILE " b Type of analysis? "); - if (typ_optn == TREERECON_OPTN) FPRINTF(STDOUTFILE "Tree reconstruction\n"); - if (typ_optn == LIKMAPING_OPTN) FPRINTF(STDOUTFILE "Likelihood mapping\n"); - if (typ_optn == TREERECON_OPTN) { - FPRINTF(STDOUTFILE " k Tree search procedure? "); - if (puzzlemode == QUARTPUZ) FPRINTF(STDOUTFILE "Quartet puzzling\n"); - if (puzzlemode == USERTREE) FPRINTF(STDOUTFILE "User defined trees\n"); - if (puzzlemode == PAIRDIST) FPRINTF(STDOUTFILE "Pairwise distances only (no tree)\n"); - if (puzzlemode == QUARTPUZ) { - FPRINTF(STDOUTFILE " v Approximate quartet likelihood? %s\n", - (approxqp ? "Yes" : "No")); - FPRINTF(STDOUTFILE " u List unresolved quartets? %s\n", - (show_optn ? "Yes" : "No")); - FPRINTF(STDOUTFILE " n Number of puzzling steps? %lu\n", - Numtrial); - FPRINTF(STDOUTFILE " j List puzzling step trees? 
"); - switch (listqptrees) { - case PSTOUT_NONE: FPRINTF(STDOUTFILE "No\n"); break; - case PSTOUT_ORDER: FPRINTF(STDOUTFILE "Unique topologies\n"); break; - case PSTOUT_LISTORDER: FPRINTF(STDOUTFILE "Unique topologies & Chronological list\n"); break; - case PSTOUT_LIST: FPRINTF(STDOUTFILE "Chronological list only\n"); break; - } - - FPRINTF(STDOUTFILE " o Display as outgroup? "); - fputid(STDOUT, outgroup); - FPRINTF(STDOUTFILE "\n"); - } - if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) { - FPRINTF(STDOUTFILE " z Compute clocklike branch lengths? "); - if (compclock) FPRINTF(STDOUTFILE "Yes\n"); - else FPRINTF(STDOUTFILE "No\n"); - } - if (compclock) - if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) { - FPRINTF(STDOUTFILE " l Location of root? "); - if (locroot < 0) FPRINTF(STDOUTFILE "Best place (automatic search)\n"); - else if (locroot < Maxspc) { - FPRINTF(STDOUTFILE "Branch %d (", locroot + 1); - fputid(STDOUT, locroot); - FPRINTF(STDOUTFILE ")\n"); - } else FPRINTF(STDOUTFILE "Branch %d (internal branch)\n", locroot + 1); - } - } - if (typ_optn == LIKMAPING_OPTN) { - FPRINTF(STDOUTFILE " g Group sequences in clusters? "); - if (numclust == 1) FPRINTF(STDOUTFILE "No\n"); - else FPRINTF(STDOUTFILE "Yes (%d clusters as specified)\n", numclust); - FPRINTF(STDOUTFILE " n Number of quartets? "); - if (lmqts == 0) FPRINTF(STDOUTFILE "%lu (all possible)\n", Numquartets); - else FPRINTF(STDOUTFILE "%lu (random choice)\n", lmqts); - } - FPRINTF(STDOUTFILE " e Parameter estimates? "); - if (approxp_optn) FPRINTF(STDOUTFILE "Approximate (faster)\n"); - else FPRINTF(STDOUTFILE "Exact (slow)\n"); - if (!(puzzlemode == USERTREE && typ_optn == TREERECON_OPTN)) { - FPRINTF(STDOUTFILE " x Parameter estimation uses? "); - if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n"); - else FPRINTF(STDOUTFILE "Neighbor-joining tree\n"); - - } else { - FPRINTF(STDOUTFILE " x Parameter estimation uses? 
"); - if (utree_optn) - FPRINTF(STDOUTFILE "1st input tree\n"); - else if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n"); - else FPRINTF(STDOUTFILE "Neighbor-joining tree\n"); - } - FPRINTF(STDOUTFILE "SUBSTITUTION PROCESS\n"); - FPRINTF(STDOUTFILE " d Type of sequence input data? "); - if (auto_datatype == AUTO_GUESS) FPRINTF(STDOUTFILE "Auto: "); - if (data_optn == NUCLEOTIDE) FPRINTF(STDOUTFILE "Nucleotides\n"); - if (data_optn == AMINOACID) FPRINTF(STDOUTFILE "Amino acids\n"); - if (data_optn == BINARY) FPRINTF(STDOUTFILE "Binary states\n"); - if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0 && !SH_optn) { - FPRINTF(STDOUTFILE " h Codon positions selected? "); - if (codon_optn == 0) FPRINTF(STDOUTFILE "Use all positions\n"); - if (codon_optn == 1) FPRINTF(STDOUTFILE "Use only 1st positions\n"); - if (codon_optn == 2) FPRINTF(STDOUTFILE "Use only 2nd positions\n"); - if (codon_optn == 3) FPRINTF(STDOUTFILE "Use only 3rd positions\n"); - if (codon_optn == 4) FPRINTF(STDOUTFILE "Use 1st and 2nd positions\n"); - } - FPRINTF(STDOUTFILE " m Model of substitution? "); - if (data_optn == NUCLEOTIDE) { /* nucleotides */ - if (nuc_optn) { - if(HKY_optn) - FPRINTF(STDOUTFILE "HKY (Hasegawa et al. 1985)\n"); - else { - FPRINTF(STDOUTFILE "TN (Tamura-Nei 1993)\n"); - FPRINTF(STDOUTFILE " p Constrain TN model to F84 model? "); - if (tstvf84 == 0.0) - FPRINTF(STDOUTFILE "No\n"); - else FPRINTF(STDOUTFILE "Yes (Ts/Tv ratio = %.2f)\n", tstvf84); - } - FPRINTF(STDOUTFILE " t Transition/transversion parameter? "); - if (optim_optn) - FPRINTF(STDOUTFILE "Estimate from data set\n"); - else - FPRINTF(STDOUTFILE "%.2f\n", TSparam); - if (TN_optn) { - FPRINTF(STDOUTFILE " r Y/R transition parameter? "); - if (optim_optn) - FPRINTF(STDOUTFILE "Estimate from data set\n"); - else - FPRINTF(STDOUTFILE "%.2f\n", YRparam); - } - } - if (SH_optn) { - FPRINTF(STDOUTFILE "SH (Schoeniger-von Haeseler 1994)\n"); - FPRINTF(STDOUTFILE " t Transition/transversion parameter? 
"); - if (optim_optn) - FPRINTF(STDOUTFILE "Estimate from data set\n"); - else - FPRINTF(STDOUTFILE "%.2f\n", TSparam); - } - } - if (data_optn == NUCLEOTIDE && SH_optn) { - FPRINTF(STDOUTFILE " h Doublets defined by? "); - if (SHcodon) - FPRINTF(STDOUTFILE "1st and 2nd codon positions\n"); - else - FPRINTF(STDOUTFILE "1st+2nd, 3rd+4th, etc. site\n"); - } - if (data_optn == AMINOACID) { /* amino acids */ - switch (auto_aamodel) { - case AUTO_GUESS: - FPRINTF(STDOUTFILE "Auto: "); - break; - case AUTO_DEFAULT: - FPRINTF(STDOUTFILE "Def.: "); - break; - } - if (Dayhf_optn) FPRINTF(STDOUTFILE "Dayhoff (Dayhoff et al. 1978)\n"); - if (Jtt_optn) FPRINTF(STDOUTFILE "JTT (Jones et al. 1992)\n"); - if (mtrev_optn) FPRINTF(STDOUTFILE "mtREV24 (Adachi-Hasegawa 1996)\n"); - if (cprev_optn) FPRINTF(STDOUTFILE "cpREV45 (Adachi et al. 2000)\n"); - if (blosum62_optn) FPRINTF(STDOUTFILE "BLOSUM62 (Henikoff-Henikoff 92)\n"); - if (vtmv_optn) FPRINTF(STDOUTFILE "VT (Mueller-Vingron 2000)\n"); - if (wag_optn) FPRINTF(STDOUTFILE "WAG (Whelan-Goldman 2000)\n"); - } - if (data_optn == BINARY) { /* binary states */ - FPRINTF(STDOUTFILE "Two-state model (Felsenstein 1981)\n"); - } - if (data_optn == AMINOACID) - FPRINTF(STDOUTFILE " f Amino acid frequencies? "); - else if (data_optn == NUCLEOTIDE && SH_optn) - FPRINTF(STDOUTFILE " f Doublet frequencies? "); - else if (data_optn == NUCLEOTIDE && nuc_optn) - FPRINTF(STDOUTFILE " f Nucleotide frequencies? "); - else if (data_optn == BINARY) - FPRINTF(STDOUTFILE " f Binary state frequencies? "); - FPRINTF(STDOUTFILE "%s\n", (Frequ_optn ? "Estimate from data set" : - "Use specified values")); - if (data_optn == NUCLEOTIDE && SH_optn) - FPRINTF(STDOUTFILE " s Symmetrize doublet frequencies? %s\n", - (sym_optn ? "Yes" : "No")); - - FPRINTF(STDOUTFILE "RATE HETEROGENEITY\n"); - FPRINTF(STDOUTFILE " w Model of rate heterogeneity? 
"); - if (rhetmode == UNIFORMRATE) FPRINTF(STDOUTFILE "Uniform rate\n"); - if (rhetmode == GAMMARATE ) FPRINTF(STDOUTFILE "Gamma distributed rates\n"); - if (rhetmode == TWORATE ) FPRINTF(STDOUTFILE "Two rates (1 invariable + 1 variable)\n"); - if (rhetmode == MIXEDRATE ) FPRINTF(STDOUTFILE "Mixed (1 invariable + %d Gamma rates)\n", numcats); - - if (rhetmode == TWORATE || rhetmode == MIXEDRATE) { - FPRINTF(STDOUTFILE " i Fraction of invariable sites? "); - if (fracinv_optim) FPRINTF(STDOUTFILE "Estimate from data set"); - else FPRINTF(STDOUTFILE "%.2f", fracinv); - if (fracinv == 0.0 && !fracinv_optim) FPRINTF(STDOUTFILE " (all sites variable)"); - FPRINTF(STDOUTFILE "\n"); - } - if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) { - FPRINTF(STDOUTFILE " a Gamma distribution parameter alpha? "); - if (grate_optim) - FPRINTF(STDOUTFILE "Estimate from data set\n"); - else if (Geta > 0.5) - FPRINTF(STDOUTFILE "%.2f (strong rate heterogeneity)\n", (1.0-Geta)/Geta); - else FPRINTF(STDOUTFILE "%.2f (weak rate heterogeneity)\n", (1.0-Geta)/Geta); - FPRINTF(STDOUTFILE " c Number of Gamma rate categories? 
%d\n", numcats); - } - - FPRINTF(STDOUTFILE "\nQuit [q], confirm [y], or change [menu] settings: "); - - /* read one char */ - ch = getchar(); - if (ch != '\n') { - do ; - while (getchar() != '\n'); - } - ch = (char) tolower((int) ch); - - /* letters in use: d m */ - /* letters not in use: */ - - switch (ch) { - - case '\n': break; - - - - case 'd': if (auto_datatype == AUTO_GUESS) { - auto_datatype = AUTO_OFF; - guessdata_optn = data_optn; - data_optn = 0; - } else { - data_optn = data_optn + 1; - if (data_optn == 3) { - auto_datatype = AUTO_GUESS; - data_optn = guessdata_optn; - } - } - /* translate characters into format used by ML engine */ - translatedataset(); - estimatebasefreqs(); - break; - - - - case 'm': if (data_optn == NUCLEOTIDE) { /* nucleotide data */ - if(HKY_optn && nuc_optn) { - /* HKY -> TN */ - tstvf84 = 0.0; - TSparam = 2.0; - YRparam = 0.9; - HKY_optn = FALSE; - TN_optn = TRUE; - optim_optn = TRUE; - nuc_optn = TRUE; - SH_optn = FALSE; - break; - } - if(TN_optn && nuc_optn) { - if (Maxseqc % 2 == 0 || Maxseqc % 3 == 0) { - /* number of chars needs to be a multiple 2 or 3 */ - /* TN -> SH */ - if (Maxseqc % 2 != 0 && Maxseqc % 3 == 0) - SHcodon = TRUE; - else - SHcodon = FALSE; - tstvf84 = 0.0; - TSparam = 2.0; - YRparam = 1.0; - HKY_optn = TRUE; - TN_optn = FALSE; - optim_optn = TRUE; - nuc_optn = FALSE; - SH_optn = TRUE; - /* translate characters into format */ - /* used by ML engine */ - translatedataset(); - estimatebasefreqs(); - } else { - FPRINTF(STDOUTFILE "\n\n\nSH model not "); - FPRINTF(STDOUTFILE "available for the data set!\n"); - /* TN -> HKY */ - tstvf84 = 0.0; - TSparam = 2.0; - YRparam = 1.0; - HKY_optn = TRUE; - TN_optn = FALSE; - optim_optn = TRUE; - nuc_optn = TRUE; - SH_optn = FALSE; - } - break; - } - if(SH_optn) { - /* SH -> HKY */ - tstvf84 = 0.0; - TSparam = 2.0; - YRparam = 1.0; - HKY_optn = TRUE; - TN_optn = FALSE; - optim_optn = TRUE; - nuc_optn = TRUE; - SH_optn = FALSE; - /* translate characters into format */ - 
/* used by ML engine */ - translatedataset(); - estimatebasefreqs(); - break; - } - break; - } - if (data_optn == AMINOACID) { /* amino acid data */ - if (auto_aamodel) { - /* AUTO -> Dayhoff */ - Dayhf_optn = TRUE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } - if (Dayhf_optn) { - /* Dayhoff -> JTT */ - Dayhf_optn = FALSE; - Jtt_optn = TRUE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } - if (Jtt_optn) { - /* JTT -> mtREV */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = TRUE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } -#ifdef CPREV - if (mtrev_optn) { - /* mtREV -> cpREV */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = TRUE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } -#else /* ! CPREV */ - if (mtrev_optn) { - /* mtREV -> BLOSUM 62 */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = TRUE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } -#endif /* ! 
CPREV */ - -#ifdef CPREV - if (cprev_optn) { - /* cpREV -> BLOSUM 62 */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = TRUE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } -#endif - if (blosum62_optn) { - /* BLOSUM 62 -> VT model */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = TRUE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } - if (vtmv_optn) { - /* VT model -> WAG model */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = TRUE; - auto_aamodel = AUTO_OFF; - break; - } - if (wag_optn) { - /* WAG model -> AUTO */ - Dayhf_optn = guessDayhf_optn; - Jtt_optn = guessJtt_optn; - mtrev_optn = guessmtrev_optn; - cprev_optn = guesscprev_optn; - blosum62_optn = guessblosum62_optn; - vtmv_optn = guessvtmv_optn; - wag_optn = guesswag_optn; - auto_aamodel = guessauto_aamodel; - break; - } - break; - } - if (data_optn == BINARY) { - FPRINTF(STDOUTFILE "\n\n\nNo other model available!\n"); - } - break; - - - - case 'y': break; - - default: FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - break; - } - } while (ch != 'y'); - - FPRINTF(STDOUTFILE "\n\n\n"); -} - -/* open file for reading */ -void openfiletoread(FILE **fp, char name[], char descr[]) -{ - int count = 0; - cvector str; - - if ((*fp = fopen(name, "r")) == NULL) { - FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr); - str = mygets(); - while ((*fp = fopen(str, "r")) == NULL) - { - count++; - if (count > 10) - { - FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n"); - exit(1); - } - FPRINTF(STDOUTFILE "File '%s' not found, ", str); - FPRINTF(STDOUTFILE "please enter alternative name: "); - free_cvector(str); - str = mygets(); - } - free_cvector(str); - FPRINTF(STDOUTFILE "\n"); - } -} /* 
openfiletoread */ - - -/* open file for writing */ -void openfiletowrite(FILE **fp, char name[], char descr[]) -{ - int count = 0; - cvector str; - - if ((*fp = fopen(name, "w")) == NULL) { - FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr); - str = mygets(); - while ((*fp = fopen(str, "w")) == NULL) - { - count++; - if (count > 10) - { - FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n"); - exit(1); - } - FPRINTF(STDOUTFILE "File '%s' not created, ", str); - FPRINTF(STDOUTFILE "please enter other name: "); - free_cvector(str); - str = mygets(); - } - free_cvector(str); - FPRINTF(STDOUTFILE "\n"); - } -} /* openfiletowrite */ - - -/* open file for appending */ -void openfiletoappend(FILE **fp, char name[], char descr[]) -{ - int count = 0; - cvector str; - - if ((*fp = fopen(name, "a")) == NULL) { - FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr); - str = mygets(); - while ((*fp = fopen(str, "a")) == NULL) - { - count++; - if (count > 10) - { - FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n"); - exit(1); - } - FPRINTF(STDOUTFILE "File '%s' not created, ", str); - FPRINTF(STDOUTFILE "please enter other name: "); - free_cvector(str); - str = mygets(); - } - free_cvector(str); - FPRINTF(STDOUTFILE "\n"); - } -} /* openfiletowrite */ - - -/* close file */ -void closefile(FILE *fp) -{ - fclose(fp); -} /* closefile */ - -/* symmetrize doublet frequencies */ -void symdoublets() -{ - int i, imean; - double mean; - - if (data_optn == NUCLEOTIDE && SH_optn && sym_optn) { - /* ML frequencies */ - mean = (Freqtpm[1] + Freqtpm[4])/2.0; /* AC CA */ - Freqtpm[1] = mean; - Freqtpm[4] = mean; - mean = (Freqtpm[2] + Freqtpm[8])/2.0; /* AG GA */ - Freqtpm[2] = mean; - Freqtpm[8] = mean; - mean = (Freqtpm[3] + Freqtpm[12])/2.0; /* AT TA */ - Freqtpm[3] = mean; - Freqtpm[12] = mean; - mean = (Freqtpm[6] + Freqtpm[9])/2.0; /* CG GC */ - Freqtpm[6] = mean; - Freqtpm[9] = mean; - mean = (Freqtpm[7] + Freqtpm[13])/2.0; 
/* CT TC */ - Freqtpm[7] = mean; - Freqtpm[13] = mean; - mean = (Freqtpm[11] + Freqtpm[14])/2.0; /* GT TG */ - Freqtpm[11] = mean; - Freqtpm[14] = mean; - - /* base composition of each taxon */ - for (i = 0; i < Maxspc; i++) { - imean = (Basecomp[i][1] + Basecomp[i][4])/2; /* AC CA */ - Basecomp[i][1] = imean; - Basecomp[i][4] = imean; - imean = (Basecomp[i][2] + Basecomp[i][8])/2; /* AG GA */ - Basecomp[i][2] = imean; - Basecomp[i][8] = imean; - imean = (Basecomp[i][3] + Basecomp[i][12])/2; /* AT TA */ - Basecomp[i][3] = imean; - Basecomp[i][12] = imean; - imean = (Basecomp[i][6] + Basecomp[i][9])/2; /* CG GC */ - Basecomp[i][6] = imean; - Basecomp[i][9] = imean; - imean = (Basecomp[i][7] + Basecomp[i][13])/2; /* CT TC */ - Basecomp[i][7] = imean; - Basecomp[i][13] = imean; - imean = (Basecomp[i][11] + Basecomp[i][14])/2; /* GT TG */ - Basecomp[i][11] = imean; - Basecomp[i][14] = imean; - } - } -} - -/* show Ts/Tv ratio and Ts Y/R ratio */ -void computeexpectations() -{ - /* CZ */ -} - -/* write ML distance matrix to file. 
Modified CZ 05/29/01 */ -void putdistance(FILE *fp) -{ - /*int i;*/ - int i, j; - - for (i = 0; i < Maxspc - 1; i++) { - /*fprintf(fp, "%.5f ", Distanmat[i]/100.0);*/ - for ( j = 0; j < 26; j++ ) { - fputc( Identif[i][j], fp ); /*CZ*/ - } - fprintf(fp, "%.5f\n", Distanmat[i]/100.0); - } - fprintf(fp, "\n"); - -} - - - - -/* first lines of EPSF likelihood mapping file */ -void initps(FILE *ofp) -{ - /* CZ */ -} - -/* plot one point of likelihood mapping analysis */ -void plotlmpoint(FILE *ofp, double w1, double w2) -{ - /* CZ */ -} - -/* last lines of EPSF likelihood mapping file */ -void finishps(FILE *ofp) -{ - /* CZ */ -} - -/* computes LM point from the three log-likelihood values, - plots the point, and does some statistics */ -void makelmpoint(FILE *fp, double b1, double b2, double b3) -{ - double w1, w2, w3, temp; - unsigned char qpbranching; - double temp1, temp2, temp3, onethird; - unsigned char discreteweight[3], treebits[3]; - - onethird = 1.0/3.0; - treebits[0] = (unsigned char) 1; - treebits[1] = (unsigned char) 2; - treebits[2] = (unsigned char) 4; - - /* sort in descending order */ - qweight[0] = b1; - qweight[1] = b2; - qweight[2] = b3; - sort3doubles(qweight, qworder); - - /* compute Bayesian weights */ - qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]); - qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]); - qweight[qworder[0]] = 1.0; - temp = qweight[0] + qweight[1] + qweight[2]; - qweight[0] = qweight[0]/temp; - qweight[1] = qweight[1]/temp; - qweight[2] = qweight[2]/temp; - - /* plot one point in likelihood mapping triangle */ - w1 = qweight[0]; - w2 = qweight[1]; - w3 = qweight[2]; - plotlmpoint(fp, w1, w2); - - /* check areas 1,2,3 */ - if (treebits[qworder[0]] == 1) ar1++; - else if (treebits[qworder[0]] == 2) ar2++; - else ar3++; - - /* check out regions 1,2,3,4,5,6,7 */ - - /* 100 distribution */ - temp1 = 1.0 - qweight[qworder[0]]; - sqdiff[0] = temp1*temp1 + - qweight[qworder[1]]*qweight[qworder[1]] + - 
qweight[qworder[2]]*qweight[qworder[2]]; - discreteweight[0] = treebits[qworder[0]]; - - /* 110 distribution */ - temp1 = 0.5 - qweight[qworder[0]]; - temp2 = 0.5 - qweight[qworder[1]]; - sqdiff[1] = temp1*temp1 + temp2*temp2 + - qweight[qworder[2]]*qweight[qworder[2]]; - discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]]; - - /* 111 distribution */ - temp1 = onethird - qweight[qworder[0]]; - temp2 = onethird - qweight[qworder[1]]; - temp3 = onethird - qweight[qworder[2]]; - sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3; - discreteweight[2] = (unsigned char) 7; - - /* sort in descending order */ - sort3doubles(sqdiff, sqorder); - - qpbranching = (unsigned char) discreteweight[sqorder[2]]; - - if (qpbranching == 1) { - reg1++; - if (w2 < w3) reg1l++; - else reg1r++; - } - if (qpbranching == 2) { - reg2++; - if (w1 < w3) reg2d++; - else reg2u++; - } - if (qpbranching == 4) { - reg3++; - if (w1 < w2) reg3d++; - else reg3u++; - } - if (qpbranching == 3) { - reg4++; - if (w1 < w2) reg4d++; - else reg4u++; - } - if (qpbranching == 6) { - reg5++; - if (w2 < w3) reg5l++; - else reg5r++; - } - if (qpbranching == 5) { - reg6++; - if (w1 < w3) reg6d++; - else reg6u++; - } - if (qpbranching == 7) reg7++; -} - -/* print tree statistics */ -void printtreestats(FILE *ofp) -{ - int i, j, besttree; - double bestlkl, difflkl, difflklps, temp, sum; - - /* find best tree */ - besttree = 0; - bestlkl = ulkl[0]; - for (i = 1; i < numutrees; i++) - if (ulkl[i] > bestlkl) { - besttree = i; - bestlkl = ulkl[i]; - } - - fprintf(ofp, "\n\nCOMPARISON OF USER TREES (NO CLOCK)\n\n"); - fprintf(ofp, "Tree log L difference S.E. 
Significantly worse\n"); - fprintf(ofp, "--------------------------------------------------------\n"); - for (i = 0; i < numutrees; i++) { - difflkl = ulkl[besttree]-ulkl[i]; - fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulkl[i], difflkl); - if (i == besttree) { - fprintf(ofp, " <----------------- best tree"); - } else { - /* compute variance of Log L differences over sites */ - difflklps = difflkl/(double)Maxsite; - sum = 0.0; - for (j = 0; j < Numptrn; j++) { - temp = allsites[besttree][j] - allsites[i][j] - difflklps; - sum += temp*temp*Weight[j]; - } - sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite)); - fprintf(ofp, "%11.2f ", sum); - if (difflkl > 1.96*sum) - fprintf(ofp, "yes"); - else - fprintf(ofp, "no"); - } - fprintf(ofp, "\n"); - } - fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n"); - - if (compclock) { - - /* find best tree */ - besttree = 0; - bestlkl = ulklc[0]; - for (i = 1; i < numutrees; i++) - if (ulklc[i] > bestlkl) { - besttree = i; - bestlkl = ulklc[i]; - } - - fprintf(ofp, "\n\nCOMPARISON OF USER TREES (WITH CLOCK)\n\n"); - fprintf(ofp, "Tree log L difference S.E. 
Significantly worse\n"); - fprintf(ofp, "--------------------------------------------------------\n"); - for (i = 0; i < numutrees; i++) { - difflkl = ulklc[besttree]-ulklc[i]; - fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulklc[i], difflkl); - if (i == besttree) { - fprintf(ofp, " <----------------- best tree"); - } else { - /* compute variance of Log L differences over sites */ - difflklps = difflkl/(double)Maxsite; - sum = 0.0; - for (j = 0; j < Numptrn; j++) { - temp = allsitesc[besttree][j] - allsitesc[i][j] - difflklps; - sum += temp*temp*Weight[j]; - } - sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite)); - fprintf(ofp, "%11.2f ", sum); - if (difflkl > 1.96*sum) - fprintf(ofp, "yes"); - else - fprintf(ofp, "no"); - } - fprintf(ofp, "\n"); - } - fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n"); - } -} - -/* time stamp */ -void timestamp(FILE* ofp) -{ - double timespan; - double cpuspan; - timespan = difftime(Stoptime, Starttime); - cpuspan = ((double) (Stopcpu - Startcpu) / CLOCKS_PER_SEC); - fprintf(ofp, "\n\nTIME STAMP\n\n"); - fprintf(ofp, "Date and time: %s", asctime(localtime(&Starttime)) ); - fprintf(ofp, "Runtime (excl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n", - timespan, timespan/60., timespan/3600.); - fprintf(ofp, "Runtime (incl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n", - fulltime, fulltime/60., fulltime/3600.); -#ifdef TIMEDEBUG - fprintf(ofp, "CPU time (incl. 
input): %.0f seconds (= %.1f minutes = %.1f hours)\n\n", - fullcpu, fullcpu/60., fullcpu/3600.); -#endif /* TIMEDEBUG */ - -} - -/* extern int bestrfound; */ - -/* write output file */ -void writeoutputfile(FILE *ofp, int part) -{ - /* CZ */ -} - - -/******************************************************************************/ -/* timer routines */ -/******************************************************************************/ - -/* start timer */ -void starttimer() -{ - time(&time0); - time1 = time0; -} - -/* check remaining time and print message if necessary */ -void checktimer(uli numqts) -{ - double tc2, mintogo, minutes, hours; - - time(&time2); - if ( (time2 - time1) > 900) { /* generate message every 15 minutes */ - /* every 900 seconds */ - /* percentage of completed quartets */ - if (mflag == 0) { - mflag = 1; - FPRINTF(STDOUTFILE "\n"); - } - tc2 = 100.*numqts/Numquartets; - mintogo = (100.0-tc2) * - (double) (time2-time0)/60.0/tc2; - hours = floor(mintogo/60.0); - minutes = mintogo - 60.0*hours; - FPRINTF(STDOUTFILE "%.2f%%", tc2); - FPRINTF(STDOUTFILE " completed (remaining"); - FPRINTF(STDOUTFILE " time: %.0f", hours); - FPRINTF(STDOUTFILE " hours %.0f", minutes); - FPRINTF(STDOUTFILE " minutes)\n"); - fflush(STDOUT); - time1 = time2; - } - -} - -/* check remaining time and print message if necessary */ -void checktimer2(uli numqts, uli all, int flag) -{ - double tc2, mintogo, minutes, hours; - - static time_t tt1; - static time_t tt2; - - if (flag == 1) { - time(&tt1); - time(&tt2); - } else { - time(&tt2); - if ( (tt2 - tt1) > 900) { /* generate message every 15 minutes */ - /* every 900 seconds */ - /* percentage of completed quartets */ - if (mflag == 0) { - mflag = 1; - FPRINTF(STDOUTFILE "\n"); - } - tc2 = 100.*numqts/Numquartets; - mintogo = (100.0-tc2) * - (double) (tt2-time0)/60.0/tc2; - hours = floor(mintogo/60.0); - minutes = mintogo - 60.0*hours; - FPRINTF(STDOUTFILE "%.2f%%", tc2); - FPRINTF(STDOUTFILE " completed (remaining"); - 
FPRINTF(STDOUTFILE " time: %.0f", hours); - FPRINTF(STDOUTFILE " hours %.0f", minutes); - FPRINTF(STDOUTFILE " minutes)\n"); - fflush(STDOUT); - tt1 = tt2; - } - } -} - -void resetqblocktime(timearray_t *ta) -{ - ta->quartcpu += ta->quartblockcpu; - ta->quartblockcpu = 0.0; - ta->quarttime += ta->quartblocktime; - ta->quartblocktime = 0.0; -} /* resetqblocktime */ - - -void resetpblocktime(timearray_t *ta) -{ - ta->puzzcpu += ta->puzzblockcpu; - ta->puzzblockcpu = 0.0; - ta->puzztime += ta->puzzblocktime; - ta->puzzblocktime = 0.0; -} /* resetpblocktime */ - - -#ifdef TIMEDEBUG -void printtimearr(timearray_t *ta) -{ -# if ! PARALLEL - int PP_Myid; - PP_Myid = -1; -# endif - printf("(%2d) MMCPU: %11ld / %11ld \n", PP_Myid, ta->maxcpu, ta->mincpu); - printf("(%2d) CTick: %11.6f [tks] / %11.6f [s] \n", PP_Myid, ta->mincputick, ta->mincputicktime); - - printf("(%2d) MMTIM: %11ld / %11ld \n", PP_Myid, ta->maxtime, ta->mintime); - - printf("(%2d) Mxblk: %11.6e / %11.6e \n", PP_Myid, ta->maxcpublock, ta->maxtimeblock); - printf("(%2d) Mnblk: %11.6e / %11.6e \n", PP_Myid, ta->mincpublock, ta->mintimeblock); - - printf("(%2d) Gnrl: %11.6e / %11.6e \n", PP_Myid, ta->generalcpu, ta->generaltime); - printf("(%2d) Optn: %11.6e / %11.6e \n", PP_Myid, ta->optionscpu, ta->optionstime); - printf("(%2d) Estm: %11.6e / %11.6e \n", PP_Myid, ta->paramestcpu, ta->paramesttime); - printf("(%2d) Qurt: %11.6e / %11.6e \n", PP_Myid, ta->quartcpu, ta->quarttime); - printf("(%2d) QBlk: %11.6e / %11.6e \n", PP_Myid, ta->quartblockcpu, ta->quartblocktime); - printf("(%2d) QMax: %11.6e / %11.6e \n", PP_Myid, ta->quartmaxcpu, ta->quartmaxtime); - printf("(%2d) QMin: %11.6e / %11.6e \n", PP_Myid, ta->quartmincpu, ta->quartmintime); - - printf("(%2d) Puzz: %11.6e / %11.6e \n", PP_Myid, ta->puzzcpu, ta->puzztime); - printf("(%2d) PBlk: %11.6e / %11.6e \n", PP_Myid, ta->puzzblockcpu, ta->puzzblocktime); - printf("(%2d) PMax: %11.6e / %11.6e \n", PP_Myid, ta->puzzmaxcpu, ta->puzzmaxtime); - 
printf("(%2d) PMin: %11.6e / %11.6e \n", PP_Myid, ta->puzzmincpu, ta->puzzmintime); - - printf("(%2d) Tree: %11.6e / %11.6e \n", PP_Myid, ta->treecpu, ta->treetime); - printf("(%2d) TBlk: %11.6e / %11.6e \n", PP_Myid, ta->treeblockcpu, ta->treeblocktime); - printf("(%2d) TMax: %11.6e / %11.6e \n", PP_Myid, ta->treemaxcpu, ta->treemaxtime); - printf("(%2d) TMin: %11.6e / %11.6e \n", PP_Myid, ta->treemincpu, ta->treemintime); - - printf("(%2d) C/T : %11.6e / %11.6e \n", PP_Myid, - (ta->generalcpu + ta->optionscpu + ta->paramestcpu + ta->quartblockcpu + ta->puzzblockcpu + ta->treeblockcpu), - (ta->generaltime + ta->optionstime + ta->paramesttime + ta->quartblocktime + ta->puzzblocktime + ta->treeblocktime)); - printf("(%2d) CPU: %11.6e / Time: %11.6e \n", PP_Myid, ta->cpu, ta->time); - printf("(%2d) aCPU: %11.6e / aTime: %11.6e \n", PP_Myid, ta->fullcpu, ta->fulltime); - -} /* printtimearr */ -#endif /* TIMEDEBUG */ - -char *jtype [7]; - -void inittimearr(timearray_t *ta) -{ - clock_t c0, c1, c2; - - jtype[OVERALL] = "OVERALL"; - jtype[GENERAL] = "GENERAL"; - jtype[OPTIONS] = "OPTIONS"; - jtype[PARAMEST] = "PARAMeter ESTimation"; - jtype[QUARTETS] = "QUARTETS"; - jtype[PUZZLING] = "PUZZLING steps"; - jtype[TREEEVAL] = "TREE EVALuation"; - ta->currentjob = GENERAL; - - c1 = clock(); - c2 = clock(); - while (c1 == c2) - c2 = clock(); - ta->mincputick = (double)(c2 - c1); - ta->mincputicktime = ((double)(c2 - c1))/CLOCKS_PER_SEC; - - ta->tempcpu = clock(); - ta->tempcpustart = ta->tempcpu; - ta->tempfullcpu = ta->tempcpu; - time(&(ta->temptime)); - ta->temptimestart = ta->temptime; - ta->tempfulltime = ta->temptime; - - c0=0; c1=0; c2=(clock_t)((2 * c1) + 1);; - while (c1 < c2) { - c0 = c1; - c1 = c2; - c2 = (clock_t)((2 * c1) + 1); - } - if (c1 == c2) ta->maxcpu=c0; - if (c1 > c2) ta->maxcpu=c1; - - c0=0; c1=0; c2=(clock_t)((2 * c1) - 1); - while (c1 > c2) { - c0 = c1; - c1 = c2; - c2 = (clock_t)((2 * c1) - 1); - } - if (c1 == c2) ta->mincpu=c0; - if (c1 < c2) 
ta->mincpu=c1; - - - - ta->maxtime = 0; - ta->mintime = 0; - - ta->maxcpublock = 0; - ta->mincpublock = DBL_MAX; - ta->maxtimeblock = 0; - ta->mintimeblock = DBL_MAX; - - ta->cpu = 0.0; - ta->time = 0.0; - - ta->fullcpu = 0.0; - ta->fulltime = 0.0; - - ta->generalcpu = 0.0; - ta->optionscpu = 0.0; - ta->paramestcpu = 0.0; - ta->quartcpu = 0.0; - ta->quartblockcpu = 0.0; - ta->quartmaxcpu = 0.0; - ta->quartmincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC; - ta->puzzcpu = 0.0; - ta->puzzblockcpu = 0.0; - ta->puzzmaxcpu = 0.0; - ta->puzzmincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC; - ta->treecpu = 0.0; - ta->treeblockcpu = 0.0; - ta->treemaxcpu = 0.0; - ta->treemincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC; - - ta->generaltime = 0.0; - ta->optionstime = 0.0; - ta->paramesttime = 0.0; - ta->quarttime = 0.0; - ta->quartblocktime = 0.0; - ta->quartmaxtime = 0.0; - ta->quartmintime = DBL_MAX; - ta->puzztime = 0.0; - ta->puzzblocktime = 0.0; - ta->puzzmaxtime = 0.0; - ta->puzzmintime = DBL_MAX; - ta->treetime = 0.0; - ta->treeblocktime = 0.0; - ta->treemaxtime = 0.0; - ta->treemintime = DBL_MAX; -} /* inittimearr */ - - -/***************/ - -void addup(int jobtype, clock_t c1, clock_t c2, time_t t1, time_t t2, timearray_t *ta) -{ - double c, - t; - - if (t2 != t1) t = difftime(t2, t1); - else t = 0.0; - - if (c2 < c1) - c = ((double)(c2 - ta->mincpu))/CLOCKS_PER_SEC + - ((double)(ta->maxcpu - c1))/CLOCKS_PER_SEC; - else - c = ((double)(c2 - c1))/CLOCKS_PER_SEC; - - if (jobtype != OVERALL) { - - if (ta->mincpublock > c) ta->mincpublock = c; - if (ta->maxcpublock < c) ta->maxcpublock = c; - if (ta->mintimeblock > t) ta->mintimeblock = t; - if (ta->maxtimeblock < t) ta->maxtimeblock = t; - - switch (jobtype) { - case GENERAL: ta->generalcpu += c; - ta->generaltime += t; - break; - case OPTIONS: ta->optionscpu += c; - ta->optionstime += t; - break; - case PARAMEST: ta->paramestcpu += c; - ta->paramesttime += t; - break; - case QUARTETS: ta->quartblockcpu += c; - 
ta->quartblocktime += t; - if (ta->quartmincpu > c) ta->quartmincpu = c; - if (ta->quartmaxcpu < c) ta->quartmaxcpu = c; - if (ta->quartmintime > t) ta->quartmintime = t; - if (ta->quartmaxtime < t) ta->quartmaxtime = t; - break; - case PUZZLING: ta->puzzblockcpu += c; - ta->puzzblocktime += t; - if (ta->puzzmincpu > c) ta->puzzmincpu = c; - if (ta->puzzmaxcpu < c) ta->puzzmaxcpu = c; - if (ta->puzzmintime > t) ta->puzzmintime = t; - if (ta->puzzmaxtime < t) ta->puzzmaxtime = t; - break; - case TREEEVAL: ta->treeblockcpu += c; - ta->treeblocktime += t; - if (ta->treemincpu > c) ta->treemincpu = c; - if (ta->treemaxcpu < c) ta->treemaxcpu = c; - if (ta->treemintime > t) ta->treemintime = t; - if (ta->treemaxtime < t) ta->treemaxtime = t; - break; - } - ta->cpu += c; - ta->time += t; - - } else { - ta->fullcpu += c; - ta->fulltime += t; - } - -# ifdef TIMEDEBUG - { -# if ! PARALLEL - int PP_Myid = -1; -# endif /* !PARALLEL */ - printf("(%2d) CPU: +%10.6f / Time: +%10.6f (%s)\n", PP_Myid, c, t, jtype[jobtype]); - printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->cpu, ta->time, jtype[jobtype]); - printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->fullcpu, ta->fulltime, jtype[jobtype]); - } -# endif /* TIMEDEBUG */ -} /* addup */ - - -/***************/ - - -void addtimes(int jobtype, timearray_t *ta) -{ - clock_t tempc; - time_t tempt; - - time(&tempt); - tempc = clock(); - - if ((tempc < ta->tempfullcpu) || (jobtype == OVERALL)) { /* CPU counter overflow for overall time */ - addup(OVERALL, ta->tempfullcpu, tempc, ta->tempfulltime, tempt, ta); - ta->tempfullcpu = tempc; - ta->tempfulltime = tempt; - if (jobtype == OVERALL) { - addup(ta->currentjob, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta); - ta->tempcpustart = ta->tempcpu; - ta->tempcpu = tempc; - ta->temptimestart = ta->temptime; - ta->temptime = tempt; - } - } - - if((jobtype != ta->currentjob) && (jobtype != OVERALL)) { /* change of job type */ - addup(ta->currentjob, 
ta->tempcpustart, ta->tempcpu, ta->temptimestart, ta->temptime, ta); - ta->tempcpustart = ta->tempcpu; - ta->tempcpu = tempc; - ta->temptimestart = ta->temptime; - ta->temptime = tempt; - ta->currentjob = jobtype; - } - - if (tempc < ta->tempcpustart) { /* CPU counter overflow */ - addup(jobtype, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta); - ta->tempcpustart = ta->tempcpu; - ta->tempcpu = tempc; - ta->temptimestart = ta->temptime; - ta->temptime = tempt; - } - -} /* addtimes */ - - - -/******************************************************************************/ - -/* estimate parameters of substitution process and rate heterogeneity - no tree - n-taxon tree is not needed because of quartet method or NJ tree topology */ -void estimateparametersnotree() -{ - int it, nump, change; - double TSold, YRold, FIold, GEold; - - it = 0; - nump = 0; - - /* count number of parameters */ - if (data_optn == NUCLEOTIDE && optim_optn) nump++; - if (fracinv_optim || grate_optim) nump++; - - do { /* repeat until nothing changes any more */ - it++; - change = FALSE; - - /* optimize substitution parameters */ - if (data_optn == NUCLEOTIDE && optim_optn) { - - TSold = TSparam; - YRold = YRparam; - - - /* - * optimize - */ - - FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n"); - fflush(STDOUT); - - if (qcalg_optn) { /* quartet sampling */ - optimseqevolparamsq(); - } else { /* NJ tree */ - tmpfp = tmpfile(); - njtree(tmpfp); - rewind(tmpfp); - readusertree(tmpfp); - closefile(tmpfp); - optimseqevolparamst(); - } - - computedistan(); /* update ML distances */ - - /* same tolerance as 1D minimization */ - if ((fabs(TSparam - TSold) > 3.3*PEPS1) || - (fabs(YRparam - YRold) > 3.3*PEPS1) - ) change = TRUE; - - } - - /* optimize rate heterogeneity variables */ - if (fracinv_optim || grate_optim) { - - FIold = fracinv; - GEold = Geta; - - - /* - * optimize - */ - - FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n"); - fflush(STDOUT); 
- /* compute NJ tree */ - tmpfp = tmpfile(); - njtree(tmpfp); - /* use NJ tree topology to estimate parameters */ - rewind(tmpfp); - readusertree(tmpfp); - closefile(tmpfp); - - optimrateparams(); - computedistan(); /* update ML distances */ - - - /* same tolerance as 1D minimization */ - if ((fabs(fracinv - FIold) > 3.3*PEPS2) || - (fabs(Geta - GEold) > 3.3*PEPS2) - ) change = TRUE; - - } - - if (nump == 1) return; - - } while (it != MAXITS && change); - - return; -} - - -/* estimate parameters of substitution process and rate heterogeneity - tree - same as above but here the n-taxon tree is already in memory */ -void estimateparameterstree() -{ - int it, nump, change; - double TSold, YRold, FIold, GEold; - - it = 0; - nump = 0; - - /* count number of parameters */ - if (data_optn == NUCLEOTIDE && optim_optn) nump++; - if (fracinv_optim || grate_optim) nump++; - - do { /* repeat until nothing changes any more */ - it++; - change = FALSE; - - /* optimize substitution process parameters */ - if (data_optn == NUCLEOTIDE && optim_optn) { - - TSold = TSparam; - YRold = YRparam; - - - /* - * optimize - */ - - FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n"); - fflush(STDOUT); - optimseqevolparamst(); - computedistan(); /* update ML distances */ - - - /* same tolerance as 1D minimization */ - if ((fabs(TSparam - TSold) > 3.3*PEPS1) || - (fabs(YRparam - YRold) > 3.3*PEPS1) - ) change = TRUE; - - } - - /* optimize rate heterogeneity variables */ - if (fracinv_optim || grate_optim) { - - FIold = fracinv; - GEold = Geta; - - - /* - * optimize - */ - - FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n"); - fflush(STDOUT); - optimrateparams(); - computedistan(); /* update ML distances */ - - - /* same tolerance as 1D minimization */ - if ((fabs(fracinv - FIold) > 3.3*PEPS2) || - (fabs(Geta - GEold) > 3.3*PEPS2) - ) change = TRUE; - - } - - if (nump == 1) return; - - } while (it != MAXITS && change); - - return; -} - - 
-/******************************************************************************/ -/* exported from main */ -/******************************************************************************/ - -void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx) -{ - if (approx == APPROX) { - - *d1 = quartet_alklhd(a,b, c,d); /* (a,b)-(c,d) */ - *d2 = quartet_alklhd(a,c, b,d); /* (a,c)-(b,d) */ - *d3 = quartet_alklhd(a,d, b,c); /* (a,d)-(b,c) */ - - } else /* approx == EXACT */ { - - *d1 = quartet_lklhd(a,b, c,d); /* (a,b)-(c,d) */ - *d2 = quartet_lklhd(a,c, b,d); /* (a,c)-(b,d) */ - *d3 = quartet_lklhd(a,d, b,c); /* (a,d)-(b,c) */ - - } -} - -/***************************************************************/ - -void recon_tree() -{ - int i; -# if ! PARALLEL - int a, b, c; - uli nq; - double tc2, mintogo, minutes, hours; -# endif - - /* allocate memory for taxon list of bad quartets */ - badtaxon = new_ulivector(Maxspc); - for (i = 0; i < Maxspc; i++) badtaxon[i] = 0; - - /* allocate variable used for randomizing input order */ - trueID = new_ivector(Maxspc); - - /* allocate memory for quartets */ - quartetinfo = mallocquartets(Maxspc); - - /* prepare for consensus tree analysis */ - initconsensus(); - - if (!(readquart_optn) || (readquart_optn && savequart_optn)) { - /* compute quartets */ - FPRINTF(STDOUTFILE "Computing quartet maximum likelihood trees\n"); - fflush(STDOUT); - computeallquartets(); - } - - if (savequart_optn) - writeallquarts(Maxspc, ALLQUART, quartetinfo); - if (readquart_optn) { - int xx1, xx2, xx3, xx4, count; - readallquarts (Maxspc, ALLQUART, quartetinfo); - if (show_optn) { /* list all unresolved quartets */ - openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees"); - fprintf(unresfp, "List of all completely unresolved quartets:\n\n"); - } - - /* initialize bad quartet memory */ - for (count = 0; count < Maxspc; count++) badtaxon[count] = 0; - badqs = 0; - - for (xx4 = 3; xx4 < Maxspc; xx4++) - for 
(xx3 = 2; xx3 < xx4; xx3++) - for (xx2 = 1; xx2 < xx3; xx2++) - for (xx1 = 0; xx1 < xx2; xx1++) { - if (readquartet(xx1, xx2, xx3, xx4) == 7) { - badqs++; - badtaxon[xx1]++; - badtaxon[xx2]++; - badtaxon[xx3]++; - badtaxon[xx4]++; - if (show_optn) { - fputid10(unresfp, xx1); - fprintf(unresfp, " "); - fputid10(unresfp, xx2); - fprintf(unresfp, " "); - fputid10(unresfp, xx3); - fprintf(unresfp, " "); - fputid (unresfp, xx4); - fprintf(unresfp, "\n"); - } - } - } /* end for xx4; for xx3; for xx2; for xx1 */ - if (show_optn) /* list all unresolved quartets */ - fclose(unresfp); - } /* readquart_optn */ - -# if PARALLEL - PP_SendAllQuarts(numquarts(Maxspc), quartetinfo); -# endif /* PARALLEL */ - - FPRINTF(STDOUTFILE "Computing quartet puzzling tree\n"); - fflush(STDOUT); - - /* start timer - percentage of completed trees */ - time(&time0); - time1 = time0; - mflag = 0; - - /* open file for chronological list of puzzling step trees */ - if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER)) - openfiletowrite(&qptlist, OUTPTLIST, "puzzling step trees (chonological)"); - -# if PARALLEL - { - PP_SendDoPermutBlock(Numtrial); - } -# else - addtimes(GENERAL, &tarr); - for (Currtrial = 0; Currtrial < Numtrial; Currtrial++) { - - /* randomize input order */ - chooser(Maxspc, Maxspc, trueID); - - /* initialize tree */ - inittree(); - - /* adding all other leafs */ - for (i = 3; i < Maxspc; i++) { - - /* clear all edgeinfos */ - resetedgeinfo(); - - /* clear counter of quartets */ - nq = 0; - - /* - * core of quartet puzzling algorithm - */ - - for (a = 0; a < nextleaf - 2; a++) - for (b = a + 1; b < nextleaf - 1; b++) - for (c = b + 1; c < nextleaf; c++) { - - /* check which two _leaves_ out of a, b, c - are closer related to each other than - to leaf i according to a least squares - fit of the continous Baysian weights to the - seven trivial "attractive regions". 
We assign - a score of 1 to all edges between these two leaves - chooseA and chooseB */ - - checkquartet(a, b, c, i); - incrementedgeinfo(chooseA, chooseB); - - nq++; - - /* generate message every 15 minutes */ - - /* check timer */ - time(&time2); - if ( (time2 - time1) > 900) { - /* every 900 seconds */ - /* percentage of completed trees */ - if (mflag == 0) { - FPRINTF(STDOUTFILE "\n"); - mflag = 1; - } - tc2 = 100.0*Currtrial/Numtrial + - 100.0*nq/Numquartets/Numtrial; - mintogo = (100.0-tc2) * - (double) (time2-time0)/60.0/tc2; - hours = floor(mintogo/60.0); - minutes = mintogo - 60.0*hours; - FPRINTF(STDOUTFILE "%2.2f%%", tc2); - FPRINTF(STDOUTFILE " completed (remaining"); - FPRINTF(STDOUTFILE " time: %.0f", hours); - FPRINTF(STDOUTFILE " hours %.0f", minutes); - FPRINTF(STDOUTFILE " minutes)\n"); - fflush(STDOUT); - time1 = time2; - } - } - - /* find out which edge has the lowest edgeinfo */ - minimumedgeinfo(); - - /* add the next leaf on minedge */ - addnextleaf(minedge); - } - - /* compute bipartitions of current tree */ - computebiparts(); - makenewsplitentries(); - - { - int *ctree, startnode; - char *trstr; - treelistitemtype *treeitem; - ctree = initctree(); - copytree(ctree); - startnode = sortctree(ctree); - trstr=sprintfctree(ctree, psteptreestrlen); - - - treeitem = addtree2list(&trstr, 1, &psteptreelist, &psteptreenum, &psteptreesum); - - if((listqptrees == PSTOUT_LIST) - || (listqptrees == PSTOUT_LISTORDER)) { - /* print: order no/# topol per this id/tree id/sum of unique topologies/sum of trees so far */ - fprintf(qptlist, "%ld.\t1\t%d\t%d\t%d\t%d\n", - Currtrial + 1, (*treeitem).count, (*treeitem).id, psteptreenum, psteptreesum); - } - -# ifdef VERBOSE1 - printf("%s\n", trstr); - printfsortedpstrees(psteptreelist); -# endif - freectree(&ctree); - } - - - - /* free tree before building the next tree */ - freetree(); - - addtimes(PUZZLING, &tarr); - } -# endif /* PARALLEL */ - - /* close file for list of puzzling step trees */ - if((listqptrees 
== PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER)) - closefile(qptlist); - - if (mflag == 1) FPRINTF(STDOUTFILE "\n"); - - /* garbage collection */ - free(splitcomp); - free_ivector(trueID); - -# if ! PARALLEL - free_cmatrix(biparts); -# endif /* PARALLEL */ - - freequartets(); - - /* compute majority rule consensus tree */ - makeconsensus(); - - /* write consensus tree to tmp file */ - tmpfp = tmpfile(); - writeconsensustree(tmpfp); -} /* recon_tree */ - -/***************************************************************/ - -void map_lklhd() -{ - int i, a, a1, a2, b, b1, b2, c, c1, c2, d; - uli nq; - double logs[3], d1, d2, d3, temp; - ivector qts, mlorder, gettwo; - /* reset variables */ - ar1 = ar2 = ar3 = 0; - reg1 = reg2 = reg3 = reg4 = reg5 = reg6 = reg7 = 0; - reg1l = reg1r = reg2u = reg2d = reg3u = reg3d = reg4u = - reg4d = reg5l = reg5r = reg6u = reg6d = 0; - - /* place for random quartet */ - qts = new_ivector(4); - - /* initialize output file */ - openfiletowrite(&trifp, TRIANGLE, "Postscript output"); - initps(trifp); - FPRINTF(STDOUTFILE "Performing likelihood mapping analysis\n"); - fflush(STDOUT); - - /* start timer */ - starttimer(); - nq = 0; - mflag = 0; - - addtimes(GENERAL, &tarr); - if (lmqts == 0) { /* all possible quartets */ - - if (numclust == 4) { /* four-cluster analysis */ - - for (a = 0; a < clustA; a++) - for (b = 0; b < clustB; b++) - for (c = 0; c < clustC; c++) - for (d = 0; d < clustD; d++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c],clusterD[d],&d1,&d2,&d3, APPROX); - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - } - - if (numclust == 3) { /* three-cluster analysis */ - - gettwo = new_ivector(2); - - for (a = 0; a < clustA; a++) - for (b = 0; b < clustB; b++) - for (c1 = 0; c1 < clustC-1; c1++) - for (c2 = c1+1; c2 < clustC; c2++) { - 
- nq++; - - /* check timer */ - checktimer(nq); - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c1],clusterC[c2],&d1,&d2,&d3, APPROX); - - /* randomize order of d2 and d3 */ - if (randominteger(2) == 1) { - temp = d3; - d3 = d2; - d2 = temp; - } - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - free_ivector(gettwo); - } - - if (numclust == 2) { /* two-cluster analysis */ - - gettwo = new_ivector(2); - - for (a1 = 0; a1 < clustA-1; a1++) - for (a2 = a1+1; a2 < clustA; a2++) - for (b1 = 0; b1 < clustB-1; b1++) - for (b2 = b1+1; b2 < clustB; b2++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(clusterA[a1],clusterA[a2],clusterB[b1],clusterB[b2],&d1,&d2,&d3, APPROX); - - /* randomize order of d2 and d3 */ - if (randominteger(2) == 1) { - temp = d3; - d3 = d2; - d2 = temp; - } - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - - free_ivector(gettwo); - } - - if (numclust == 1) { /* normal likelihood mapping (one cluster) */ - - mlorder = new_ivector(3); - -#if 0 - for (i = 3; i < Maxspc; i++) - for (a = 0; a < i - 2; a++) - for (b = a + 1; b < i - 1; b++) - for (c = b + 1; c < i; c++) - for (d = 3; d < Maxspc; d++) - for (c = 2; c < d; c++) - for (b = 1; b < c; b++) - for (a = 0; a < b; a++) -#endif - - for (i = 3; i < Maxspc; i++) - for (c = 2; c < i; c++) - for (b = 1; b < c; b++) - for (a = 0; a < b; a++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(a,b,c,i,&logs[0],&logs[1],&logs[2], APPROX); - - /* randomize order */ - chooser(3,3,mlorder); - d1 = logs[mlorder[0]]; - d2 = logs[mlorder[1]]; - d3 = logs[mlorder[2]]; - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, 
d2, d3); - addtimes(QUARTETS, &tarr); - - } - free_ivector(mlorder); - } - - } else { /* randomly selected quartets */ - - if (numclust == 4) { /* four-cluster analysis */ - - for (lmqts = 0; lmqts < Numquartets; lmqts++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* choose random quartet */ - qts[0] = clusterA[ randominteger(clustA) ]; - qts[1] = clusterB[ randominteger(clustB) ]; - qts[2] = clusterC[ randominteger(clustC) ]; - qts[3] = clusterD[ randominteger(clustD) ]; - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX); - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - } - - if (numclust == 3) { /* three-cluster analysis */ - - gettwo = new_ivector(2); - - for (lmqts = 0; lmqts < Numquartets; lmqts++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* choose random quartet */ - qts[0] = clusterA[ randominteger(clustA) ]; - qts[1] = clusterB[ randominteger(clustB) ]; - chooser(clustC, 2, gettwo); - qts[2] = clusterC[gettwo[0]]; - qts[3] = clusterC[gettwo[1]]; - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX); - - /* order of d2 and d3 is already randomized! 
*/ - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - - free_ivector(gettwo); - } - - if (numclust == 2) { /* two-cluster analysis */ - - gettwo = new_ivector(2); - - for (lmqts = 0; lmqts < Numquartets; lmqts++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* choose random quartet */ - chooser(clustA, 2, gettwo); - qts[0] = clusterA[gettwo[0]]; - qts[1] = clusterA[gettwo[1]]; - chooser(clustB, 2, gettwo); - qts[2] = clusterB[gettwo[0]]; - qts[3] = clusterB[gettwo[1]]; - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX); - - /* order of d2 and d3 is already randomized! */ - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - free_ivector(gettwo); - } - - if (numclust == 1) { /* normal likelihood mapping (one cluster) */ - - for (lmqts = 0; lmqts < Numquartets; lmqts++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* choose random quartet */ - chooser(Maxspc, 4, qts); - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX); - - /* order of d1, d2, and d3 is already randomized! 
*/ - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - } - } - - finishps(trifp); - closefile(trifp); - free_ivector(qts); - -} /* map_lklhd */ - -/***************************************************************/ - -void setdefaults() { - - strcpy(INFILE, INFILEDEFAULT); - strcpy(OUTFILE, OUTFILEDEFAULT); - strcpy(TREEFILE, TREEFILEDEFAULT); - strcpy(INTREE, INTREEDEFAULT); - strcpy(DISTANCES, DISTANCESDEFAULT); - strcpy(TRIANGLE, TRIANGLEDEFAULT); - strcpy(UNRESOLVED, UNRESOLVEDDEFAULT); - strcpy(ALLQUART, ALLQUARTDEFAULT); - strcpy(ALLQUARTLH, ALLQUARTLHDEFAULT); - strcpy(OUTPTLIST, OUTPTLISTDEFAULT); - strcpy(OUTPTORDER, OUTPTORDERDEFAULT); - - usebestq_optn = FALSE; - savequartlh_optn = FALSE; - savequart_optn = FALSE; - readquart_optn = FALSE; - - randseed = -1; /* to set random random seed */ - -} /* setdefaults */ - -/***************************************************************/ - -void printversion() -{ -# if ! PARALLEL - fprintf(stderr, "puzzle (%s) %s\n", PACKAGE, VERSION); -#else - fprintf(stderr, "ppuzzle (%s) %s\n", PACKAGE, VERSION); -# endif - exit (0); -} -/***************************************************************/ - -void printusage(char *fname) -{ - fprintf(stderr, "\n\nUsage: %s [-h] [ Infilename [ UserTreeFilename ] ]\n\n", fname); -# if PARALLEL - PP_SendDone(); - MPI_Finalize(); -# endif - exit (1); -} - -/***************************************************************/ - -#ifdef HHH -void printusagehhh(char *fname) -{ - fprintf(stderr, "\n\nUsage: %s [options] [ Infilename [ UserTreeFilename ] ]\n\n", fname); - fprintf(stderr, " -h - print usage\n"); - fprintf(stderr, " -wqf - write quartet file to Infilename.allquart\n"); - fprintf(stderr, " -rqf - read quartet file from Infilename.allquart\n"); - fprintf(stderr, " -wqlb - write quart lhs to Infilename.allquartlh (binary)\n"); - fprintf(stderr, " -wqla - write quart lhs to Infilename.allquartlh (ASCII)\n"); - fprintf(stderr, " 
-bestq - use best quart, no basian weights\n"); - fprintf(stderr, " -randseed<#> - use <#> as random number seed, for debug purposes only\n"); -# if PARALLEL - PP_SendDone(); - MPI_Finalize(); -# endif - exit (2); -} -#endif /* HHH */ - -/***************************************************************/ - - -void scancmdline(int *argc, char **argv[]) -{ - static short infileset = 0; - static short intreefileset = 0; - short flagused; - int n; - int count, dummyint; - - for (n = 1; n < *argc; n++) { -# ifdef VERBOSE1 - printf("argv[%d] = %s\n", n, (*argv)[n]); -# endif - - flagused = FALSE; - -# ifdef HHH - dummyint = 0; - count = sscanf((*argv)[n], "-wqlb%n", &dummyint); - if (dummyint == 5) { - savequartlh_optn = TRUE; - saveqlhbin_optn = TRUE; - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n], "-wqla%n", &dummyint); - if (dummyint == 5) { - savequartlh_optn = TRUE; - saveqlhbin_optn = FALSE; - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n], "-wqf%n", &dummyint); - if (dummyint == 4) { - savequart_optn = TRUE; - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n],"-rqf%n", &dummyint); - if (dummyint == 4) { - readquart_optn = TRUE; - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n],"-bestq%n", &dummyint); - if (dummyint == 6) { - usebestq_optn = TRUE; - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n],"-hhh%n", &dummyint); - if (dummyint==4) { - printusagehhh((*argv)[0]); - flagused = TRUE; - } -# endif /* HHH */ - - dummyint = 0; - count = sscanf((*argv)[n],"-V%n", &dummyint); - if (dummyint==2) { - printversion((*argv)[0]); - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n],"-version%n", &dummyint); - if (dummyint==8) { - printversion((*argv)[0]); - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n],"--version%n", &dummyint); - if (dummyint>=4) { - printversion((*argv)[0]); - flagused = TRUE; - } - - dummyint = 0; - count = 
sscanf((*argv)[n],"-h%n", &dummyint); - if (dummyint==2) { - printusage((*argv)[0]); - flagused = TRUE; - } - - count = sscanf((*argv)[n],"-randseed%d", &dummyint); - if (count == 1) { - randseed = dummyint; - flagused = TRUE; - } - -#if 0 - count = sscanf((*argv)[n],"-h%n", &dummyint); - if ((count == 1) && (dummyint>=2)) printusage((*argv)[0]); - - count = sscanf((*argv)[n],"-writequarts%n", &dummyint); - if (count == 1) writequartstofile = 1;; - - count = sscanf((*argv)[n],"-ws%d", &dummyint); - if (count == 1) windowsize = dummyint; -#endif - - if ((*argv)[n][0] != '-') { - if (infileset == 0) { - strcpy(INFILE, (*argv)[n]); - infileset++; - sprintf(OUTFILE ,"%s.%s", INFILE, OUTFILEEXT); - sprintf(TREEFILE ,"%s.%s", INFILE, TREEFILEEXT); - sprintf(DISTANCES ,"%s.%s", INFILE, DISTANCESEXT); - sprintf(TRIANGLE ,"%s.%s", INFILE, TRIANGLEEXT); - sprintf(UNRESOLVED ,"%s.%s", INFILE, UNRESOLVEDEXT); - sprintf(ALLQUART ,"%s.%s", INFILE, ALLQUARTEXT); - sprintf(ALLQUARTLH ,"%s.%s", INFILE, ALLQUARTLHEXT); - sprintf(OUTPTLIST ,"%s.%s", INFILE, OUTPTLISTEXT); - sprintf(OUTPTORDER ,"%s.%s", INFILE, OUTPTORDEREXT); - FPRINTF(STDOUTFILE "Input file: %s\n", INFILE); - flagused = TRUE; - } else { - if (intreefileset == 0) { - strcpy(INTREE, (*argv)[n]); - intreefileset++; - sprintf(OUTFILE ,"%s.%s", INTREE, OUTFILEEXT); - sprintf(TREEFILE ,"%s.%s", INTREE, TREEFILEEXT); - sprintf(DISTANCES ,"%s.%s", INTREE, DISTANCESEXT); - FPRINTF(STDOUTFILE "Usertree file: %s\n", INTREE); - flagused = TRUE; - } - } - } - if (flagused == FALSE) { - fprintf(stderr, "WARNING: commandline parameter %d not recognized (\"%s\")\n", n, (*argv)[n]); - } - flagused = FALSE; - } - -} /* scancmdline */ - - -/***************************************************************/ - -void inputandinit(int *argc, char **argv[]) { - - int ci; - - /* vectors used in QP and LM analysis */ - qweight = new_dvector(3); - sqdiff = new_dvector(3); - qworder = new_ivector(3); - sqorder = new_ivector(3); - - /* 
Initialization and parsing of Commandline */ - setdefaults(); - scancmdline(argc, argv); - - /* initialize random numbers generator */ - if (randseed >= 0) - fprintf(stderr, "WARNING: random seed set to %d for debugging!\n", randseed); - randseed = initrandom(randseed); - - psteptreelist = NULL; - psteptreesum = 0; - bestratefound = 0; - -# ifndef ALPHA - FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s!\n\n\n", VERSION); -# else - FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s%s!\n\n\n", VERSION, ALPHA); -# endif - - - /* get sequences */ - openfiletoread(&seqfp, INFILE, "sequence data"); - getsizesites(seqfp); - FPRINTF(STDOUTFILE "\nInput data set contains %d sequences of length %d\n", Maxspc, Maxseqc); - getdataset(seqfp); - closefile(seqfp); - data_optn = guessdatatype(); - - /* translate characters into format used by ML engine */ - nuc_optn = TRUE; - SH_optn = FALSE; - Seqchar = NULL; - translatedataset(); - - /* estimate base frequencies from data set */ - Freqtpm = NULL; - Basecomp = NULL; - estimatebasefreqs(); - - /* guess model of substitution */ - guessmodel(); - - /* initialize guess variables */ - auto_datatype = AUTO_GUESS; - if (data_optn == AMINOACID) auto_aamodel = AUTO_GUESS; - else auto_aamodel = AUTO_DEFAULT; - /* save guessed amino acid options */ - guessDayhf_optn = Dayhf_optn; - guessJtt_optn = Jtt_optn; - guessmtrev_optn = mtrev_optn; - guesscprev_optn = cprev_optn; - guessblosum62_optn = blosum62_optn; - guessvtmv_optn = vtmv_optn; - guesswag_optn = wag_optn; - guessauto_aamodel = auto_aamodel; - - - /* check for user specified tree */ - if ((utfp = fopen(INTREE, "r")) != NULL) { - fclose(utfp); - puzzlemode = USERTREE; - } else { - puzzlemode = QUARTPUZ; - } - - /* reserve memory for cluster LM analysis */ - clusterA = new_ivector(Maxspc); - clusterB = new_ivector(Maxspc); - clusterC = new_ivector(Maxspc); - clusterD = new_ivector(Maxspc); - - /* set options interactively */ - setoptions(); - - /* open usertree file right after 
start */ - if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) { - openfiletoread(&utfp, INTREE, "user trees"); - } - - /* start main timer */ - time(&Starttime); - Startcpu=clock(); - addtimes(OPTIONS, &tarr); - - /* symmetrize doublet frequencies if specified */ - symdoublets(); - - /* initialise ML */ - mlstart(); - - /* determine how many usertrees */ - if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) { - numutrees = 0; - do { - ci = fgetc(utfp); - if ((char) ci == ';') numutrees++; - } while (ci != EOF); - rewind(utfp); - if (numutrees < 1) { - FPRINTF(STDOUTFILE "Unable to proceed (no tree in input tree file)\n\n\n"); - exit(1); - } - } - - /* check fraction of invariable sites */ - if ((rhetmode == TWORATE || rhetmode == MIXEDRATE) && !fracinv_optim) - /* fraction of invariable site was specified manually */ - if (fracinv > MAXFI) - fracinv = MAXFI; - - addtimes(GENERAL, &tarr); - /* estimate parameters */ - if (!(typ_optn == TREERECON_OPTN && puzzlemode == USERTREE)) { - /* no tree present */ - estimateparametersnotree(); - } else { - if (utree_optn) { - /* use 1st user tree */ - readusertree(utfp); - rewind(utfp); - estimateparameterstree(); - } else { - /* don't use first user tree */ - estimateparametersnotree(); - } - } - addtimes(PARAMEST, &tarr); - - /* compute expected Ts/Tv ratio */ - if (data_optn == NUCLEOTIDE) computeexpectations(); - -} /* inputandinit */ - - - -/***************************************************************/ - -void evaluatetree(FILE *intreefp, FILE *outtreefp, int pmode, int utreenum, int maxutree, int *oldlocroot) -{ - - switch (pmode) { - case QUARTPUZ: /* read QP tree */ - readusertree(intreefp); - FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock)\n"); - fflush(STDOUT); - usertree_lklhd(); - findbestratecombination(); - break; - case USERTREE: /* read user tree */ - readusertree(intreefp); - FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock) for tree 
# %d\n", utreenum+1); - fflush(STDOUT); - usertree_lklhd(); - if (maxutree > 1) { - ulkl[utreenum] = Ctree->lklhd; - allsitelkl(Ctree->condlkl, allsites[utreenum]); - } - if (utreenum==0) findbestratecombination(); - break; - } - - - if (compclock) { /* clocklike branch length */ - switch (pmode) { - case QUARTPUZ: - FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock)\n"); - fflush(STDOUT); - break; - case USERTREE: - FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock) for tree # %d\n", utreenum+1); - fflush(STDOUT); - break; - } - - /* find best place for root */ - rootsearch = 0; - - if (utreenum==0) locroot = *oldlocroot; - else *oldlocroot = locroot; - - if (locroot < 0) { - locroot = findrootedge(); - rootsearch = 1; - } - /* if user-specified edge for root does not exist use displayed outgroup */ - if (!checkedge(locroot)) { - locroot = outgroup; - rootsearch = 2; - } - /* compute likelihood */ - clock_lklhd(locroot); - if (maxutree > 1) { - ulklc[utreenum] = Ctree->lklhdc; - allsitelkl(Ctree->condlkl, allsitesc[utreenum]); - } - - } - - if (clockmode == 0) - fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhd); - else - fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhdc); - - /* write ML branch length tree to outree file */ - clockmode = 0; /* nonclocklike branch lengths */ - fputphylogeny(outtreefp); - - /* clocklike branch lengths */ - if (compclock) { - clockmode = 1; - fputrooted(outtreefp, locroot); - } -} /* evaluatetree */ - -/***************************************************************/ - -void memcleanup() { - if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) { - free(splitfreqs); - free(splitpatterns); - free(splitsizes); - free_ivector(consconfid); - free_ivector(conssizes); - free_cmatrix(consbiparts); - free_ulivector(badtaxon); - } - free_cmatrix(Identif); - free_dvector(Freqtpm); - free_imatrix(Basecomp); - free_ivector(clusterA); - free_ivector(clusterB); - free_ivector(clusterC); - 
free_ivector(clusterD); - free_dvector(qweight); - free_dvector(sqdiff); - free_ivector(qworder); - free_ivector(sqorder); - freetreelist(&psteptreelist, &psteptreenum, &psteptreesum); -} /* memcleanup */ - -/***************************************************************/ - - -/******************************************************************************/ -/* main part */ -/******************************************************************************/ - -int main(int argc, char *argv[]) -{ - int i, oldlocroot=0; - - /* start main timer */ - time(&walltimestart); - cputimestart = clock(); - inittimearr(&tarr); - - - - inputandinit(&argc, &argv); - - - - /* write distance matrix */ - FPRINTF(STDOUTFILE "Writing pairwise distances to file %s\n", DISTANCES); - openfiletowrite(&dfp, DISTANCES, "pairwise distances"); - putdistance(dfp); - closefile(dfp); - - - - free_cmatrix(Seqchar); - free_cmatrix(seqchars); - - - - - /* write CPU/Wallclock times and parallel statistics */ - time(&walltimestop); - cputimestop = clock(); - addtimes(OVERALL, &tarr); - - fullcpu = tarr.fullcpu; - fulltime = tarr.fulltime; - - - - /* stop timer */ - - time(&Stoptime); - Stopcpu=clock(); - /* - timestamp(ofp); - closefile(ofp); - CZ 05/16/01*/ - - - /* printbestratecombination(stderr); */ - mlfinish(); - - FPRINTF(STDOUTFILE "\nAll results written to disk:\n"); - /*FPRINTF(STDOUTFILE " Puzzle report file: %s\n", OUTFILE);*/ - FPRINTF(STDOUTFILE " Likelihood distances: %s\n", DISTANCES); - - if (typ_optn == TREERECON_OPTN && puzzlemode != PAIRDIST) - FPRINTF(STDOUTFILE " Phylip tree file: %s\n", TREEFILE); - if (typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ) { - if ((listqptrees == PSTOUT_ORDER) ||(listqptrees == PSTOUT_LISTORDER)) - FPRINTF(STDOUTFILE " Unique puzzling step trees: %s\n", OUTPTORDER); - if ((listqptrees == PSTOUT_LIST) ||(listqptrees == PSTOUT_LISTORDER)) - FPRINTF(STDOUTFILE " Puzzling step tree list: %s\n", OUTPTLIST); - } - if (show_optn && typ_optn == 
TREERECON_OPTN && puzzlemode == QUARTPUZ) - FPRINTF(STDOUTFILE " Unresolved quartets: %s\n", UNRESOLVED); - if (typ_optn == LIKMAPING_OPTN) - FPRINTF(STDOUTFILE " Likelihood mapping diagram: %s\n", TRIANGLE); - FPRINTF(STDOUTFILE "\n"); - - /* runtime message */ - FPRINTF(STDOUTFILE - "The computation took %.0f seconds (= %.1f minutes = %.1f hours)\n", - difftime(Stoptime, Starttime), difftime(Stoptime, Starttime)/60., - difftime(Stoptime, Starttime)/3600.); - FPRINTF(STDOUTFILE - " including input %.0f seconds (= %.1f minutes = %.1f hours)\n", - fulltime, fulltime/60., fulltime/3600.); - - - /* free memory */ - memcleanup(); - - - - return 0; -} - - -/* compare function for uli - sort largest numbers first */ -int ulicmp(const void *ap, const void *bp) -{ - uli a, b; - - a = *((uli *) ap); - b = *((uli *) bp); - - if (a > b) return -1; - else if (a < b) return 1; - else return 0; -} - -/* compare function for int - sort smallest numbers first */ -int intcmp(const void *ap, const void *bp) -{ - int a, b; - - a = *((int *) ap); - b = *((int *) bp); - - if (a < b) return -1; - else if (a > b) return 1; - else return 0; -} diff --git a/forester/archive/RIO/others/puzzle_dqo/src/puzzle2.c b/forester/archive/RIO/others/puzzle_dqo/src/puzzle2.c deleted file mode 100644 index ea53889..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/puzzle2.c +++ /dev/null @@ -1,2651 +0,0 @@ -/* - * puzzle2.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - - -#define EXTERN extern - -#include "puzzle.h" -#include - -#if PARALLEL -# include "sched.h" -#endif /* PARALLEL */ - - -/******************************************************************************/ -/* sequences */ -/******************************************************************************/ - -/* read ten characters of current line as identifier */ -void readid(FILE *infp, int t) -{ - int i, j, flag, ci; - - for (i = 0; i < 26; i++) { /*CZ*/ - ci = fgetc(infp); - if (ci == EOF || !isprint(ci)) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no name for sequence %d)\n\n\n", t+1); - exit(1); - } - Identif[t][i] = (char) ci; - } - /* convert leading blanks in taxon name to underscores */ - flag = FALSE; - for (i = 25; i > -1; i--) { /*CZ*/ - if (flag == FALSE) { - if (Identif[t][i] != ' ') flag = TRUE; - } else { - if (Identif[t][i] == ' ') Identif[t][i] = '_'; - } - } - /* check whether this name is already used */ - for (i = 0; i < t; i++) { /* compare with all other taxa */ - flag = TRUE; /* assume identity */ - for (j = 0; (j < 26) && (flag == TRUE); j++) /*CZ*/ - if (Identif[t][j] != Identif[i][j]) - flag = FALSE; - if (flag) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence name '"); - fputid(STDOUT, t); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } - } -} - -/* read next allowed character */ -char readnextcharacter(FILE *ifp, int notu, int nsite) -{ - char c; - - /* ignore blanks and control characters except newline */ - do { - if (fscanf(ifp, "%c", &c) != 1) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } - } while (c == ' ' || (iscntrl((int) c) && c != '\n')); - return c; -} - -/* skip rest of the line */ -void skiprestofline(FILE* ifp, int notu, int nsite) -{ - int ci; - - /* read chars until the first newline */ - do{ - ci = fgetc(ifp); - if (ci == EOF) { - 
FPRINTF(STDOUTFILE "Unable to proceed (missing newline at position %d in sequence '", nsite + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } - } while ((char) ci != '\n'); -} - -/* skip control characters and blanks */ -void skipcntrl(FILE *ifp, int notu, int nsite) -{ - int ci; - - /* read over all control characters and blanks */ - do { - ci = fgetc(ifp); - if (ci == EOF) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } - } while (iscntrl(ci) || (char) ci == ' '); - /* go one character back */ - if (ungetc(ci, ifp) == EOF) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (positioning error at position %d in sequence '", nsite + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } -} - -/* read sequences of one data set */ -void getseqs(FILE *ifp) -{ - int notu, nsite, endofline, linelength, i; - char c; - - seqchars = new_cmatrix(Maxspc, Maxseqc); - /* read all characters */ - nsite = 0; /* next site to be read */ - while (nsite < Maxseqc) { - /* read first taxon */ - notu = 0; - /* go to next true line */ - skiprestofline(ifp, notu, nsite); - skipcntrl(ifp, notu, nsite); - if (nsite == 0) readid(ifp, notu); - endofline = FALSE; - linelength = 0; - do { - c = readnextcharacter(ifp, notu, nsite + linelength); - if (c == '\n') endofline = TRUE; - else if (c == '.') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (invalid character '.' 
at position "); - FPRINTF(STDOUTFILE "%d in first sequence)\n\n\n", nsite + linelength + 1); - exit(1); - } else if (nsite + linelength < Maxseqc) { - /* change to upper case */ - seqchars[notu][nsite + linelength] = (char) toupper((int) c); - linelength++; - } else { - endofline = TRUE; - skiprestofline(ifp, notu, nsite + linelength); - } - } while (!endofline); - if (linelength == 0) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line with length 0 at position %d in sequence '", nsite + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } - /* read other taxa */ - for (notu = 1; notu < Maxspc; notu++) { - /* go to next true line */ - if (notu != 1) skiprestofline(ifp, notu, nsite); - skipcntrl(ifp, notu, nsite); - if (nsite == 0) readid(ifp, notu); - for (i = nsite; i < nsite + linelength; i++) { - c = readnextcharacter(ifp, notu, i); - if (c == '\n') { /* too short */ - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line to short at position %d in sequence '", i + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } else if (c == '.') { - seqchars[notu][i] = seqchars[0][i]; - } else { - /* change to upper case */ - seqchars[notu][i] = (char) toupper((int) c); - } - } - } - nsite = nsite + linelength; - } -} - -/* initialize identifer array */ -void initid(int t) -{ - int i, j; - - Identif = new_cmatrix(t, 26); /*CZ*/ - for (i = 0; i < t; i++) - for (j = 0; j < 26; j++) /*CZ*/ - Identif[i][j] = ' '; -} - -/* print identifier of specified taxon in full 10 char length */ -void fputid10(FILE *ofp, int t) -{ - int i; - - for (i = 0; i < 26; i++) fputc(Identif[t][i], ofp); /*CZ*/ -} - -/* print identifier of specified taxon up to first space */ -int fputid(FILE *ofp, int t) -{ - int i; - - i = 0; - while (Identif[t][i] != ' ' && i < 26) { /*CZ*/ - fputc(Identif[t][i], ofp); - i++; - } - return i; -} - -/* read first line of sequence data set */ -void getsizesites(FILE *ifp) -{ - if (fscanf(ifp, "%d", &Maxspc) != 1) { - 
FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sequences)\n\n\n"); - exit(1); - } - if (fscanf(ifp, "%d", &Maxseqc) != 1) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sites)\n\n\n"); - exit(1); - } - - if (Maxspc < 4) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (less than 4 sequences)\n\n\n"); - exit(1); - } - if (Maxspc > 8000) { /*CZ*/ - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (more than 8000 sequences)\n\n\n"); - exit(1); - } - if (Maxseqc < 1) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no sequence sites)\n\n\n"); - exit(1); - } - Maxbrnch = 2*Maxspc - 3; -} - -/* read one data set - PHYLIP interleaved */ -void getdataset(FILE *ifp) -{ - initid(Maxspc); - getseqs(ifp); -} - -/* guess data type */ -int guessdatatype() -{ - uli numnucs, numchars, numbins; - int notu, nsite; - char c; - - /* count A, C, G, T, U, N */ - numnucs = 0; - numchars = 0; - numbins = 0; - for (notu = 0; notu < Maxspc; notu++) - for (nsite = 0; nsite < Maxseqc; nsite++) { - c = seqchars[notu][nsite]; - if (c == 'A' || c == 'C' || c == 'G' || - c == 'T' || c == 'U' || c == 'N') numnucs++; - if (c != '-' && c != '?') numchars++; - if (c == '0' || c == '1') numbins++; - } - if (numchars == 0) numchars = 1; - /* more than 85 % frequency means nucleotide data */ - if ((double) numnucs / (double) numchars > 0.85) return 0; - else if ((double) numbins / (double) numchars > 0.2) return 2; - else return 1; -} - -/* translate characters into format used by ML engine */ -void translatedataset() -{ - int notu, sn, co; - char c; - cvector code; - - - /* determine Maxsite - number of ML sites per taxon */ - if (data_optn == 0 && SH_optn) { - if (SHcodon) - Maxsite = Maxseqc / 3; - else - Maxsite = Maxseqc / 2; /* assume doublets */ - - } else - Maxsite = Maxseqc; - if (data_optn == 0 && (Maxsite % 3) == 0 && !SH_optn) { - if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3) - Maxsite = Maxsite / 3; /* only one of the three codon positions */ - if 
(codon_optn == 4) - Maxsite = 2*(Maxsite / 3); /* 1st + 2nd codon positions */ - } - - /* reserve memory */ - if (Seqchar != NULL) free_cmatrix(Seqchar); - Seqchar = new_cmatrix(Maxspc, Maxsite); - - /* code length */ - if (data_optn == 0 && SH_optn) - code = new_cvector(2); - else - code = new_cvector(1); - - /* decode characters */ - if (data_optn == 0 && SH_optn) { /* SH doublets */ - - for (notu = 0; notu < Maxspc; notu++) { - for (sn = 0; sn < Maxsite; sn++) { - for (co = 0; co < 2; co++) { - if (SHcodon) - c = seqchars[notu][sn*3 + co]; - else - c = seqchars[notu][sn*2 + co]; - code[co] = c; - } - Seqchar[notu][sn] = code2int(code); - } - } - - } else if (!(data_optn == 0 && (Maxseqc % 3) == 0)) { /* use all */ - - for (notu = 0; notu < Maxspc; notu++) { - for (sn = 0; sn < Maxsite; sn++) { - code[0] = seqchars[notu][sn]; - Seqchar[notu][sn] = code2int(code); - } - } - - } else { /* codons */ - - for (notu = 0; notu < Maxspc; notu++) { - for (sn = 0; sn < Maxsite; sn++) { - if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3) - code[0] = seqchars[notu][sn*3+codon_optn-1]; - else if (codon_optn == 4) { - if ((sn % 2) == 0) - code[0] = seqchars[notu][(sn/2)*3]; - else - code[0] = seqchars[notu][((sn-1)/2)*3+1]; - } else - code[0] = seqchars[notu][sn]; - Seqchar[notu][sn] = code2int(code); - } - } - - } - free_cvector(code); -} - -/* estimate mean base frequencies from translated data set */ -void estimatebasefreqs() -{ - int tpmradix, i, j; - uli all, *gene; - - tpmradix = gettpmradix(); - - if (Freqtpm != NULL) free_dvector(Freqtpm); - Freqtpm = new_dvector(tpmradix); - - if (Basecomp != NULL) free_imatrix(Basecomp); - Basecomp = new_imatrix(Maxspc, tpmradix); - - gene = (uli *) malloc((unsigned) ((tpmradix + 1) * sizeof(uli))); - if (gene == NULL) maerror("gene in estimatebasefreqs"); - - for (i = 0; i < tpmradix + 1; i++) gene[i] = 0; - for (i = 0; i < Maxspc; i++) - for (j = 0; j < tpmradix; j++) Basecomp[i][j] = 0; - for (i = 0; i < Maxspc; i++) - 
for (j = 0; j < Maxsite; j++) { - gene[(int) Seqchar[i][j]]++; - if (Seqchar[i][j] != tpmradix) Basecomp[i][(int) Seqchar[i][j]]++; - } - - all = Maxspc * Maxsite - gene[tpmradix]; - if (all != 0) { /* normal case */ - for (i = 0; i < tpmradix; i++) - Freqtpm[i] = (double) gene[i] / (double) all; - } else { /* pathological case with no unique character in data set */ - for (i = 0; i < tpmradix; i++) - Freqtpm[i] = 1.0 / (double) tpmradix; - } - - free(gene); - - Frequ_optn = TRUE; -} - -/* guess model of substitution */ -void guessmodel() -{ - double c1, c2, c3, c4, c5, c6; - dvector f; - dmatrix a; - int i; - - Dayhf_optn = FALSE; - Jtt_optn = TRUE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - TSparam = 2.0; - YRparam = 1.0; - optim_optn = TRUE; - HKY_optn = TRUE; - TN_optn = FALSE; - - if (data_optn == 1) { /* amino acids */ - - /* chi2 fit to amino acid frequencies */ - - f = new_dvector(20); - a = new_dmatrix(20,20); - /* chi2 distance Dayhoff */ - dyhfdata(a, f); - c1 = 0; - for (i = 0; i < 20; i++) - c1 = c1 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - /* chi2 distance JTT */ - jttdata(a, f); - c2 = 0; - for (i = 0; i < 20; i++) - c2 = c2 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - /* chi2 distance mtREV */ - mtrevdata(a, f); - c3 = 0; - for (i = 0; i < 20; i++) - c3 = c3 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - /* chi2 distance VT */ - vtmvdata(a, f); - c4 = 0; - for (i = 0; i < 20; i++) - c4 = c4 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - /* chi2 distance WAG */ - wagdata(a, f); - c5 = 0; - for (i = 0; i < 20; i++) - c5 = c5 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - /* chi2 distance cpREV */ - cprev45data(a, f); - c6 = 0; - for (i = 0; i < 20; i++) - c6 = c6 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - - free_dvector(f); - free_dmatrix(a); - -#ifndef CPREV - if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5)) { - /* c1 -> Dayhoff */ - Dayhf_optn = TRUE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - 
cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - if ((c2 < c3) && (c2 < c4) && (c2 < c5)) { - /* c2 -> JTT */ - Dayhf_optn = FALSE; - Jtt_optn = TRUE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - if ((c3 < c4) && (c3 < c5)) { - /* c3 -> mtREV */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = TRUE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n"); - } else { - if ((c4 < c5)) { - /* c4 -> VT */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = TRUE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - /* c5 -> WAG */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = TRUE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } /* if c4 else c5 */ - } /* if c3 else c4 */ - } /* if c2 */ - } /* if c1 */ - -#else /* CPREV */ - - if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5) && (c1 < c6)) { - /* c1 -> Dayhoff */ - Dayhf_optn = TRUE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - if ((c2 < c3) && (c2 < c4) && (c2 < c5) && (c2 < c6)) { - /* c2 -> JTT */ - Dayhf_optn = FALSE; - Jtt_optn = TRUE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - if ((c3 < c4) && (c3 < c5) && (c3 < c6)) { - /* c3 -> mtREV */ 
- Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = TRUE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n"); - } else { - if ((c4 < c5) && (c4 < c6)) { - /* c4 -> VT */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = TRUE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - if (c5 < c6) { - /* c5 -> WAG */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = TRUE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - /* if (c6) */ - /* c6 -> cpREV */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = TRUE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on cpDNA)\n"); - } /* if c5 else c6 */ - } /* if c4 else c5 */ - } /* if c3 else c4 */ - } /* if c2 */ - } /* if c1 */ -#endif /* CPREV */ - - } else if (data_optn == 0) { - FPRINTF(STDOUTFILE "(consists very likely of nucleotides)\n"); - } else { - FPRINTF(STDOUTFILE "(consists very likely of binary state data)\n"); - } -} /* guessmodel */ - - -/******************************************************************************/ -/* functions for representing and building puzzling step trees */ -/******************************************************************************/ - -/* initialize tree with the following starting configuration - - 2 - 0 +------- C(=2) - A(=0) -----+ - +------- B(=1) - 1 - */ -void inittree() -{ - int i; - - /* allocate the memory for the whole tree */ - - /* allocate memory for vector with all the edges of the tree */ - edge = (ONEEDGE *) calloc(Maxbrnch, sizeof(ONEEDGE) ); - if (edge == NULL) maerror("edge in inittree"); - - /* allocate memory for vector with edge numbers of 
leaves */ - edgeofleaf = (int *) calloc(Maxspc, sizeof(int) ); - if (edgeofleaf == NULL) maerror("edgeofleaf in inittree"); - - /* allocate memory for all the edges the edge map */ - for (i = 0; i < Maxbrnch; i++) { - edge[i].edgemap = (int *) calloc(Maxbrnch, sizeof(int) ); - if (edge[i].edgemap == NULL) maerror("edgemap in inittree"); - } - - /* number all edges */ - for (i = 0; i < Maxbrnch; i++) edge[i].numedge = i; - - /* initialize tree */ - - nextedge = 3; - nextleaf = 3; - - /* edge maps */ - (edge[0].edgemap)[0] = 0; /* you are on the right edge */ - (edge[0].edgemap)[1] = 4; /* go down left for leaf 1 */ - (edge[0].edgemap)[2] = 5; /* go down right for leaf 2 */ - (edge[1].edgemap)[0] = 1; /* go up for leaf 0 */ - (edge[1].edgemap)[1] = 0; /* you are on the right edge */ - (edge[1].edgemap)[2] = 3; /* go up/down right for leaf 2 */ - (edge[2].edgemap)[0] = 1; /* go up for leaf 0 */ - (edge[2].edgemap)[1] = 2; /* go up/down left for leaf 1 */ - (edge[2].edgemap)[2] = 0; /* you are on the right edge */ - - /* interconnection */ - edge[0].up = NULL; - edge[0].downleft = &edge[1]; - edge[0].downright = &edge[2]; - edge[1].up = &edge[0]; - edge[1].downleft = NULL; - edge[1].downright = NULL; - edge[2].up = &edge[0]; - edge[2].downleft = NULL; - edge[2].downright = NULL; - - /* edges of leaves */ - edgeofleaf[0] = 0; - edgeofleaf[1] = 1; - edgeofleaf[2] = 2; -} /* inittree */ - -/* add next leaf on the specified edge */ -void addnextleaf(int dockedge) -{ - int i; - - if (dockedge >= nextedge) { - /* Trying to add leaf nextleaf to nonexisting edge dockedge */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR F TO DEVELOPERS\n\n\n"); - exit(1); - } - - if (nextleaf >= Maxspc) { - /* Trying to add leaf nextleaf to a tree with Maxspc leaves */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR G TO DEVELOPERS\n\n\n"); - exit(1); - } - - /* necessary change in edgeofleaf if dockedge == edgeofleaf[0] */ - if (edgeofleaf[0] == dockedge) edgeofleaf[0] = 
nextedge; - - /* adding nextedge to the tree */ - edge[nextedge].up = edge[dockedge].up; - edge[nextedge].downleft = &edge[dockedge]; - edge[nextedge].downright = &edge[nextedge+1]; - edge[dockedge].up = &edge[nextedge]; - - if (edge[nextedge].up != NULL) { - if ( ((edge[nextedge].up)->downleft) == &edge[dockedge] ) - (edge[nextedge].up)->downleft = &edge[nextedge]; - else - (edge[nextedge].up)->downright = &edge[nextedge]; - } - - /* adding nextedge + 1 to the tree */ - edge[nextedge+1].up = &edge[nextedge]; - edge[nextedge+1].downleft = NULL; - edge[nextedge+1].downright = NULL; - edgeofleaf[nextleaf] = nextedge+1; - - /* the two new edges get info about the old edges */ - /* nextedge */ - for (i = 0; i < nextedge; i++) { - switch ( (edge[dockedge].edgemap)[i] ) { - - /* down right changes to down left */ - case 5: (edge[nextedge].edgemap)[i] = 4; - break; - - /* null changes to down left */ - case 0: (edge[nextedge].edgemap)[i] = 4; - break; - - default: (edge[nextedge].edgemap)[i] = - (edge[dockedge].edgemap)[i]; - break; - } - } - - /* nextedge + 1 */ - for (i = 0; i < nextedge; i++) { - switch ( (edge[dockedge].edgemap)[i] ) { - - /* up/down left changes to up */ - case 2: (edge[nextedge+1].edgemap)[i] = 1; - break; - - /* up/down right changes to up */ - case 3: (edge[nextedge+1].edgemap)[i] = 1; - break; - - /* down left changes to up/down left */ - case 4: (edge[nextedge+1].edgemap)[i] = 2; - break; - - /* down right changes to up/down left */ - case 5: (edge[nextedge+1].edgemap)[i] = 2; - break; - - /* null changes to up/down left */ - case 0: (edge[nextedge+1].edgemap)[i] = 2; - break; - - /* up stays up */ - default: (edge[nextedge+1].edgemap)[i] = - (edge[dockedge].edgemap)[i]; - break; - } - } - - /* dockedge */ - for (i = 0; i < nextedge; i++) { - switch ( (edge[dockedge].edgemap)[i] ) { - - /* up/down right changes to up */ - case 3: (edge[dockedge].edgemap)[i] = 1; - break; - - /* up/down left changes to up */ - case 2: (edge[dockedge].edgemap)[i] 
= 1; - break; - - default: break; - } - } - - /* all edgemaps are updated for the two new edges */ - /* nextedge */ - (edge[nextedge].edgemap)[nextedge] = 0; - (edge[nextedge].edgemap)[nextedge+1] = 5; /* down right */ - - /* nextedge + 1 */ - (edge[nextedge+1].edgemap)[nextedge] = 1; /* up */ - (edge[nextedge+1].edgemap)[nextedge+1] = 0; - - /* all other edges */ - for (i = 0; i < nextedge; i++) { - (edge[i].edgemap)[nextedge] = (edge[i].edgemap)[dockedge]; - (edge[i].edgemap)[nextedge+1] = (edge[i].edgemap)[dockedge]; - } - - /* an extra for dockedge */ - (edge[dockedge].edgemap)[nextedge] = 1; /* up */ - (edge[dockedge].edgemap)[nextedge+1] = 3; /* up/down right */ - - nextleaf++; - nextedge = nextedge + 2; -} /* addnextleaf */ - - -/* free memory (to be called after inittree) */ -void freetree() -{ - int i; - - for (i = 0; i < 2 * Maxspc - 3; i++) free(edge[i].edgemap); - free(edge); - free(edgeofleaf); -} /* freetree */ - -/* writes OTU sitting on edge ed */ -void writeOTU(FILE *outfp, int ed) -{ - int i; - - /* test whether we are on a leaf */ - if (edge[ed].downright == NULL && edge[ed].downleft == NULL) { - for (i = 1; i < nextleaf; i++) { - if (edgeofleaf[i] == ed) { /* i is the leaf of ed */ - column += fputid(outfp, trueID[i]); - return; - } - } - } - - /* we are NOT on a leaf */ - fprintf(outfp, "("); - column++; - writeOTU(outfp, edge[ed].downleft->numedge); - fprintf(outfp, ","); - column++; - column++; - if (column > 55) { - column = 2; - fprintf(outfp, "\n "); - } - writeOTU(outfp, edge[ed].downright->numedge); - fprintf(outfp, ")"); - column++; -} /* writeOTU */ - -/* write tree */ -void writetree(FILE *outfp) -{ - column = 1; - fprintf(outfp, "("); - column += fputid(outfp, trueID[0]) + 3; - fprintf(outfp, ","); - writeOTU(outfp, edge[edgeofleaf[0]].downleft->numedge); - column++; - column++; - fprintf(outfp, ","); - writeOTU(outfp, edge[edgeofleaf[0]].downright->numedge); - fprintf(outfp, ");\n"); -} /* writetree */ - - -/* clear all edgeinfos */ 
-void resetedgeinfo() -{ - int i; - - for (i = 0; i < nextedge; i++) - edge[i].edgeinfo = 0; -} /* resetedgeinfo */ - -/* increment all edgeinfo between leaf A and B */ -void incrementedgeinfo(int A, int B) -{ - int curredge, finaledge, nextstep; - - if (A == B) return; - - finaledge = edgeofleaf[B]; - - curredge = edgeofleaf[A]; - edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1; - - while (curredge != finaledge) { - nextstep = (edge[curredge].edgemap)[finaledge]; - switch (nextstep) { - - /* up */ - case 1: curredge = (edge[curredge].up)->numedge; - break; - - /* up/down left */ - case 2: curredge = ((edge[curredge].up)->downleft)->numedge; - break; - - /* up/down right */ - case 3: curredge = ((edge[curredge].up)->downright)->numedge; - break; - - /* down left */ - case 4: curredge = (edge[curredge].downleft)->numedge; - break; - - /* down right */ - case 5: curredge = (edge[curredge].downright)->numedge; - break; - - } - edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1; - } -} /* incrementedgeinfo */ - -/* checks which edge has the lowest edgeinfo - if there are several edges with the same lowest edgeinfo, - one of them will be selected randomly */ -void minimumedgeinfo() -{ - int i, k, howmany, randomnum; - - howmany = 1; - minedge = 0; - mininfo = edge[0].edgeinfo; - for (i = 1; i < nextedge; i++) - if (edge[i].edgeinfo <= mininfo) { - if (edge[i].edgeinfo == mininfo) { - howmany++; - } else { - minedge = i; - mininfo = edge[i].edgeinfo; - howmany = 1; - } - } - - if (howmany > 1) { /* draw random edge */ - randomnum = randominteger(howmany) + 1; /* 1 to howmany */ - i = -1; - for (k = 0; k < randomnum; k++) { - do { - i++; - } while (edge[i].edgeinfo != mininfo); - minedge = i; - } - } -} /* minimumedgeinfo */ - - - - -/*******************************************/ -/* tree sorting */ -/*******************************************/ - -/* compute address of the 4 int (sort key) in the 4 int node */ -int ct_sortkeyaddr(int addr) -{ - int a, res; - a = 
addr % 4; - res = addr - a + 3; - return res; -} - - -/**********/ - -/* compute address of the next edge pointer in a 4 int node (0->1->2->0) */ -int ct_nextedgeaddr(int addr) -{ - int a, res; - a = addr % 4; - if ( a == 2 ) { res = addr - 2; } - else { res = addr + 1; } - return res; -} - - -/**********/ - -/* compute address of 1st edge of a 4 int node from node number */ -int ct_1stedge(int node) -{ - int res; - res = 4 * node; - return res; -} - - -/**********/ - -/* compute address of 2nd edge of a 4 int node from node number */ -int ct_2ndedge(int node) -{ - int res; - res = 4 * node +1; - return res; -} - - -/**********/ - -/* compute address of 3rd edge of a 4 int node from node number */ -int ct_3rdedge(int node) -{ - int res; - res = 4 * node +2; - return res; -} - - -/**********/ - -/* check whether node 'node' is a leaf (2nd/3rd edge pointer = -1) */ -int ct_isleaf(int node, int *ctree) -{ - return (ctree[ct_3rdedge(node)] < 0); -} - - -/**********/ - -/* compute node number of 4 int node from an edge addr. 
*/ -int ct_addr2node(int addr) -{ - int a, res; - a = addr % 4; - res = (int) ((addr - a) / 4); - return res; -} - - -/**********/ - -/* print graph pointers for checking */ -void printctree(int *ctree) -{ - int n; - for (n=0; n < 2*Maxspc; n++) { - printf("n[%3d] = (%3d.%2d, %3d.%2d, %3d.%2d | %3d)\n", n, - (int) ctree[ct_1stedge(n)]/4, - (int) ctree[ct_1stedge(n)]%4, - (int) ctree[ct_2ndedge(n)]/4, - (int) ctree[ct_2ndedge(n)]%4, - (int) ctree[ct_3rdedge(n)]/4, - (int) ctree[ct_3rdedge(n)]%4, - ctree[ct_3rdedge(n)+1]); - } - printf("\n"); -} /* printctree */ - - -/**********/ - -/* allocate memory for ctree 3 ints pointer plus 1 check byte */ -int *initctree() -{ - int *snodes; - int n; - - snodes = (int *) malloc(4 * 2 * Maxspc * sizeof(int)); - if (snodes == NULL) maerror("snodes in copytree"); - - for (n=0; n<(4 * 2 * Maxspc); n++) { - snodes[n]=-1; - } - return snodes; -} - - -/**********/ - -/* free memory of a tree for sorting */ -void freectree(int **snodes) -{ - free(*snodes); - *snodes = NULL; -} - - -/**********/ - -/* copy subtree recursively */ -void copyOTU(int *ctree, /* tree array struct */ - int *ct_nextnode, /* next free node */ - int ct_curredge, /* currende edge to add subtree */ - int *ct_nextleaf, /* next free leaf (0-maxspc) */ - int ed) /* edge in puzzling step tree */ -{ - int i, nextcurredge; - - /* test whether we are on a leaf */ - if (edge[ed].downright == NULL && edge[ed].downleft == NULL) { - for (i = 1; i < nextleaf; i++) { - if (edgeofleaf[i] == ed) { /* i is the leaf of ed */ - nextcurredge = ct_1stedge(*ct_nextleaf); - ctree[ct_curredge] = nextcurredge; - ctree[nextcurredge] = ct_curredge; - ctree[ct_sortkeyaddr(nextcurredge)] = trueID[i]; - (*ct_nextleaf)++; - return; - } - } - } - - /* we are NOT on a leaf */ - nextcurredge = ct_1stedge(*ct_nextnode); - ctree[ct_curredge] = nextcurredge; - ctree[nextcurredge] = ct_curredge; - (*ct_nextnode)++; - nextcurredge = ct_nextedgeaddr(nextcurredge); - copyOTU(ctree, ct_nextnode, 
nextcurredge, - ct_nextleaf, edge[ed].downleft->numedge); - - nextcurredge = ct_nextedgeaddr(nextcurredge); - copyOTU(ctree, ct_nextnode, nextcurredge, - ct_nextleaf, edge[ed].downright->numedge); -} - - -/**********/ - -/* copy treestructure to sorting structure */ -void copytree(int *ctree) -{ - int ct_curredge; - int ct_nextleaf; - int ct_nextnode; - - ct_nextnode = Maxspc; - ct_curredge = ct_1stedge(ct_nextnode); - ct_nextleaf = 1; - - ctree[ct_1stedge(0)] = ct_curredge; - ctree[ct_curredge] = ct_1stedge(0); - ctree[ct_sortkeyaddr(0)] = trueID[0]; - - ct_nextnode++; - - ct_curredge = ct_nextedgeaddr(ct_curredge); - copyOTU(ctree, &ct_nextnode, ct_curredge, - &ct_nextleaf, edge[edgeofleaf[0]].downleft->numedge); - - ct_curredge = ct_nextedgeaddr(ct_curredge); - copyOTU(ctree, &ct_nextnode, ct_curredge, - &ct_nextleaf, edge[edgeofleaf[0]].downright->numedge); -} - - -/**********/ - -/* sort subtree from edge recursively by indices */ -int sortOTU(int edge, int *ctree) -{ - int key1, key2; - int edge1, edge2; - int tempedge; - - if (ctree[ct_2ndedge((int) (edge / 4))] < 0) - return ctree[ct_sortkeyaddr(edge)]; - - edge1 = ctree[ct_nextedgeaddr(edge)]; - edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))]; - - /* printf ("visiting [%5d] -> [%5d], [%5d]\n", edge, edge1, edge2); */ - /* printf ("visiting [%2d.%2d] -> [%2d.%2d], [%2d.%2d]\n", - (int)(edge/4), edge%4, (int)(edge1/4), edge1%4, - (int)(edge2/4), edge2%4); */ - - key1 = sortOTU(edge1, ctree); - key2 = sortOTU(edge2, ctree); - - if (key2 < key1) { - tempedge = ctree[ctree[edge1]]; - ctree[ctree[edge1]] = ctree[ctree[edge2]]; - ctree[ctree[edge2]] = tempedge; - tempedge = ctree[edge1]; - ctree[edge1] = ctree[edge2]; - ctree[edge2] = tempedge; - ctree[ct_sortkeyaddr(edge)] = key2; - - } else { - ctree[ct_sortkeyaddr(edge)] = key1; - } - return ctree[ct_sortkeyaddr(edge)]; -} - - -/**********/ - -/* sort ctree recursively by indices */ -int sortctree(int *ctree) -{ - int n, startnode=-1; - for(n=0; 
n>>>\n"); - tmpptr = list; - *sortlist = list; - while (tmpptr != NULL) { - (*tmpptr).sortnext = (*tmpptr).succ; - (*tmpptr).sortlast = (*tmpptr).pred; - tmpptr = (*tmpptr).succ; - } - - while (xchange > 0) { - curr = *sortlist; - xchange = 0; - if (curr == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n"); - while((*curr).sortnext != NULL) { - next = (*curr).sortnext; - if ((*curr).count >= (*next).count) - curr = (*curr).sortnext; - else { - if ((*curr).sortlast != NULL) - (*((*curr).sortlast)).sortnext = next; - if (*sortlist == curr) - *sortlist = next; - (*next).sortlast = (*curr).sortlast; - - if ((*next).sortnext != NULL) - (*((*next).sortnext)).sortlast = curr; - (*curr).sortnext = (*next).sortnext; - - (*curr).sortlast = next; - (*next).sortnext = curr; - - xchange++; - } - } - } -} /* sortbynum */ - - -/**********/ - -/* print puzzling step tree stuctures for checking */ -void printfpstrees(treelistitemtype *list) -{ - char ch; - treelistitemtype *tmpptr = NULL; - tmpptr = list; - ch = '-'; - while (tmpptr != NULL) { - printf ("%c[%2d] %5d %s\n", ch, (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree); - tmpptr = (*tmpptr).succ; - ch = ' '; - } -} - -/**********/ - -/* print sorted puzzling step tree stucture with names */ -void fprintffullpstree(FILE *outf, char *treestr) -{ - int count = 0; - int idnum = 0; - int n; - for(n=0; treestr[n] != '\0'; n++){ - while(isdigit((int)treestr[n])){ - idnum = (10 * idnum) + ((int)treestr[n]-48); - n++; - count++; - } - if (count > 0){ -# ifdef USEQUOTES - fprintf(outf, "'"); -# endif - (void)fputid(outf, idnum); -# ifdef USEQUOTES - fprintf(outf, "'"); -# endif - count = 0; - idnum = 0; - } - fprintf(outf, "%c", treestr[n]); - } -} - - -/**********/ - -/* print sorted puzzling step tree stuctures with names */ -void fprintfsortedpstrees(FILE *output, - treelistitemtype *list, /* tree list */ - int itemnum, /* order number */ - int itemsum, /* number of trees */ - int comment, /* with statistics, or puzzle report ? 
*/ - float cutoff) /* cutoff percentage */ -{ - treelistitemtype *tmpptr = NULL; - treelistitemtype *slist = NULL; - int num = 1; - float percent; - - if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n"); - sortbynum(list, &slist); - - tmpptr = slist; - while (tmpptr != NULL) { - percent = (float)(100.0 * (*tmpptr).count / itemsum); - if ((cutoff == 0.0) || (cutoff <= percent)) { - if (comment) - fprintf (output, "[ %d. %d %.2f %d %d %d ]", num++, (*tmpptr).count, percent, (*tmpptr).id, itemnum, itemsum); - else { - if (num == 1){ - fprintf (output, "\n"); - fprintf (output, "The following tree(s) occured in more than %.2f%% of the %d puzzling steps.\n", cutoff, itemsum); - fprintf (output, "The trees are orderd descending by the number of occurences.\n"); - fprintf (output, "\n"); - fprintf (output, "\n occurences ID Phylip tree\n"); - } - fprintf (output, "%2d. %5d %6.2f%% %5d ", num++, (*tmpptr).count, percent, (*tmpptr).id); - } - fprintffullpstree(output, (*tmpptr).tree); - fprintf (output, "\n"); - } - tmpptr = (*tmpptr).sortnext; - } - - if (!comment) { - fprintf (output, "\n"); - switch(num) { - case 1: fprintf (output, "There were no tree topologies (out of %d) occuring with a percentage >= %.2f%% of the %d puzzling steps.\n", itemnum, cutoff, itemsum); break; - case 2: fprintf (output, "There was one tree topology (out of %d) occuring with a percentage >= %.2f%%.\n", itemnum, cutoff); break; - default: fprintf (output, "There were %d tree topologies (out of %d) occuring with a percentage >= %.2f%%.\n", num-1, itemnum, cutoff); break; - } - fprintf (output, "\n"); - fprintf (output, "\n"); - } - -} /* fprintfsortedpstrees */ - -/**********/ - -/* print sorted tree topologies for checking */ -void printfsortedpstrees(treelistitemtype *list) -{ - treelistitemtype *tmpptr = NULL; - treelistitemtype *slist = NULL; - - sortbynum(list, &slist); - - tmpptr = slist; - while (tmpptr != NULL) { - printf ("[%2d] %5d %s\n", (*tmpptr).idx, (*tmpptr).count, 
(*tmpptr).tree); - tmpptr = (*tmpptr).sortnext; - } -} /* printfsortedpstrees */ - - -/*******************************************/ -/* end of tree sorting */ -/*******************************************/ - - - -/******************************************************************************/ -/* functions for computing the consensus tree */ -/******************************************************************************/ - -/* prepare for consensus tree analysis */ -void initconsensus() -{ -# if ! PARALLEL - biparts = new_cmatrix(Maxspc-3, Maxspc); -# endif /* PARALLEL */ - - if (Maxspc % 32 == 0) - splitlength = Maxspc/32; - else splitlength = (Maxspc + 32 - (Maxspc % 32))/32; - numbiparts = 0; /* no pattern stored so far */ - maxbiparts = 0; /* no memory reserved so far */ - splitfreqs = NULL; - splitpatterns = NULL; - splitsizes = NULL; - splitcomp = (uli *) malloc(splitlength * sizeof(uli) ); - if (splitcomp == NULL) maerror("splitcomp in initconsensus"); -} - -/* prototype needed for recursive function */ -void makepart(int i, int curribrnch); - -/* recursive function to get bipartitions */ -void makepart(int i, int curribrnch) -{ - int j; - - if ( edge[i].downright == NULL || - edge[i].downleft == NULL) { /* if i is leaf */ - - /* check out what leaf j sits on this edge i */ - for (j = 1; j < Maxspc; j++) { - if (edgeofleaf[j] == i) { - biparts[curribrnch][trueID[j]] = '*'; - return; - } - } - } else { /* still on inner branch */ - makepart(edge[i].downleft->numedge, curribrnch); - makepart(edge[i].downright->numedge, curribrnch); - } -} - -/* compute bipartitions of tree of current puzzling step */ -void computebiparts() -{ - int i, j, curribrnch; - - curribrnch = -1; - - for (i = 0; i < Maxspc - 3; i++) - for (j = 0; j < Maxspc; j++) - biparts[i][j] = '.'; - - for (i = 0; i < Maxbrnch; i++) { - if (!( edgeofleaf[0] == i || - edge[i].downright == NULL || - edge[i].downleft == NULL) ) { /* check all inner branches */ - curribrnch++; - makepart(i, 
curribrnch); - - /* make sure that the root is always a '*' */ - if (biparts[curribrnch][outgroup] == '.') { - for (j = 0; j < Maxspc; j++) { - if (biparts[curribrnch][j] == '.') - biparts[curribrnch][j] = '*'; - else - biparts[curribrnch][j] = '.'; - } - } - } - } -} - -/* print out the bipartition n of all different splitpatterns */ -void printsplit(FILE *fp, uli n) -{ - int i, j, col; - uli z; - - col = 0; - for (i = 0; i < splitlength; i++) { - z = splitpatterns[n*splitlength + i]; - for (j = 0; j < 32 && col < Maxspc; j++) { - if (col % 10 == 0 && col != 0) fprintf(fp, " "); - if (z & 1) fprintf(fp, "."); - else fprintf(fp, "*"); - z = (z >> 1); - col++; - } - } -} - -/* make new entries for new different bipartitions and count frequencies */ -void makenewsplitentries() -{ - int i, j, bpc, identical, idflag, bpsize; - uli nextentry, obpc; - - /* where the next entry would be in splitpatterns */ - nextentry = numbiparts; - - for (bpc = 0; bpc < Maxspc - 3; bpc++) { /* for every new bipartition */ - /* convert bipartition into a more compact format */ - bpsize = 0; - for (i = 0; i < splitlength; i++) { - splitcomp[i] = 0; - for (j = 0; j < 32; j++) { - splitcomp[i] = splitcomp[i] >> 1; - if (i*32 + j < Maxspc) - if (biparts[bpc][i*32 + j] == '.') { - /* set highest bit */ - splitcomp[i] = (splitcomp[i] | 2147483648UL); - bpsize++; /* count the '.' 
*/ - } - } - } - /* compare to the *old* patterns */ - identical = FALSE; - for (obpc = 0; (obpc < numbiparts) && (!identical); obpc++) { - /* compare first partition size */ - if (splitsizes[obpc] == bpsize) idflag = TRUE; - else idflag = FALSE; - /* if size is identical compare whole partition */ - for (i = 0; (i < splitlength) && idflag; i++) - if (splitcomp[i] != splitpatterns[obpc*splitlength + i]) - idflag = FALSE; - if (idflag) identical = TRUE; - } - if (identical) { /* if identical increase frequency */ - splitfreqs[2*(obpc-1)]++; - } else { /* create new entry */ - if (nextentry == maxbiparts) { /* reserve more memory */ - maxbiparts = maxbiparts + 2*Maxspc; - splitfreqs = (uli *) myrealloc(splitfreqs, - 2*maxbiparts * sizeof(uli) ); - /* 2x: splitfreqs contains also an index (sorting!) */ - if (splitfreqs == NULL) maerror("splitfreqs in makenewsplitentries"); - splitpatterns = (uli *) myrealloc(splitpatterns, - splitlength*maxbiparts * sizeof(uli) ); - if (splitpatterns == NULL) maerror("splitpatterns in makenewsplitentries"); - splitsizes = (int *) myrealloc(splitsizes, - maxbiparts * sizeof(int) ); - if (splitsizes == NULL) maerror("splitsizes in makenewsplitentries"); - } - splitfreqs[2*nextentry] = 1; /* frequency */ - splitfreqs[2*nextentry+1] = nextentry; /* index for sorting */ - for (i = 0; i < splitlength; i++) - splitpatterns[nextentry*splitlength + i] = splitcomp[i]; - splitsizes[nextentry] = bpsize; - nextentry++; - } - } - numbiparts = nextentry; -} - -/* general remarks: - - - every entry in consbiparts is one node of the consensus tree - - for each node one has to know which taxa and which other nodes - are *directly* descending from it - - for every taxon/node number there is a flag that shows - whether it descends from the node or not - - '0' means that neither a taxon nor another node with the - corresponding number decends from the node - '1' means that the corresponding taxon descends from the node - '2' means that the corresponding 
node descends from the node - '3' means that the corresponding taxon and node descends from the node -*/ - -/* copy bipartition n of all different splitpatterns to consbiparts[k] */ -void copysplit(uli n, int k) -{ - int i, j, col; - uli z; - - col = 0; - for (i = 0; i < splitlength; i++) { - z = splitpatterns[n*splitlength + i]; - for (j = 0; j < 32 && col < Maxspc; j++) { - if (z & 1) consbiparts[k][col] = '1'; - else consbiparts[k][col] = '0'; - z = (z >> 1); - col++; - } - } -} - -/* compute majority rule consensus tree */ -void makeconsensus() -{ - int i, j, k, size, subnode; - char chari, charj; - - /* sort bipartition frequencies */ - qsort(splitfreqs, numbiparts, 2*sizeof(uli), ulicmp); - /* how many bipartitions are included in the consensus tree */ - consincluded = 0; - for (i = 0; i < numbiparts && i == consincluded; i++) { - if (2*splitfreqs[2*i] > Numtrial) consincluded = i + 1; - } - - /* collect all info about majority rule consensus tree */ - /* the +1 is due to the edge with the root */ - consconfid = new_ivector(consincluded + 1); - conssizes = new_ivector(2*consincluded + 2); - consbiparts = new_cmatrix(consincluded + 1, Maxspc); - - for (i = 0; i < consincluded; i++) { - /* copy partition to consbiparts */ - copysplit(splitfreqs[2*i+1], i); - /* frequency in percent (rounded to integer) */ - consconfid[i] = (int) floor(100.0*splitfreqs[2*i]/Numtrial + 0.5); - /* size of partition */ - conssizes[2*i] = splitsizes[splitfreqs[2*i+1]]; - conssizes[2*i+1] = i; - } - for (i = 0; i < Maxspc; i++) consbiparts[consincluded][i] = '1'; - consbiparts[consincluded][outgroup] = '0'; - consconfid[consincluded] = 100; - conssizes[2*consincluded] = Maxspc - 1; - conssizes[2*consincluded + 1] = consincluded; - - /* sort bipartitions according to cluster size */ - qsort(conssizes, consincluded + 1, 2*sizeof(int), intcmp); - - /* reconstruct consensus tree */ - for (i = 0; i < consincluded; i++) { /* try every node */ - size = conssizes[2*i]; /* size of current 
node */ - for (j = i + 1; j < consincluded + 1; j++) { - - /* compare only with nodes with more descendants */ - if (size == conssizes[2*j]) continue; - - /* check whether node i is a subnode of j */ - subnode = FALSE; - for (k = 0; k < Maxspc && !subnode; k++) { - chari = consbiparts[ conssizes[2*i+1] ][k]; - if (chari != '0') { - charj = consbiparts[ conssizes[2*j+1] ][k]; - if (chari == charj || charj == '3') subnode = TRUE; - } - } - - /* if i is a subnode of j change j accordingly */ - if (subnode) { - /* remove subnode i from j */ - for (k = 0; k < Maxspc; k++) { - chari = consbiparts[ conssizes[2*i+1] ][k]; - if (chari != '0') { - charj = consbiparts[ conssizes[2*j+1] ][k]; - if (chari == charj) - consbiparts[ conssizes[2*j+1] ][k] = '0'; - else if (charj == '3') { - if (chari == '1') - consbiparts[ conssizes[2*j+1] ][k] = '2'; - else if (chari == '2') - consbiparts[ conssizes[2*j+1] ][k] = '1'; - else { - /* Consensus tree [1] */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR H TO DEVELOPERS\n\n\n"); - exit(1); - } - } else { - /* Consensus tree [2] */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR I TO DEVELOPERS\n\n\n"); - exit(1); - } - } - } - /* add link to subnode i in node j */ - charj = consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ]; - if (charj == '0') - consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '2'; - else if (charj == '1') - consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '3'; - else { - /* Consensus tree [3] */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR J TO DEVELOPERS\n\n\n"); - exit(1); - } - } - } - } -} - -/* prototype for recursion */ -void writenode(FILE *treefile, int node); - -/* write node (writeconsensustree) */ -void writenode(FILE *treefile, int node) -{ - int i, first; - - fprintf(treefile, "("); - column++; - /* write descending nodes */ - first = TRUE; - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '2' || - consbiparts[node][i] == '3') { - if (first) first = FALSE; - 
else { - fprintf(treefile, ","); - column++; - } - if (column > 60) { - column = 2; - fprintf(treefile, "\n"); - } - /* write node i */ - writenode(treefile, i); - - /* reliability value as internal label */ - fprintf(treefile, "%d", consconfid[i]); - - column = column + 3; - } - } - /* write descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '1' || - consbiparts[node][i] == '3') { - if (first) first = FALSE; - else { - fprintf(treefile, ","); - column++; - } - if (column > 60) { - column = 2; - fprintf(treefile, "\n"); - } - column += fputid(treefile, i); - } - } - fprintf(treefile, ")"); - column++; -} - -/* write consensus tree */ -void writeconsensustree(FILE *treefile) -{ - int i, first; - - column = 1; - fprintf(treefile, "("); - column += fputid(treefile, outgroup) + 2; - fprintf(treefile, ","); - /* write descending nodes */ - first = TRUE; - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '2' || - consbiparts[consincluded][i] == '3') { - if (first) first = FALSE; - else { - fprintf(treefile, ","); - column++; - } - if (column > 60) { - column = 2; - fprintf(treefile, "\n"); - } - /* write node i */ - writenode(treefile, i); - - /* reliability value as internal label */ - fprintf(treefile, "%d", consconfid[i]); - - column = column + 3; - } - } - /* write descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '1' || - consbiparts[consincluded][i] == '3') { - if (first) first = FALSE; - else { - fprintf(treefile, ","); - column++; - } - if (column > 60) { - column = 2; - fprintf(treefile, "\n"); - } - column += fputid(treefile, i); - } - } - fprintf(treefile, ");\n"); -} - -/* prototype for recursion */ -void nodecoordinates(int node); - -/* establish node coordinates (plotconsensustree) */ -void nodecoordinates(int node) -{ - int i, ymin, ymax, xcoordinate; - - /* first establish coordinates of descending nodes */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] 
== '2' || - consbiparts[node][i] == '3') - nodecoordinates(i); - } - - /* then establish coordinates of descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '1' || - consbiparts[node][i] == '3') { - /* y-coordinate of taxon i */ - ycortax[i] = ytaxcounter; - ytaxcounter = ytaxcounter - 2; - } - } - - /* then establish coordinates of this node */ - ymin = 2*Maxspc - 2; - ymax = 0; - xcoordinate = 0; - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '2' || - consbiparts[node][i] == '3') { - if (ycor[i] > ymax) ymax = ycor[i]; - if (ycor[i] < ymin) ymin = ycor[i]; - if (xcor[i] > xcoordinate) xcoordinate = xcor[i]; - } - } - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '1' || - consbiparts[node][i] == '3') { - if (ycortax[i] > ymax) ymax = ycortax[i]; - if (ycortax[i] < ymin) ymin = ycortax[i]; - } - } - ycormax[node] = ymax; - ycormin[node] = ymin; - ycor[node] = (int) floor(0.5*(ymax + ymin) + 0.5); - if (xcoordinate == 0) xcoordinate = 9; - xcor[node] = xcoordinate + 4; -} - -/* prototype for recursion */ -void drawnode(int node, int xold); - -/* drawnode (plotconsensustree) */ -void drawnode(int node, int xold) -{ - int i, j; - char buf[4]; - - /* first draw vertical line */ - for (i = ycormin[node] + 1; i < ycormax[node]; i++) - treepict[xcor[node]][i] = ':'; - - /* then draw descending nodes */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '2' || - consbiparts[node][i] == '3') - drawnode(i, xcor[node]); - } - - /* then draw descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '1' || - consbiparts[node][i] == '3') { - treepict[xcor[node]][ycortax[i]] = ':'; - for (j = xcor[node] + 1; j < xsize-10; j++) - treepict[j][ycortax[i]] = '-'; - for (j = 0; j < 10; j++) - treepict[xsize-10+j][ycortax[i]] = Identif[i][j]; - } - } - - /* then draw internal edge with consensus value */ - treepict[xold][ycor[node]] = ':'; - treepict[xcor[node]][ycor[node]] = ':'; - for (i = 
xold + 1; i < xcor[node]-3; i++) - treepict[i][ycor[node]] = '-'; - sprintf(buf, "%d", consconfid[node]); - if (consconfid[node] == 100) { - treepict[xcor[node]-3][ycor[node]] = buf[0]; - treepict[xcor[node]-2][ycor[node]] = buf[1]; - treepict[xcor[node]-1][ycor[node]] = buf[2]; - } else { - treepict[xcor[node]-3][ycor[node]] = '-'; - treepict[xcor[node]-2][ycor[node]] = buf[0]; - treepict[xcor[node]-1][ycor[node]] = buf[1]; - } -} - -/* plot consensus tree */ -void plotconsensustree(FILE *plotfp) -{ - int i, j, yroot, startree; - - /* star tree or no star tree */ - if (consincluded == 0) { - startree = TRUE; - consincluded = 1; /* avoids problems with malloc */ - } else - startree = FALSE; - - /* memory for x-y-coordinates of each bipartition */ - xcor = new_ivector(consincluded); - ycor = new_ivector(consincluded); - ycormax = new_ivector(consincluded); - ycormin = new_ivector(consincluded); - if (startree) consincluded = 0; /* avoids problems with malloc */ - - /* y-coordinates of each taxon */ - ycortax = new_ivector(Maxspc); - ycortax[outgroup] = 0; - - /* establish coordinates */ - ytaxcounter = 2*Maxspc - 2; - - /* first establish coordinates of descending nodes */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '2' || - consbiparts[consincluded][i] == '3') - nodecoordinates(i); - } - - /* then establish coordinates of descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '1' || - consbiparts[consincluded][i] == '3') { - /* y-coordinate of taxon i */ - ycortax[i] = ytaxcounter; - ytaxcounter = ytaxcounter - 2; - } - } - - /* then establish length of root edge and size of whole tree */ - yroot = 0; - xsize = 0; - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '2' || - consbiparts[consincluded][i] == '3') { - if (ycor[i] > yroot) yroot = ycor[i]; - if (xcor[i] > xsize) xsize = xcor[i]; - } - } - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '1' || - 
consbiparts[consincluded][i] == '3') { - if (ycortax[i] > yroot) yroot = ycortax[i]; - } - } - if (xsize == 0) xsize = 9; - /* size in x direction inclusive one blank on the left */ - xsize = xsize + 6; - - /* change all x-labels so that (0,0) is down-left */ - for (i = 0; i < consincluded; i++) - xcor[i] = xsize-1-xcor[i]; - - /* draw tree */ - treepict = new_cmatrix(xsize, 2*Maxspc-1); - for (i = 0; i < xsize; i++) - for (j = 0; j < 2*Maxspc-1; j++) - treepict[i][j] = ' '; - - /* draw root */ - for (i = 1; i < yroot; i++) - treepict[1][i] = ':'; - treepict[1][0] = ':'; - for (i = 2; i < xsize - 10; i++) - treepict[i][0] = '-'; - for (i = 0; i < 10; i++) - treepict[xsize-10+i][0] = Identif[outgroup][i]; - - /* then draw descending nodes */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '2' || - consbiparts[consincluded][i] == '3') - drawnode(i, 1); - } - - /* then draw descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '1' || - consbiparts[consincluded][i] == '3') { - treepict[1][ycortax[i]] = ':'; - for (j = 2; j < xsize-10; j++) - treepict[j][ycortax[i]] = '-'; - for (j = 0; j < 10; j++) - treepict[xsize-10+j][ycortax[i]] = Identif[i][j]; - } - } - - /* plot tree */ - for (i = 2*Maxspc-2; i > -1; i--) { - for (j = 0; j < xsize; j++) - fputc(treepict[j][i], plotfp); - fputc('\n', plotfp); - } - - free_ivector(xcor); - free_ivector(ycor); - free_ivector(ycormax); - free_ivector(ycormin); - free_ivector(ycortax); - free_cmatrix(treepict); -} - - - -/******************************************************************************/ -/* storing and evaluating quartet branching information */ -/******************************************************************************/ - -/* general remarks: - - for a quartet with the taxa a, b, c, d there are - three possible binary trees: - - 1) (a,b)-(c,d) - 2) (a,c)-(b,d) - 3) (a,d)-(b,c) - - For every quartet information about its branching structure is - stored. 
With the functions readquartet and writequartet - this information can be accessed. For every quartet (a,b,c,d) - with a < b < c < d (taxa) the branching information is encoded - using 4 bits: - - value 8 4 2 1 - +-------------+-------------+-------------+-------------+ - | not used | tree 3 | tree 2 | tree 1 | - +-------------+-------------+-------------+-------------+ - - If the branching structure of the taxa corresponds to one of the - three trees the corresponding bit is set. If the branching structure - is unclear because two of the three trees have the same maximum - likelihood value the corresponding two bits are set. If the branching - structure is completely unknown all the bits are set (the highest - bit is always cleared because it is not used). - -*/ - -/* allocate memory for quartets */ -unsigned char *mallocquartets(int taxa) -{ - uli nc, numch; - unsigned char *qinfo; - - /* compute number of quartets */ - Numquartets = (uli) taxa*(taxa-1)*(taxa-2)*(taxa-3)/24; - if (Numquartets % 2 == 0) { /* even number */ - numch = Numquartets/2; - } else { /* odd number */ - numch = (Numquartets + 1)/2; - } - /* allocate memory */ - qinfo = (unsigned char *) malloc(numch * sizeof(unsigned char) ); - if (qinfo == NULL) maerror("quartetinfo in mallocquartets"); - for (nc = 0; nc < numch; nc++) qinfo[nc] = 0; - return(qinfo); -} - -/* free quartet memory */ -void freequartets() -{ - free(quartetinfo); -} - -/* read quartet info - a < b < c < d */ -unsigned char readquartet(int a, int b, int c, int d) -{ - uli qnum; - - qnum = (uli) a - + (uli) b*(b-1)/2 - + (uli) c*(c-1)*(c-2)/6 - + (uli) d*(d-1)*(d-2)*(d-3)/24; - if (qnum % 2 == 0) { /* even number */ - /* bits 0 to 3 */ - return (quartetinfo[qnum/2] & (unsigned char) 15); - } else { /* odd number */ - /* bits 4 to 7 */ - return ((quartetinfo[(qnum-1)/2] & (unsigned char) 240)>>4); - } -} - -/* write quartet info - a < b < c < d, 0 <= info <= 15 */ -void writequartet(int a, int b, int c, int d, unsigned char info) 
-{ - uli qnum; - - qnum = (uli) a - + (uli) b*(b-1)/2 - + (uli) c*(c-1)*(c-2)/6 - + (uli) d*(d-1)*(d-2)*(d-3)/24; - if (qnum % 2 == 0) { /* even number */ - /* bits 0 to 3 */ - quartetinfo[qnum/2] = - ((quartetinfo[qnum/2] & (unsigned char) 240) | - (info & (unsigned char) 15)); - } else { /* odd number */ - /* bits 4 to 7 */ - quartetinfo[(qnum-1)/2] = - ((quartetinfo[(qnum-1)/2] & (unsigned char) 15) | - ((info & (unsigned char) 15)<<4)); - } -} - -/* prototypes */ -void openfiletowrite(FILE **, char[], char[]); -void closefile(FILE *); - -/* sorts three doubles in descending order */ -void sort3doubles(dvector num, ivector order) -{ - if (num[0] > num[1]) { - if(num[2] > num[0]) { - order[0] = 2; - order[1] = 0; - order[2] = 1; - } else if (num[2] < num[1]) { - order[0] = 0; - order[1] = 1; - order[2] = 2; - } else { - order[0] = 0; - order[1] = 2; - order[2] = 1; - } - } else { - if(num[2] > num[1]) { - order[0] = 2; - order[1] = 1; - order[2] = 0; - } else if (num[2] < num[0]) { - order[0] = 1; - order[1] = 0; - order[2] = 2; - } else { - order[0] = 1; - order[1] = 2; - order[2] = 0; - } - } -} - -/* checks out all possible quartets */ -void computeallquartets() -{ - double onethird; - uli nq; - unsigned char treebits[3]; - FILE *lhfp; -# if ! 
PARALLEL - int a, b, c, i; - double qc2, mintogo, minutes, hours, temp; - double temp1, temp2, temp3; - unsigned char discreteweight[3]; -# endif - - onethird = 1.0/3.0; - treebits[0] = (unsigned char) 1; - treebits[1] = (unsigned char) 2; - treebits[2] = (unsigned char) 4; - - if (show_optn) { /* list all unresolved quartets */ - openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees"); - fprintf(unresfp, "List of all completely unresolved quartets:\n\n"); - } - - nq = 0; - badqs = 0; - - /* start timer - percentage of completed quartets */ - time(&time0); - time1 = time0; - mflag = 0; - -# if PARALLEL - { - schedtype sched; - int flag; - MPI_Status stat; - int dest = 1; - uli qaddr =0; - uli qamount=0; - int qblocksent = 0; - int apr; - uli sq, noq; - initsched(&sched, numquarts(Maxspc), PP_NumProcs-1, 4); - qamount=sgss(&sched); - while (qamount > 0) { - if (PP_emptyslave()) { - PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr); - qblocksent -= noq; - } - dest = PP_getslave(); - PP_SendDoQuartBlock(dest, qaddr, qamount, (approxqp ? 
APPROX : EXACT)); - qblocksent += qamount; - qaddr += qamount; - qamount=sgss(&sched); - - MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat); - while (flag) { - PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr); - qblocksent -= noq; - MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat); - } - } - while (qblocksent > 0) { - PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr); - qblocksent -= noq; - } - } -# else /* PARALLEL */ - - addtimes(GENERAL, &tarr); - if (savequartlh_optn) { - openfiletowrite(&lhfp, ALLQUARTLH, "all quartet likelihoods"); - if (saveqlhbin_optn) writetpqfheader(Maxspc, lhfp, 3); - else writetpqfheader(Maxspc, lhfp, 4); - } - - for (i = 3; i < Maxspc; i++) - for (c = 2; c < i; c++) - for (b = 1; b < c; b++) - for (a = 0; a < b; a++) { - nq++; - - /* generate message every 15 minutes */ - /* check timer */ - time(&time2); - if ( (time2 - time1) > 900) { - /* every 900 seconds */ - /* percentage of completed quartets */ - if (mflag == 0) { - FPRINTF(STDOUTFILE "\n"); - mflag = 1; - } - qc2 = 100.*nq/Numquartets; - mintogo = (100.0-qc2) * - (double) (time2-time0)/60.0/qc2; - hours = floor(mintogo/60.0); - minutes = mintogo - 60.0*hours; - FPRINTF(STDOUTFILE "%.2f%%", qc2); - FPRINTF(STDOUTFILE " completed (remaining"); - FPRINTF(STDOUTFILE " time: %.0f", hours); - FPRINTF(STDOUTFILE " hours %.0f", minutes); - FPRINTF(STDOUTFILE " minutes)\n"); - fflush(STDOUT); - time1 = time2; - } - - /* maximum likelihood values */ - - /* exact or approximate maximum likelihood values */ - compute_quartlklhds(a,b,c,i,&qweight[0],&qweight[1],&qweight[2], (approxqp ? 
APPROX : EXACT)); - - if (savequartlh_optn) { - if (saveqlhbin_optn) - fwrite(qweight, sizeof(double), 3, lhfp); - else - fprintf(lhfp, "(%d,%d,%d,%d)\t%f\t%f\t%f\n", a, b, c, i, - qweight[0], qweight[1], qweight[2]); - } - - /* sort in descending order */ - sort3doubles(qweight, qworder); - - if (usebestq_optn) { - sqorder[2] = 2; - discreteweight[sqorder[2]] = treebits[qworder[0]]; - if (qweight[qworder[0]] == qweight[qworder[1]]) { - discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[1]]; - if (qweight[qworder[1]] == qweight[qworder[2]]) { - discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[2]]; - discreteweight[sqorder[2]] = 7; - } - } - } else { - - /* compute Bayesian weights */ - qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]); - qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]); - qweight[qworder[0]] = 1.0; - temp = qweight[0] + qweight[1] + qweight[2]; - qweight[0] = qweight[0]/temp; - qweight[1] = qweight[1]/temp; - qweight[2] = qweight[2]/temp; - - /* square deviations */ - temp1 = 1.0 - qweight[qworder[0]]; - sqdiff[0] = temp1 * temp1 + - qweight[qworder[1]] * qweight[qworder[1]] + - qweight[qworder[2]] * qweight[qworder[2]]; - discreteweight[0] = treebits[qworder[0]]; - - temp1 = 0.5 - qweight[qworder[0]]; - temp2 = 0.5 - qweight[qworder[1]]; - sqdiff[1] = temp1 * temp1 + temp2 * temp2 + - qweight[qworder[2]] * qweight[qworder[2]]; - discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]]; - - temp1 = onethird - qweight[qworder[0]]; - temp2 = onethird - qweight[qworder[1]]; - temp3 = onethird - qweight[qworder[2]]; - sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3; - discreteweight[2] = (unsigned char) 7; - - /* sort in descending order */ - sort3doubles(sqdiff, sqorder); - } - - /* determine best discrete weight */ - writequartet(a, b, c, i, discreteweight[sqorder[2]]); - - /* counting completely unresolved quartets */ - if 
(discreteweight[sqorder[2]] == 7) { - badqs++; - badtaxon[a]++; - badtaxon[b]++; - badtaxon[c]++; - badtaxon[i]++; - if (show_optn) { - fputid10(unresfp, a); - fprintf(unresfp, " "); - fputid10(unresfp, b); - fprintf(unresfp, " "); - fputid10(unresfp, c); - fprintf(unresfp, " "); - fputid(unresfp, i); - fprintf(unresfp, "\n"); - } - } - addtimes(QUARTETS, &tarr); - } - if (savequartlh_optn) { - closefile(lhfp); - } - if (show_optn) - closefile(unresfp); - if (mflag == 1) - FPRINTF(STDOUTFILE "\n"); -# endif /* PARALLEL */ - -} - -/* check the branching structure between the leaves (not the taxa!) - A, B, C, and I (A, B, C, I don't need to be ordered). As a result, - the two leaves that are closer related to each other than to leaf I - are found in chooseA and chooseB. If the branching structure is - not uniquely defined, ChooseA and ChooseB are chosen randomly - from the possible taxa */ -void checkquartet(int A, int B, int C, int I) -{ - int i, j, a, b, taxon[5], leaf[5], ipos; - unsigned char qresult; - int notunique = FALSE; - - /* The relationship between leaves and taxa is defined by trueID */ - taxon[1] = trueID[A]; /* taxon number */ - leaf[1] = A; /* leaf number */ - taxon[2] = trueID[B]; - leaf[2] = B; - taxon[3] = trueID[C]; - leaf[3] = C; - taxon[4] = trueID[I]; - leaf[4] = I; - - /* sort for taxa */ - /* Source: Numerical Recipes (PIKSR2.C) */ - for (j = 2; j <= 4; j++) { - a = taxon[j]; - b = leaf[j]; - i = j-1; - while (i > 0 && taxon[i] > a) { - taxon[i+1] = taxon[i]; - leaf[i+1] = leaf[i]; - i--; - } - taxon[i+1] = a; - leaf[i+1] = b; - } - - /* where is leaf I ? 
*/ - ipos = 1; - while (leaf[ipos] != I) ipos++; - - /* look at sequence quartet */ - qresult = readquartet(taxon[1], taxon[2], taxon[3], taxon[4]); - - /* chooseA and chooseB */ - do { - switch (qresult) { - - /* one single branching structure */ - - /* 001 */ - case 1: if (ipos == 1 || ipos == 2) { - chooseA = leaf[3]; - chooseB = leaf[4]; - } else { - chooseA = leaf[1]; - chooseB = leaf[2]; - } - notunique = FALSE; - break; - - /* 010 */ - case 2: if (ipos == 1 || ipos == 3) { - chooseA = leaf[2]; - chooseB = leaf[4]; - } else { - chooseA = leaf[1]; - chooseB = leaf[3]; - } - notunique = FALSE; - break; - - /* 100 */ - case 4: if (ipos == 1 || ipos == 4) { - chooseA = leaf[2]; - chooseB = leaf[3]; - } else { - chooseA = leaf[1]; - chooseB = leaf[4]; - } - notunique = FALSE; - break; - - /* two possible branching structures */ - - /* 011 */ - case 3: if (randominteger(2)) qresult = 1; - else qresult = 2; - notunique = TRUE; - break; - - /* 101 */ - case 5: if (randominteger(2)) qresult = 1; - else qresult = 4; - notunique = TRUE; - break; - - /* 110 */ - case 6: if (randominteger(2)) qresult = 2; - else qresult = 4; - notunique = TRUE; - break; - - /* three possible branching structures */ - - /* 111 */ - case 7: qresult = (1 << randominteger(3)); /* 1, 2, or 4 */ - notunique = TRUE; - break; - - default: /* Program error [checkquartet] */ -#if PARALLEL - FPRINTF(STDOUTFILE "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K-PARALLEL TO DEVELOPERS (%d,%d,%d,%d) = %ld\n\n\n", - PP_Myid, taxon[1], taxon[2], taxon[3], taxon[4], - quart2num(taxon[1], taxon[2], taxon[3], taxon[4])); -#else - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR K TO DEVELOPERS\n\n\n"); -#endif - - } - } while (notunique); - - return; -} - diff --git a/forester/archive/RIO/others/puzzle_dqo/src/sched.c b/forester/archive/RIO/others/puzzle_dqo/src/sched.c deleted file mode 100644 index 3f1c0f6..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/sched.c +++ /dev/null @@ -1,423 +0,0 @@ -/* - * 
sched.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -#include -#include -#include -#include "sched.h" -/* #include "ppuzzle.h" */ - -#define STDOUT stdout -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUTFILE STDOUT, -#endif - -int scinit; -int ssinit; -int fscinit; -int gssinit; -int tssinit; - -int n, chunksize; -int p; - -#ifdef SCHEDTEST - schedtype testsched; -#endif - -void printsched(schedtype sch) -{ - FPRINTF(STDOUTFILE "Current scheduling status:\n"); - FPRINTF(STDOUTFILE " truetasks=%5ld - alltasks=%5ld - numtasks=%5ld - numprocs=%5d\n", - sch.truetasks, sch.alltasks, sch.numtasks, sch.numprocs); - FPRINTF(STDOUTFILE " delta =%5d - overhead=%5d - rest =%5d - inited =%5d\n", - sch.delta, sch.overhead, sch.rest, sch.inited); - FPRINTF(STDOUTFILE " nconst =%5d - fconst =%5f - lconst =%5f - kconst =%5f\n", - sch.nconst, sch.fconst, sch.lconst, sch.kconst); -} - -void initsched(schedtype *sch, uli tasks, int procs, uli minchunk) -{ - if (minchunk < 1) minchunk = 1; - (*sch).minchunk = minchunk; - (*sch).truetasks = tasks; - (*sch).rest = (int)((*sch).truetasks % (*sch).minchunk); - (*sch).alltasks = (tasks - (*sch).rest); - (*sch).numtasks = (*sch).alltasks; - (*sch).numprocs = procs; - (*sch).delta = 0; - (*sch).overhead = 0; - (*sch).nconst = 0; - (*sch).fconst = 0; - (*sch).lconst = 0; - (*sch).kconst = 0; - (*sch).inited = 0; - -# ifdef PVERBOSE1 - printsched(*sch); -# endif /* PVERBOSE1 */ -} - -/************************************** -* Static Chunking 
-**************************************/ -uli sc(schedtype *sch) -{ - uli tmp; - - if ((*sch).inited == 0) { - (*sch).overhead = (*sch).alltasks % (*sch).numprocs; - (*sch).delta = ((*sch).alltasks - (*sch).overhead) / (*sch).numprocs; - (*sch).inited ++; - } - - if (!(*sch).overhead) { - if ((*sch).numtasks >= (*sch).delta) - tmp = (uli)(*sch).delta; - else - tmp = 0; - } else { - if ((*sch).numtasks >= ((*sch).delta + 1)) { - tmp = (uli)(*sch).delta + 1; - (*sch).overhead--; - } else - tmp = 0; - } - - /* correction */ - if ((tmp % (*sch).minchunk) > 0) { - tmp += (*sch).minchunk - (tmp % (*sch).minchunk); - } - - (*sch).numtasks -= tmp; - - if ((*sch).numtasks == 0) { - tmp += (uli)(*sch).rest; - (*sch).rest = 0; - } - return tmp; -} /* SC */ - - -/************************************** -* Self Scheduling -**************************************/ -uli ss(schedtype *sch) -{ - uli tmp; - - if ((*sch).inited == 0) { - (*sch).inited ++; - } - - if ((*sch).numtasks >= 1) - tmp = 1; - else - tmp = (*sch).numtasks; - - /* correction */ - if ((tmp % (*sch).minchunk) > 0) { - tmp += (*sch).minchunk - (tmp % (*sch).minchunk); - } - - (*sch).numtasks -= tmp; - - if ((*sch).numtasks == 0) { - tmp += (uli)(*sch).rest; - (*sch).rest = 0; - } - - return tmp; -} /* SS */ - - -/************************************** -* fixed-size chunking -**************************************/ -int fsc() -{ - static int R ; - static int delta ; - static int overhead; - - int tmp; - - if (fscinit == 0) { - R = n; - overhead = n % p; - delta = (n - overhead) / p; - fscinit ++; - } - - if (!overhead) { - if (R >= delta) - tmp = delta; - else - tmp = 0; - } else { - if (R >= (delta + 1)) { - tmp = delta + 1; - overhead--; - } else - tmp = 0; - } - - R -= tmp; - return tmp; -} /* FSC */ - - -/************************************** -* Guided Self Scheduling -**************************************/ -uli gss(schedtype *sch) -{ - uli tmp; - - if ((*sch).inited == 0) { - (*sch).inited ++; - } - - if 
((*sch).numtasks >= 1) { - tmp = (uli)ceil((*sch).numtasks / (*sch).numprocs); - if (tmp == 0) tmp = 1; - } else - tmp = 0; - - /* correction */ - if ((tmp % (*sch).minchunk) > 0) { - tmp += (*sch).minchunk - (tmp % (*sch).minchunk); - } - - (*sch).numtasks -= tmp; - - if ((*sch).numtasks == 0) { - tmp += (uli)(*sch).rest; - (*sch).rest = 0; - } - return tmp; -} /* GSS */ - -/************************************** -* Smooth Guided Self Scheduling -**************************************/ -uli sgss(schedtype *sch) -{ - uli tmp; - - if ((*sch).inited == 0) { - (*sch).inited ++; - } - - if ((*sch).numtasks >= 1) { - tmp = (uli)ceil(((*sch).numtasks / (*sch).numprocs) / 2); - if (tmp == 0) tmp = 1; - } else - tmp = 0; - - /* correction */ - if ((tmp % (*sch).minchunk) > 0) { - tmp += (*sch).minchunk - (tmp % (*sch).minchunk); - } - - (*sch).numtasks -= tmp; - - if ((*sch).numtasks == 0) { - tmp += (uli)(*sch).rest; - (*sch).rest = 0; - } - return tmp; -} /* SGSS */ - - -/************************************** -* Trapezoid Self Scheduling -**************************************/ -uli tss(schedtype *sch) -{ - uli tmp; - - if ((*sch).inited == 0) { - (*sch).fconst = ceil((*sch).numtasks / (2*(*sch).numprocs)); - if ((*sch).fconst == 0) (*sch).fconst = 1; - (*sch).lconst = 1; - (*sch).nconst = ceil( (2*n) / ((*sch).fconst + (*sch).lconst) ); - (*sch).ddelta = (((*sch).fconst - (*sch).lconst) / ((*sch).nconst - 1)); - (*sch).kconst = (*sch).fconst; - FPRINTF(STDOUTFILE "f = n/2p = %.2f ; l = %.2f\n", (*sch).fconst, (*sch).lconst); - FPRINTF(STDOUTFILE "N = 2n/(f+l) = %d ; delta = (f-l)/(N-1) = %.2f\n", (*sch).nconst, (*sch).ddelta); - (*sch).inited ++; - } - - if ((*sch).kconst <= (double) (*sch).numtasks) { - tmp = (uli)ceil((*sch).kconst); - (*sch).kconst -= (*sch).ddelta; - } else { - tmp = (uli)(*sch).numtasks; - (*sch).kconst = 0.0; - } - - /* correction */ - if ((tmp % (*sch).minchunk) > 0) { - tmp += (*sch).minchunk - (tmp % (*sch).minchunk); - } - - (*sch).numtasks 
-= tmp; - - if ((*sch).numtasks == 0) { - tmp += (uli)(*sch).rest; - (*sch).rest = 0; - } - return tmp; - -} /* TSS */ - - -/******************/ - - -#ifdef SCHEDTEST - uli numquarts(int maxspc) - { - uli tmp; - int a, b, c, d; - - if (maxspc < 4) - return (uli)0; - else { - maxspc--; - a = maxspc-3; - b = maxspc-2; - c = maxspc-1; - d = maxspc; - - tmp = (uli) 1 + a + - (uli) b * (b-1) / 2 + - (uli) c * (c-1) * (c-2) / 6 + - (uli) d * (d-1) * (d-2) * (d-3) / 24; - return (tmp); - } - } /* numquarts */ -#endif - - - - -/************************************** -* main -**************************************/ -#ifdef SCHEDTEST -int main(int argc, char *argv[]) -{ - int tcount, - count, - lastsize, - size; - if ((argc > 4) || (argc < 3)) { - FPRINTF(STDOUTFILE "\n\n Usage: %s <# species> <# processors> []\n\n", argv[0]); - exit(1); - } - - chunksize = 1; - - switch(argc) { - case 4: - chunksize = atoi(argv[3]); - case 3: - n = numquarts(atoi(argv[1])); - p = atoi(argv[2]); - } - - FPRINTF(STDOUTFILE "proc=%6d\n", p); - FPRINTF(STDOUTFILE "task=%6d\n", n); - - initsched(&testsched, n, p, chunksize); - printsched(testsched); - - count=1; tcount = 0; - FPRINTF(STDOUTFILE "\n\n---------------------------\n"); - FPRINTF(STDOUTFILE "SC(sched) - Static Chunking\n"); - FPRINTF(STDOUTFILE "---------------------------\n\n"); - do { size = sc(&testsched); - if (size > 0) {FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, size , (size%chunksize) ? '!' : ' '); - tcount+=size;} - else FPRINTF(STDOUTFILE "%d tasks in %d chunks\n", tcount, (count-1)); - } while (size > 0); - - - initsched(&testsched, n, p, chunksize); - printsched(testsched); - - count=1; tcount = 0; - FPRINTF(STDOUTFILE "\n\n---------------------------\n"); - FPRINTF(STDOUTFILE "SS(sched) - Self Scheduling\n"); - FPRINTF(STDOUTFILE "---------------------------\n\n"); - do { size = ss(&testsched); - if (size > 0) {if (count==1) FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, size , (size%chunksize) ? '!' 
: ' '); - count++; - tcount+=size; - lastsize = size;} - else {FPRINTF(STDOUTFILE " ...\n"); - FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, lastsize , (lastsize%chunksize) ? '!' : ' '); - FPRINTF(STDOUTFILE "%d tasks in %d chunks\n", tcount, (count-1));} - } while (size > 0); - - -/**/ - count=1; tcount = 0; - FPRINTF(STDOUTFILE "\n\n---------------------------\n"); - FPRINTF(STDOUTFILE "FSC() - Fixed-Size Chunking\n"); - FPRINTF(STDOUTFILE "---------------------------\n\n"); - do { size = fsc(); - if (size > 0) {FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, size , (size%chunksize) ? '!' : ' '); - tcount+=size;} - else FPRINTF(STDOUTFILE "%d tasks in %d chunks\n", tcount, (count-1)); - } while (size > 0); -/**/ - - initsched(&testsched, n, p, chunksize); - printsched(testsched); - - count=1; tcount = 0; - FPRINTF(STDOUTFILE "\n\n-----------------------------------\n"); - FPRINTF(STDOUTFILE "GSS(sched) - Guided Self Scheduling\n"); - FPRINTF(STDOUTFILE "-----------------------------------\n\n"); - do { size = gss(&testsched); - if (size > 0) {FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, size , (size%chunksize) ? '!' : ' '); - tcount+=size;} - else FPRINTF(STDOUTFILE "%d tasks in %d chunks\n", tcount, (count-1)); - } while (size > 0); - - initsched(&testsched, n, p, chunksize); - printsched(testsched); - - count=1; tcount = 0; - FPRINTF(STDOUTFILE "\n\n--------------------------------------\n"); - FPRINTF(STDOUTFILE "TSS(sched) - Trapezoid Self Scheduling\n"); - FPRINTF(STDOUTFILE "--------------------------------------\n\n"); - do { size = tss(&testsched); - if (size > 0) {FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, size , (size%chunksize) ? '!' 
: ' '); - tcount+=size;} - else FPRINTF(STDOUTFILE "%d tasks in %d chunks\n", tcount, (count-1)); - } while (size > 0); - return (0); -} -#endif diff --git a/forester/archive/RIO/others/puzzle_dqo/src/sched.h b/forester/archive/RIO/others/puzzle_dqo/src/sched.h deleted file mode 100644 index e75bdd2..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/sched.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * sched.h - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -#ifndef SCHED_H -#define SCHED_H -#ifndef SCHEDTEST -# include "util.h" -#else - typedef unsigned long int uli; -#endif - - -typedef struct sched_t{ - uli truetasks; - uli alltasks; - uli numtasks; - uli minchunk; - int numprocs; - int delta; - double ddelta; - int overhead; - int rest; - int nconst; - double fconst; - double lconst; - double kconst; - int inited; -} schedtype; - -void num2quart(uli qnum, int *a, int *b, int *c, int *d); -uli numquarts(int maxspc); -uli quart2num (int a, int b, int c, int d); - -void printsched(schedtype sch); -void initsched(schedtype *sch, uli tasks, int procs, uli minchunk); -uli sc(schedtype *sch); -uli gss(schedtype *sch); -uli sgss(schedtype *sch); -uli tss(schedtype *sch); - -#endif /* SCHED_H */ diff --git a/forester/archive/RIO/others/puzzle_dqo/src/test b/forester/archive/RIO/others/puzzle_dqo/src/test deleted file mode 100644 index a680df2..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/test +++ /dev/null @@ -1,19 +0,0 @@ -CC gcc -LIBS -lm -CFLAGS -g -O2 -DEFS -DPACKAGE=\"tree-puzzle\" -DVERSION=\"5.0\" -DHAVE_LIBM=1 -DSTDC_HEADERS=1 -DHAVE_LIMITS_H=1 -SET_MAKE - -HCC @HCC@ -MPICC -MPCC @MPCC@ - -MPICC -MPILIBS -MPIDEFS -MPICFLAGS - -PCC 
@PCC@ -PLIBS @PLIBS@ -PDEFS @PDEFS@ -PCFLAGS @PCFLAGS@ diff --git a/forester/archive/RIO/others/puzzle_dqo/src/test.in b/forester/archive/RIO/others/puzzle_dqo/src/test.in deleted file mode 100644 index 0dc7ddc..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/test.in +++ /dev/null @@ -1,19 +0,0 @@ -CC @CC@ -LIBS @LIBS@ -CFLAGS @CFLAGS@ -DEFS @DEFS@ -SET_MAKE @SET_MAKE@ - -HCC @HCC@ -MPICC @MPICC@ -MPCC @MPCC@ - -MPICC @MPICC@ -MPILIBS @MPILIBS@ -MPIDEFS @MPIDEFS@ -MPICFLAGS @MPICFLAGS@ - -PCC @PCC@ -PLIBS @PLIBS@ -PDEFS @PDEFS@ -PCFLAGS @PCFLAGS@ diff --git a/forester/archive/RIO/others/puzzle_dqo/src/util.c b/forester/archive/RIO/others/puzzle_dqo/src/util.c deleted file mode 100644 index 6a998dc..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/util.c +++ /dev/null @@ -1,751 +0,0 @@ -/* - * util.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - - -#include "util.h" - -#define STDOUT stdout -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUTFILE STDOUT, - extern int PP_NumProcs; - extern int PP_Myid; - long int PP_randn; - long int PP_rand; -#endif - - -/* - * memory allocation error handler - */ - -void maerror(char *message) -{ - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (lack of memory: %s)\n\n", message); - FPRINTF(STDOUTFILE "Hint for Macintosh users:\n"); - FPRINTF(STDOUTFILE "Use the command of the Finder to increase the memory partition!\n\n"); - exit(1); -} - - -/* - * memory allocate double vectors, matrices, and cubes - */ - -dvector new_dvector(int n) -{ - dvector v; - - v = (dvector) malloc((unsigned) (n * sizeof(double))); - if (v == NULL) maerror("step 1 in new_dvector"); - - return v; -} - -dmatrix new_dmatrix(int nrow, int ncol) -{ - int i; - dmatrix m; - - m = (dmatrix) malloc((unsigned) (nrow * sizeof(dvector))); - if (m == NULL) maerror("step 1 in in new_dmatrix"); - - *m = (dvector) malloc((unsigned) (nrow * ncol * sizeof(double))); - if (*m == NULL) maerror("step 2 in in new_dmatrix"); - - for (i = 1; i < nrow; i++) m[i] = m[i-1] + ncol; - - return m; -} - - - - -dcube new_dcube(int ntri, int nrow, int ncol) -{ - int i, j; - dcube c; - - c = (dcube) malloc((unsigned) (ntri * sizeof(dmatrix))); - if (c == NULL) maerror("step 1 in in new_dcube"); - - *c = (dmatrix) malloc((unsigned) (ntri * nrow * sizeof(dvector))); - if (*c == NULL) maerror("step 2 in in new_dcube"); - - **c = (dvector) malloc((unsigned) (ntri * nrow * ncol * sizeof(double))); - if (**c == NULL) maerror("step 3 in in new_dcube"); - - for (j = 1; j < nrow; j++) c[0][j] = c[0][j-1] + ncol; - - for (i = 1; i < ntri; i++) { - c[i] = c[i-1] + nrow; - c[i][0] = c[i-1][0] + nrow * ncol; - for (j = 1; j < nrow; j++) c[i][j] = c[i][j-1] + ncol; - } 
- - return c; -} - -void free_dvector(dvector v) -{ - free((double *) v); -} - -void free_dmatrix(dmatrix m) -{ - free((double *) *m); - free((double *) m); -} - -void free_dcube(dcube c) -{ - free((double *) **c); - free((double *) *c); - free((double *) c); -} - - -/* - * memory allocate char vectors, matrices, and cubes - */ - -cvector new_cvector(int n) -{ - cvector v; - - v = (cvector) malloc((unsigned)n * sizeof(char)); - if (v == NULL) maerror("step1 in new_cvector"); - - return v; -} - -cmatrix new_cmatrix(int nrow, int ncol) -{ - int i; - cmatrix m; - - m = (cmatrix) malloc((unsigned) (nrow * sizeof(cvector))); - if (m == NULL) maerror("step 1 in new_cmatrix"); - - *m = (cvector) malloc((unsigned) (nrow * ncol * sizeof(char))); - if (*m == NULL) maerror("step 2 in new_cmatrix"); - - for (i = 1; i < nrow; i++) m[i] = m[i-1] + ncol; - - return m; -} - -ccube new_ccube(int ntri, int nrow, int ncol) -{ - int i, j; - ccube c; - - c = (ccube) malloc((unsigned) (ntri * sizeof(cmatrix))); - if (c == NULL) maerror("step 1 in new_ccube"); - - *c = (cmatrix) malloc((unsigned) (ntri * nrow * sizeof(cvector))); - if (*c == NULL) maerror("step 2 in new_ccube"); - - **c = (cvector) malloc((unsigned) (ntri * nrow * ncol * sizeof(char))); - if (**c == NULL) maerror("step 3 in new_ccube"); - - for (j = 1; j < nrow; j++) c[0][j] = c[0][j-1] + ncol; - - for (i = 1; i < ntri; i++) { - c[i] = c[i-1] + nrow; - c[i][0] = c[i-1][0] + nrow * ncol; - for (j = 1; j < nrow; j++) c[i][j] = c[i][j-1] + ncol; - } - - return c; -} - -void free_cvector(cvector v) -{ - free((char *) v); -} - -void free_cmatrix(cmatrix m) -{ - free((char *) *m); - free((char *) m); -} - -void free_ccube(ccube c) -{ - free((char *) **c); - free((char *) *c); - free((char *) c); -} - - -/* - * memory allocate int vectors, matrices, and cubes - */ - -ivector new_ivector(int n) -{ - ivector v; - - v = (ivector) malloc((unsigned) (n * sizeof(int))); - if (v == NULL) maerror("step 1 in new_ivector"); - - return v; 
-} - -imatrix new_imatrix(int nrow, int ncol) -{ - int i; - imatrix m; - - m = (imatrix) malloc((unsigned) (nrow * sizeof(ivector))); - if (m == NULL) maerror("step 1 in new_imatrix"); - - *m = (ivector) malloc((unsigned) (nrow * ncol * sizeof(int))); - if (*m == NULL) maerror("step 2 in new_imatrix"); - - for (i = 1; i < nrow; i++) m[i] = m[i-1] + ncol; - - return m; -} - -icube new_icube(int ntri, int nrow, int ncol) -{ - int i, j; - icube c; - - c = (icube) malloc((unsigned) (ntri * sizeof(imatrix))); - if (c == NULL) maerror("step 1 in new_icube"); - - *c = (imatrix) malloc((unsigned) (ntri * nrow * sizeof(ivector))); - if (*c == NULL) maerror("step 2 in new_icube"); - - **c = (ivector) malloc((unsigned) (ntri * nrow * ncol * sizeof(int))); - if (**c == NULL) maerror("step 3 in new_icube"); - - for (j = 1; j < nrow; j++) c[0][j] = c[0][j-1] + ncol; - - for (i = 1; i < ntri; i++) { - c[i] = c[i-1] + nrow; - c[i][0] = c[i-1][0] + nrow * ncol; - for (j = 1; j < nrow; j++) c[i][j] = c[i][j-1] + ncol; - } - - return c; -} - -void free_ivector(ivector v) -{ - free((int *) v); -} - -void free_imatrix(imatrix m) -{ - free((int *) *m); - free((int *) m); -} - -void free_icube(icube c) -{ - free((int *) **c); - free((int *) *c); - free((int *) c); -} - - -/* - * memory allocate uli vectors, matrices, and cubes - */ - -ulivector new_ulivector(int n) -{ - ulivector v; - - v = (ulivector) malloc((unsigned) (n * sizeof(uli))); - if (v == NULL) maerror("step 1 in new_ulivector"); - - return v; -} - -ulimatrix new_ulimatrix(int nrow, int ncol) -{ - int i; - ulimatrix m; - - m = (ulimatrix) malloc((unsigned) (nrow * sizeof(ulivector))); - if (m == NULL) maerror("step 1 in new_ulimatrix"); - - *m = (ulivector) malloc((unsigned) (nrow * ncol * sizeof(uli))); - if (*m == NULL) maerror("step 2 in new_ulimatrix"); - - for (i = 1; i < nrow; i++) m[i] = m[i-1] + ncol; - - return m; -} - -ulicube new_ulicube(int ntri, int nrow, int ncol) -{ - int i, j; - ulicube c; - - c = (ulicube) 
malloc((unsigned) (ntri * sizeof(ulimatrix))); - if (c == NULL) maerror("step 1 in new_ulicube"); - - *c = (ulimatrix) malloc((unsigned) (ntri * nrow * sizeof(ulivector))); - if (*c == NULL) maerror("step 2 in new_ulicube"); - - **c = (ulivector) malloc((unsigned) (ntri * nrow * ncol * sizeof(uli))); - if (**c == NULL) maerror("step 3 in new_ulicube"); - - for (j = 1; j < nrow; j++) c[0][j] = c[0][j-1] + ncol; - - for (i = 1; i < ntri; i++) { - c[i] = c[i-1] + nrow; - c[i][0] = c[i-1][0] + nrow * ncol; - for (j = 1; j < nrow; j++) c[i][j] = c[i][j-1] + ncol; - } - - return c; -} - -void free_ulivector(ulivector v) -{ - free((uli *) v); -} - -void free_ulimatrix(ulimatrix m) -{ - free((uli *) *m); - free((uli *) m); -} - -void free_ulicube(ulicube c) -{ - free((uli *) **c); - free((uli *) *c); - free((uli *) c); -} - - -/******************************************************************************/ -/* random numbers generator (Numerical recipes) */ -/******************************************************************************/ - -/* definitions */ -#define IM1 2147483563 -#define IM2 2147483399 -#define AM (1.0/IM1) -#define IMM1 (IM1-1) -#define IA1 40014 -#define IA2 40692 -#define IQ1 53668 -#define IQ2 52774 -#define IR1 12211 -#define IR2 3791 -#define NTAB 32 -#define NDIV (1+IMM1/NTAB) -#define EPS 1.2e-7 -#define RNMX (1.0-EPS) - -/* variable */ -long idum; - -double randomunitintervall() -/* Long period (> 2e18) random number generator. Returns a uniform random - deviate between 0.0 and 1.0 (exclusive of endpoint values). 
- - Source: - Press et al., "Numerical recipes in C", Cambridge University Press, 1992 - (chapter 7 "Random numbers", ran2 random number generator) */ -{ - int j; - long k; - static long idum2=123456789; - static long iy=0; - static long iv[NTAB]; - double temp; - - if (idum <= 0) { - if (-(idum) < 1) - idum=1; - else - idum=-(idum); - idum2=(idum); - for (j=NTAB+7;j>=0;j--) { - k=(idum)/IQ1; - idum=IA1*(idum-k*IQ1)-k*IR1; - if (idum < 0) - idum += IM1; - if (j < NTAB) - iv[j] = idum; - } - iy=iv[0]; - } - k=(idum)/IQ1; - idum=IA1*(idum-k*IQ1)-k*IR1; - if (idum < 0) - idum += IM1; - k=idum2/IQ2; - idum2=IA2*(idum2-k*IQ2)-k*IR2; - if (idum2 < 0) - idum2 += IM2; - j=iy/NDIV; - iy=iv[j]-idum2; - iv[j] = idum; - if (iy < 1) - iy += IMM1; - if ((temp=AM*iy) > RNMX) - return RNMX; - else - return temp; -} - -#undef IM1 -#undef IM2 -#undef AM -#undef IMM1 -#undef IA1 -#undef IA2 -#undef IQ1 -#undef IQ2 -#undef IR1 -#undef IR2 -#undef NTAB -#undef NDIV -#undef EPS -#undef RNMX - -int initrandom(int seed) -{ - srand((unsigned) time(NULL)); - if (seed < 0) - seed = rand(); - idum=-(long) seed; -# ifdef PARALLEL - { - int n; - for (n=0; n= 0.0 ? fabs(a) : -fabs(a)) - -/* Brents method in one dimension */ -double brent(double ax, double bx, double cx, double (*f)(double), double tol, - double *foptx, double *f2optx, double fax, double fbx, double fcx) -{ - int iter; - double a,b,d=0,etemp,fu,fv,fw,fx,p,q,r,tol1,tol2,u,v,w,x,xm; - double xw,wv,vx; - double e=0.0; - - a=(ax < cx ? ax : cx); - b=(ax > cx ? 
ax : cx); - x=bx; - fx=fbx; - if (fax < fcx) { - w=ax; - fw=fax; - v=cx; - fv=fcx; - } else { - w=cx; - fw=fcx; - v=ax; - fv=fax; - } - for (iter=1;iter<=ITMAX;iter++) { - xm=0.5*(a+b); - tol2=2.0*(tol1=tol*fabs(x)+ZEPS); - if (fabs(x-xm) <= (tol2-0.5*(b-a))) { - *foptx = fx; - xw = x-w; - wv = w-v; - vx = v-x; - *f2optx = 2.0*(fv*xw + fx*wv + fw*vx)/ - (v*v*xw + x*x*wv + w*w*vx); - return x; - } - if (fabs(e) > tol1) { - r=(x-w)*(fx-fv); - q=(x-v)*(fx-fw); - p=(x-v)*q-(x-w)*r; - q=2.0*(q-r); - if (q > 0.0) p = -p; - q=fabs(q); - etemp=e; - e=d; - if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x)) - d=CGOLD*(e=(x >= xm ? a-x : b-x)); - else { - d=p/q; - u=x+d; - if (u-a < tol2 || b-u < tol2) - d=SIGN(tol1,xm-x); - } - } else { - d=CGOLD*(e=(x >= xm ? a-x : b-x)); - } - u=(fabs(d) >= tol1 ? x+d : x+SIGN(tol1,d)); - fu=(*f)(u); - if (fu <= fx) { - if (u >= x) a=x; else b=x; - SHFT(v,w,x,u) - SHFT(fv,fw,fx,fu) - } else { - if (u < x) a=u; else b=u; - if (fu <= fw || w == x) { - v=w; - w=u; - fv=fw; - fw=fu; - } else if (fu <= fv || v == x || v == w) { - v=u; - fv=fu; - } - } - } - *foptx = fx; - xw = x-w; - wv = w-v; - vx = v-x; - *f2optx = 2.0*(fv*xw + fx*wv + fw*vx)/ - (v*v*xw + x*x*wv + w*w*vx); - return x; -} -#undef ITMAX -#undef CGOLD -#undef ZEPS -#undef SHFT -#undef SIGN -#undef GOLD -#undef GLIMIT -#undef TINY - -/* one-dimensional minimization - as input a lower and an upper limit and a trial - value for the minimum is needed: xmin < xguess < xmax - the function and a fractional tolerance has to be specified - onedimenmin returns the optimal x value and the value of the function - and its second derivative at this point - */ -double onedimenmin(double xmin, double xguess, double xmax, double (*f)(double), - double tol, double *fx, double *f2x) -{ - double eps, optx, ax, bx, cx, fa, fb, fc; - - /* first attempt to bracketize minimum */ - eps = xguess*tol*50.0; - ax = xguess - eps; - if (ax < xmin) ax = xmin; - bx = xguess; - cx = xguess + eps; 
- if (cx > xmax) cx = xmax; - - /* check if this works */ - fa = (*f)(ax); - fb = (*f)(bx); - fc = (*f)(cx); - - /* if it works use these borders else be conservative */ - if ((fa < fb) || (fc < fb)) { - if (ax != xmin) fa = (*f)(xmin); - if (cx != xmax) fc = (*f)(xmax); - optx = brent(xmin, xguess, xmax, f, tol, fx, f2x, fa, fb, fc); - } else - optx = brent(ax, bx, cx, f, tol, fx, f2x, fa, fb, fc); - - return optx; /* return optimal x */ -} - -/* two-dimensional minimization with borders and calculations of standard errors */ -/* we optimize along basis vectors - not very optimal but it seems to work well */ -void twodimenmin(double tol, - int active1, double min1, double *x1, double max1, double (*func1)(double), double *err1, - int active2, double min2, double *x2, double max2, double (*func2)(double), double *err2) -{ - int it, nump, change; - double x1old, x2old; - double fx, f2x; - - it = 0; - nump = 0; - - /* count number of parameters */ - if (active1) nump++; - if (active2) nump++; - - do { /* repeat until nothing changes any more */ - it++; - change = FALSE; - - /* optimize first variable */ - if (active1) { - - if ((*x1) <= min1) (*x1) = min1 + 0.2*(max1-min1); - if ((*x1) >= max1) (*x1) = max1 - 0.2*(max1-min1); - x1old = (*x1); - (*x1) = onedimenmin(min1, (*x1), max1, func1, tol, &fx, &f2x); - if ((*x1) < min1) (*x1) = min1; - if ((*x1) > max1) (*x1) = max1; - /* same tolerance as 1D minimization */ - if (fabs((*x1) - x1old) > 3.3*tol) change = TRUE; - - /* standard error */ - f2x = fabs(f2x); - if (1.0/(max1*max1) < f2x) (*err1) = sqrt(1.0/f2x); - else (*err1) = max1; - - } - - /* optimize second variable */ - if (active2) { - - if ((*x2) <= min2) (*x2) = min2 + 0.2*(max2-min2); - if ((*x2) >= max2) (*x2) = max2 - 0.2*(max2-min2); - x2old = (*x2); - (*x2) = onedimenmin(min2, (*x2), max2, func2, tol, &fx, &f2x); - if ((*x2) < min2) (*x2) = min2; - if ((*x2) > max2) (*x2) = max2; - /* same tolerance as 1D minimization */ - if (fabs((*x2) - x2old) > 
3.3*tol) change = TRUE; - - /* standard error */ - f2x = fabs(f2x); - if (1.0/(max2*max2) < f2x) (*err2) = sqrt(1.0/f2x); - else (*err2) = max2; - - } - - if (nump == 1) return; - - } while (it != MAXITS && change); - - return; -} - diff --git a/forester/archive/RIO/others/puzzle_dqo/src/util.h b/forester/archive/RIO/others/puzzle_dqo/src/util.h deleted file mode 100644 index 20f37e5..0000000 --- a/forester/archive/RIO/others/puzzle_dqo/src/util.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * util.h - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -#ifndef _UTIL_ -#define _UTIL_ - -#include -#include -#include -#include - - -/* - * general definitions - */ - -#define TRUE 1 -#define FALSE 0 - -#ifdef PARALLEL - extern long int PP_randn; - extern long int PP_rand; -#endif - -/* - * type definitions - */ - -typedef unsigned long int uli; - -typedef double *dvector, **dmatrix, ***dcube; -typedef char *cvector, **cmatrix, ***ccube; -typedef int *ivector, **imatrix, ***icube; -typedef uli *ulivector, **ulimatrix, ***ulicube; - - -/* - * prototypes of functions defined in util.c - */ - -void maerror(char *message); - -dvector new_dvector(int n); -dmatrix new_dmatrix(int nrow, int ncol); -dcube new_dcube(int ntri, int nrow, int ncol); -void free_dvector(dvector v); -void free_dmatrix(dmatrix m); -void free_dcube(dcube c); - -cvector new_cvector(int n); -cmatrix new_cmatrix(int nrow, int ncol); -ccube new_ccube(int ntri, int nrow, int ncol); -void free_cvector(cvector v); -void free_cmatrix(cmatrix m); -void free_ccube(ccube c); - -ivector new_ivector(int n); -imatrix new_imatrix(int nrow, int ncol); -icube new_icube(int ntri, int nrow, int ncol); -void 
free_ivector(ivector v); -void free_imatrix(imatrix m); -void free_icube(icube c); - -ulivector new_ulivector(int n); -ulimatrix new_ulimatrix(int nrow, int ncol); -ulicube new_ulicube(int ntri, int nrow, int ncol); -void free_ulivector(ulivector v); -void free_ulimatrix(ulimatrix m); -void free_ulicube(ulicube c); - -double randomunitintervall(void); -int initrandom(int seed); -int randominteger(int n); -void chooser(int t, int s, ivector slist); -void *myrealloc(void *, size_t); -cvector mygets(void); - -#define MAXITS 10 /* maximum number of iterations in twoedimenmin */ -double onedimenmin(double, double, double, double (*f )(double ), double, double *, double *); -void twodimenmin(double, int, double, double *, double, double (*func1 )(double ), double *, int, double, double *, double, double (*func2 )(double ), double *); - - - -#endif diff --git a/forester/archive/RIO/others/puzzle_mod/AUTHORS b/forester/archive/RIO/others/puzzle_mod/AUTHORS deleted file mode 100644 index cbef439..0000000 --- a/forester/archive/RIO/others/puzzle_mod/AUTHORS +++ /dev/null @@ -1,45 +0,0 @@ -since 1999 by Heiko A. Schmidt, Korbinian Strimmer, - Martin Vingron, Arndt von Haeseler - -1995-1999 by Korbinian Strimmer and Arndt von Haeseler - - - -Heiko A. Schmidt - Theoretical Bioinformatics - Deutsches Krebsforschungszentrum (DKFZ) - Im Neuenheimer Feld 280 - D-69124 Heidelberg - Germany - - email: h.schmidt@dkfz-heidelberg.de, - http://www.dkfz-heidelberg.de/tbi/ - -Korbinian Strimmer - Department of Zoology - University of Oxford - South Parks Road - Oxford OX1 3PS, UK - - email: korbinian.strimmer@zoo.ox.ac.uk - http://www.zoo.ox.ac.uk/ - -Martin Vingron - Theoretical Bioinformatics - Deutsches Krebsforschungszentrum (DKFZ) - Im Neuenheimer Feld 280 - D-69124 Heidelberg - Germany - - email: vingron@dkfz-heidelberg.de - http://www.dkfz-heidelberg.de/tbi/ - -Arndt von Haeseler - Max-Planck-Institute for Evolutionary Anthropology - Inselstr. 
22 - D-04103 Leipzig - Germany - - email: haeseler@eva.mpg.de, - http://www.eva.mpg.de/ - diff --git a/forester/archive/RIO/others/puzzle_mod/COPYING b/forester/archive/RIO/others/puzzle_mod/COPYING deleted file mode 100644 index d60c31a..0000000 --- a/forester/archive/RIO/others/puzzle_mod/COPYING +++ /dev/null @@ -1,340 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. 
- - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) 
Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. 
- - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. 
You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. 
- -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. 
If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version.
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - <signature of Ty Coon>, 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License.
diff --git a/forester/archive/RIO/others/puzzle_mod/ChangeLog b/forester/archive/RIO/others/puzzle_mod/ChangeLog deleted file mode 100644 index 824b296..0000000 --- a/forester/archive/RIO/others/puzzle_mod/ChangeLog +++ /dev/null @@ -1,347 +0,0 @@ - -Version date what has been changed - -5.0 26.08.2000 - changes to manual, Makefile.in - - cpREV hidden by -DCPREV flag - - chi2test, quartio included into source code files - - generic scr/Makefile.generic - - src/makefile.com for VAX - - AUTHORS, README, ChangeLog updated - - INSTALL checked - 27.08.2000 - test code excluded - - '-randseed#' added for debugging purposes - - ./data added to autoconf/automake - - warning output if cmdline parameter unknown - 11.10.2000 - fixed output of rate categories of sites before - computing them - - check whether rate categories were computed by - 1st user tree or NJ tree fixed in the output - 12.10.2000 - invariant site model normalization fixed - - -CODE FREEZE -=========== - -5.0.a33 15.08.2000 - changes for autoconf/automake - -5.0.a32 01.08.2000 - a FPE error fixed (badq == 0) - - small error in -bestq fixed - - fflush's added at several places - -5.0.a31 01.08.2000 - comments added to tree structure sorting puzzle2.c - - changes in configure.in, Makefile.in - -5.0.a30 23.07.2000 - some debugging in checkquart - - changed to autoconf - -5.0.a29 13.07.2000 - some debugging in checkquart - -5.0.a28 13.07.2000 - use best quartet topology option (-bestq) implemented - -5.0.a27 13.07.2000 - further developement to checkquart - - ascii/binary quartet values (-wqla/-wqlb) - - typo correction - -5.0.a26 11.07.2000 - fflush at all checktimer - - further developement at checkquart - - possibility to write quartet values to file (-wqlh) - -5.0.a25 06.07.2000 - fflush at checktimer - -5.0.a24 02.07.2000 - further debugging of checkquart - -5.0.a23 02.07.2000 - further developement to checkquart - -5.0.a22 29.06.2000 - checkquart added to makefile - - bad quartet stats added after reading 
in *.allquarts - -5.0.a21 27.06.2000 - site pattern statistics implemented and added to - SEQUENCE ALIGNMENT section in puzzle report - -5.0.a20 26.06.2000 - cpREV45 implemented - -5.0.a19 26.06.2000 - for debugging purposes: typo "MPE" changed to "FPE" - - fflush(stdout) added in chi2test - -5.0.a18 20.06.2000 - checkquart implemented - -5.0.a17 19.06.2000 - FPRINTF(STDOUTFILE and STDOUT definition changed - and moved; fputid/fputid10 writes to STDOUT instead - of stdout - - ppuzzle checks slaves enough slave-processes - - numquarts, num2quart, quart2num moved from ppuzzle.c - to puzzle1.c - - read/writeallquart implemented (undocumented feature) - to be used by -wqf/-rqf at comandline - -wqf = write quartet file (infilename.allquart) after - quartet evaluation - -rqf = read quartet file (infilename.allquart), no - quartet evaluation, unless -wqf is used as - well, then quartets are written and read in - - '-h' option at comandline -> printusage - -5.0.a16 31.05.2000 - chi2test bug fixed - - WAG matrix added, model choice adopted - 13.06.2000 - date set to June 2000 - - author order changed to Schmidt, Strimmer, Vingron, - v.Haeseler - - CPU time output stopped, due to overflow errors - 16.06.2000 - sequence composition chi2test moved before - parameter output. - - output of chi2test and bad quartet statistics split, - to do the chi2test output earlier. - -5.0.a15 02.05.2000 - Names changed back from TREE-PUZZLE to PUZZLE - 09.05.2000 - and to TREE-PUZZLE again ;-) - -5.0.a14 13.03.2000 - Changes to the manual. - - Executable names changed to (p)treepuzzle. - (changes in the makefiles) - 15.03.2000 - Output of parameters after estimation added. - -5.0.a13 18.02.2000 - ALPHA version number removed from the code - -5.0.a12 18.02.2000 - CPU time measurement problems fixed for case where - clock_t is an unsigned type. - -5.0.a11 17.02.2000 - time measure problems (CPU/wallclock) fixed - not all features in addtimes are used at the moment. 
- - unnecessary and unused routines removed fron source - code. - -5.0.a10 20.01.2000 - Name changes from PUZZLE to TREE-PUZZLE - - Chi2-fit model guessing for VT model added - - little model printing bug fixed - -5.0.a9 22.12.1999 - VT Model incorporated (Mueller, Vingron (2000) - JCB, to appear). - - TODO: Chi2-fit model guessing for VT model - -5.0.a8 21.12.1999 - 'sys/times.h' and 'sys/types.h' removed from - puzzle.h. They were neither ANSI conform nor - necessary, but occured in the SUN man pages. - - Definition and call of writetimesstat eliminated - from the sequention version by compiler switched, - and not just the function body as before. - - '-O4' canged to '-O' to be more generic. - -5.0.a7 21.12.1999 - Macro constants introduced for data_optn - (NUCLEOTIDE, AMINOACID, BINARY) - - round robbing of datatype and AA model option changed - in menu to make adjustment of the model possible by a - determined sequence of letters: - 'd': Auto -> Nucleotides - -> Amino acids - -> Binary states - -> Auto - ('m' && data_optn == AMINOACID): - Auto -> Dayhoff - -> JTT - -> mtREV24 - -> BLOSUM62 - -> Auto - - manual.html adjusted - -5.0.a6 20.12.1999 - new manual.html added - -5.0.a5 07.12.1999 - output bug fixed (bestrates were written before they - were computed) - -5.0.a4 02.12.1999 - header file inclusion ajusted: - added: #include - changed from: #include "ppuzzle.h" - to: #ifdef PARALLEL - # include "ppuzzle.h" - #endif - -5.0.a3 27.11.1999 - '-h' comandline option removed, because of problems - with MPICH under LINUX - - new memory leaks of 5.0.a2 closed in PP_Finalize - -5.0.a2 27.11.1999 - Cleanup of the source code - - Measurement of CPU time added - - Parallel load statistics added (quartets, trees, time) - to puzzle report. - - Cleanup debug messages - - Comments "[...]" are removed from usertrees now. - - single quotes will only be printed arount species - names if -DUSEQUOTES is set at compiletime. 
- - tree likelihood is printed infront of a tree as a - comment, [ lh=-xx.xxxxx ](...); - -5.0.a1 26.11.1999 - Cleanup of the directories - - Copyright changes - - Version changes - - -VERSION CHANGE -============== - -4.1.a26 25.11.1999 - Makefile made universal for pauzzle and ppuzzle - - lines not needed removed from puzzle.h - -4.1.a25 19.11.1999 - Output file prefixes for distances, trees, and - puzzlereport changed in user trees analysis case - to user tree file name - - Temporary output of likelihood to treefile added - -4.1.a24 11.11.1999 - Output of puzzling step trees changed - ptorder: [ orderno # % ID #UniqTopos #Steps ]PHYLIP - pstep: chunk #InChunk sum ID #UniqTopos #Steps - - preliminary leap frog RNG implemented, i.e. uses - the rand4 in the usual way in the sequential case. - If run in parallel all rand4 are initialized with - the same seed and started with PP_Myid-th random - number. after that each process uses the every - PP_NumProcs-th random number to make sure that these - unique. - -4.1.a23 08.11.1999 - output of sequential and parallel version to *.pstep - made identical - -4.1.a22 05.11.1999 - two different puzzle step tree outputs intruduced - and added to the menu ("[ 1. 35 ](...);": - - ordered unique tree list -> *.ptorder - Format: "[ 1. 35 ]" (Ordernumber, Amount) - - chronological tree list -> *.pstep - Format: "[ 1. 35 ]" (Chunknumber, Amount in chunk) - (the last is a problem in parallel, because the come - in chunks, as scheduled) - - debugged the output -4.1.a21 04.11.1999 - Makefile adjustments for other Plattforms - - pstep tree output changed. unique treestructures - printed to *.pstep file with a leading comment - containing an order number and the ammount padded - with blanks (e.g. "[ 1. 356 ]('mouse'..."). - output is done right before writing the puzzle file. 
- - controlled MPI finish to the Quit menu option added - -4.1.a20 03.11.1999 - some garbage collection (free) added - - makefile adjusted, OFLAGS for optimization added - (ppuzzle/MPICH has problems with -O, so the - ppuzzle is created without optimization) - Some minor changes in the makefiles - - still to do: garbage collection from 'internalnode' - in master process - -4.1.a19 13.10.1999 - adding the output of standardized (i.e. sorted) - puzzling step trees. Those are printed to the - standard output at the moment. (Routines to sort - and print the trees implemented) - 14.10.1999 - routines for printing the sorted trees to a string. - needed to send them between Master and Worker, and - to have a unique key to sort and count the trees. - 21.10.1999 - counting of sorted trees implemented by doubly linked - list, sort routine, print to stdout - 25.10.1999 - change place of writing distances to file right after - distances have been computed. - - output of puzzling step trees now with true name, - not numbers - 02.11.1999 - parallel counting and sending of puzzling step trees - - some parallel sending bugs fixed - -4.1.a18 14.09.1999 - adding possibility to specify input file at - command line, this specifies also the output - filenames (puzzle output: *.puzzle; treefile: - *.tree; distances: *.dist; Triangel EPS: *.eps; - unresolved: *.qlist; puzzling step trees: *.pstep) - If an unexisting name is given, one has to reenter - the right name, but the wrong one is used as prefix. 
- 15.09.1999 - sending back of bad quartets from slaves added - - bug in quart2num fixed (not used before; was shifted - by 1) - - first version of a README added ;-) - -4.1.a17 03.08.1999 - Recv-Error in receiving DoPuzzleBlock fixed - - double freeing of same MPI_Datatype fixed - - changing of scheduling algorithm to smaller chunks - in gss -> sgss - 13.09.1999 - bug fixed in optimization routine in ml2.c: - boundary check added - -4.1.a16 12.07.1999 - slight changes in verbosity levels - - changed all printf to FPRINTF(STDOUTFILE to - change easily from stdout to a file. - -4.1.a15 08.07.1999 - scheduler for both parallel parts - - several small changes - -4.1.a14 25.06.1999 - computation of tree parallel, scheduler dependent, - sending all biparts in one message instead of one - by one - - several small changes since a13 in sched.c, et al. - -4.1.a13 10.06.1999 - computation of tree parallel (chunk = #trees/#slaves) - - scheduling schemes implemented for minimum chunk sizes - -4.1.a12 07.06.1999 - computation of quartets properly parallel - - scheduling implemented - - counting of quartets by slave ajusted - - TODO: sending of bad quartets (array + list) - - distinction between '1st user tree' and 'NJ tree' - in result output removed again - -4.1.a11 28.05.1999 - PP_SendDoQuartBlock, PP_RecvDoQuartBlock, - PP_SendQuartBlock, PP_RecvQuartBlock - - mallocquartets() changed from global to local - variables to be more flexible - - Quartet computation moved to slave (badquartet - handling missing: output, badquartet vector); - - distinction between '1st user tree' and 'NJ tree' - added in result output (puzzle1.c around l.1756) - -4.1.a10 20.05.1999 - num2quart, numquarts, quart2num introduced - - parallel init/finalize, quartets computed on - master and slave, compared -> equal -> all necessary - parameter exported - -4.1.a9 19.05.1999 - 'dvector forg' removed from onepamratematrix - cmdline, because it's not used in the function. 
- -4.1.a8 18.05.1999 - add _GAMMA_ (not necessary) to gamma.h and _PUZZLE_ - to puzzle.h to avoid dublicate includes, possible - due to ppuzzle.h - - ppuzzle added to makefile and to check - - 1st parallel version but no slave computations - only sending parameters and done signals. - -4.1.a7 18.05.1999 - export reevaluation of tree and evaluation of - usertrees to evaluatetree. - -4.1.a6 17.05.1999 - -DNEWFORLOOP added to fixed.src, because the changed - for loop structure changes the sequence of randomized - quartets during likelihood mapping - - change 'int main()' to 'int main(argc, argv)' - - export more functionalities from main: - memcleanup(), inputandinit(&argc, &argv) - - grouping if's (excluding eachother) together in - switch() - - split treereavaluation and 1st usertree, - evaluate all usertrees together (TODO: both, - treereavaluation and usertrees in one loop) - - MAKE CHECK added to ./makefile - -4.1.a5 16.05.1999 - adding ´dvector Brnlength´ to lslength cmdline to - reduce globality of Brnlength. 
(Later better to *Tree) - -4.1.a4 11.05.1999 - structure of for loops changed in computeallquartets - and recon_tree, so that the quarted addresses are in - one contigous sequence (for a /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done - for subdir in $(SUBDIRS); do \ - if test "$$subdir" = .; then :; else \ - test -d $(distdir)/$$subdir \ - || mkdir $(distdir)/$$subdir \ - || exit 1; \ - chmod 777 $(distdir)/$$subdir; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(distdir) distdir=../$(distdir)/$$subdir distdir) \ - || exit 1; \ - fi; \ - done -info-am: -info: info-recursive -dvi-am: -dvi: dvi-recursive -check-am: all-am -check: check-recursive -installcheck-am: -installcheck: installcheck-recursive -install-exec-am: -install-exec: install-exec-recursive - -install-data-am: -install-data: install-data-recursive - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-recursive -uninstall-am: -uninstall: uninstall-recursive -all-am: Makefile -all-redirect: all-recursive -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: installdirs-recursive -installdirs-am: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-tags mostlyclean-generic - -mostlyclean: mostlyclean-recursive - -clean-am: clean-tags clean-generic mostlyclean-am - -clean: clean-recursive - -distclean-am: distclean-tags distclean-generic clean-am - -distclean: distclean-recursive - -rm -f config.status - -maintainer-clean-am: maintainer-clean-tags maintainer-clean-generic \ - distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." 
- -maintainer-clean: maintainer-clean-recursive - -rm -f config.status - -.PHONY: install-data-recursive uninstall-data-recursive \ -install-exec-recursive uninstall-exec-recursive installdirs-recursive \ -uninstalldirs-recursive all-recursive check-recursive \ -installcheck-recursive info-recursive dvi-recursive \ -mostlyclean-recursive distclean-recursive clean-recursive \ -maintainer-clean-recursive tags tags-recursive mostlyclean-tags \ -distclean-tags clean-tags maintainer-clean-tags distdir info-am info \ -dvi-am dvi check check-am installcheck-am installcheck install-exec-am \ -install-exec install-data-am install-data install-am install \ -uninstall-am uninstall all-redirect all-am all installdirs-am \ -installdirs mostlyclean-generic distclean-generic clean-generic \ -maintainer-clean-generic clean mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_mod/Makefile.am b/forester/archive/RIO/others/puzzle_mod/Makefile.am deleted file mode 100644 index 2a0bac6..0000000 --- a/forester/archive/RIO/others/puzzle_mod/Makefile.am +++ /dev/null @@ -1,2 +0,0 @@ -EXTRA_DIST = -SUBDIRS = src doc data diff --git a/forester/archive/RIO/others/puzzle_mod/Makefile.in b/forester/archive/RIO/others/puzzle_mod/Makefile.in deleted file mode 100644 index 06043c6..0000000 --- a/forester/archive/RIO/others/puzzle_mod/Makefile.in +++ /dev/null @@ -1,327 +0,0 @@ -# Makefile.in generated automatically by automake 1.4 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. 
- -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = @SHELL@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -bindir = @bindir@ -sbindir = @sbindir@ -libexecdir = @libexecdir@ -datadir = @datadir@ -sysconfdir = @sysconfdir@ -sharedstatedir = @sharedstatedir@ -localstatedir = @localstatedir@ -libdir = @libdir@ -infodir = @infodir@ -mandir = @mandir@ -includedir = @includedir@ -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ - -top_builddir = . - -ACLOCAL = @ACLOCAL@ -AUTOCONF = @AUTOCONF@ -AUTOMAKE = @AUTOMAKE@ -AUTOHEADER = @AUTOHEADER@ - -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -transform = @program_transform_name@ - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = @CC@ -MAKEINFO = @MAKEINFO@ -MPICC = @MPICC@ -MPICC0 = @MPICC0@ -MPICC1 = @MPICC1@ -MPICC2 = @MPICC2@ -MPICC3 = @MPICC3@ -MPICC4 = @MPICC4@ -MPICC5 = @MPICC5@ -MPICFLAGS = @MPICFLAGS@ -MPIDEFS = @MPIDEFS@ -MPILIBS = @MPILIBS@ -PACKAGE = @PACKAGE@ -PPUZZLE = @PPUZZLE@ -VERSION = @VERSION@ - -EXTRA_DIST = -SUBDIRS = src doc data -ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = -DIST_COMMON = README AUTHORS COPYING ChangeLog INSTALL Makefile.am \ -Makefile.in NEWS aclocal.m4 configure configure.in install-sh missing \ -mkinstalldirs - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = gtar -GZIP_ENV = --best -all: all-redirect -.SUFFIXES: -$(srcdir)/Makefile.in: Makefile.am 
$(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status - -$(ACLOCAL_M4): configure.in - cd $(srcdir) && $(ACLOCAL) - -config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - $(SHELL) ./config.status --recheck -$(srcdir)/configure: $(srcdir)/configure.in $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES) - cd $(srcdir) && $(AUTOCONF) - -# This directory's subdirectories are mostly independent; you can cd -# into them and run `make' without going through this Makefile. -# To change the values of `make' variables: instead of editing Makefiles, -# (1) if the variable is set in `config.status', edit `config.status' -# (which will cause the Makefiles to be regenerated when you run `make'); -# (2) otherwise, pass the desired values on the `make' command line. - -@SET_MAKE@ - -all-recursive install-data-recursive install-exec-recursive \ -installdirs-recursive install-recursive uninstall-recursive \ -check-recursive installcheck-recursive info-recursive dvi-recursive: - @set fnord $(MAKEFLAGS); amf=$$2; \ - dot_seen=no; \ - target=`echo $@ | sed s/-recursive//`; \ - list='$(SUBDIRS)'; for subdir in $$list; do \ - echo "Making $$target in $$subdir"; \ - if test "$$subdir" = "."; then \ - dot_seen=yes; \ - local_target="$$target-am"; \ - else \ - local_target="$$target"; \ - fi; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ - || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ - done; \ - if test "$$dot_seen" = "no"; then \ - $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ - fi; test -z "$$fail" - -mostlyclean-recursive clean-recursive distclean-recursive \ -maintainer-clean-recursive: - @set fnord $(MAKEFLAGS); amf=$$2; \ - dot_seen=no; \ - rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \ - rev="$$subdir $$rev"; \ - test "$$subdir" = "." 
&& dot_seen=yes; \ - done; \ - test "$$dot_seen" = "no" && rev=". $$rev"; \ - target=`echo $@ | sed s/-recursive//`; \ - for subdir in $$rev; do \ - echo "Making $$target in $$subdir"; \ - if test "$$subdir" = "."; then \ - local_target="$$target-am"; \ - else \ - local_target="$$target"; \ - fi; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ - || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ - done && test -z "$$fail" -tags-recursive: - list='$(SUBDIRS)'; for subdir in $$list; do \ - test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ - done - -tags: TAGS - -ID: $(HEADERS) $(SOURCES) $(LISP) - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - here=`pwd` && cd $(srcdir) \ - && mkid -f$$here/ID $$unique $(LISP) - -TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SUBDIRS)'; for subdir in $$list; do \ - if test "$$subdir" = .; then :; else \ - test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \ - fi; \ - done; \ - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ - || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS) - -mostlyclean-tags: - -clean-tags: - -distclean-tags: - -rm -f TAGS ID - -maintainer-clean-tags: - -distdir = $(PACKAGE)-$(VERSION) -top_distdir = $(distdir) - -# This target untars the dist file and tries a VPATH configuration. Then -# it guarantees that the distribution is self-contained by making another -# tarfile. 
-distcheck: dist - -rm -rf $(distdir) - GZIP=$(GZIP_ENV) $(TAR) zxf $(distdir).tar.gz - mkdir $(distdir)/=build - mkdir $(distdir)/=inst - dc_install_base=`cd $(distdir)/=inst && pwd`; \ - cd $(distdir)/=build \ - && ../configure --srcdir=.. --prefix=$$dc_install_base \ - && $(MAKE) $(AM_MAKEFLAGS) \ - && $(MAKE) $(AM_MAKEFLAGS) dvi \ - && $(MAKE) $(AM_MAKEFLAGS) check \ - && $(MAKE) $(AM_MAKEFLAGS) install \ - && $(MAKE) $(AM_MAKEFLAGS) installcheck \ - && $(MAKE) $(AM_MAKEFLAGS) dist - -rm -rf $(distdir) - @banner="$(distdir).tar.gz is ready for distribution"; \ - dashes=`echo "$$banner" | sed s/./=/g`; \ - echo "$$dashes"; \ - echo "$$banner"; \ - echo "$$dashes" -dist: distdir - -chmod -R a+r $(distdir) - GZIP=$(GZIP_ENV) $(TAR) chozf $(distdir).tar.gz $(distdir) - -rm -rf $(distdir) -dist-all: distdir - -chmod -R a+r $(distdir) - GZIP=$(GZIP_ENV) $(TAR) chozf $(distdir).tar.gz $(distdir) - -rm -rf $(distdir) -distdir: $(DISTFILES) - -rm -rf $(distdir) - mkdir $(distdir) - -chmod 777 $(distdir) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$d/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done - for subdir in $(SUBDIRS); do \ - if test "$$subdir" = .; then :; else \ - test -d $(distdir)/$$subdir \ - || mkdir $(distdir)/$$subdir \ - || exit 1; \ - chmod 777 $(distdir)/$$subdir; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(distdir) distdir=../$(distdir)/$$subdir distdir) \ - || exit 1; \ - fi; \ - done -info-am: -info: info-recursive -dvi-am: -dvi: dvi-recursive -check-am: all-am -check: check-recursive -installcheck-am: -installcheck: installcheck-recursive -install-exec-am: -install-exec: install-exec-recursive - -install-data-am: -install-data: install-data-recursive - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: 
install-recursive -uninstall-am: -uninstall: uninstall-recursive -all-am: Makefile -all-redirect: all-recursive -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: installdirs-recursive -installdirs-am: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-tags mostlyclean-generic - -mostlyclean: mostlyclean-recursive - -clean-am: clean-tags clean-generic mostlyclean-am - -clean: clean-recursive - -distclean-am: distclean-tags distclean-generic clean-am - -distclean: distclean-recursive - -rm -f config.status - -maintainer-clean-am: maintainer-clean-tags maintainer-clean-generic \ - distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-recursive - -rm -f config.status - -.PHONY: install-data-recursive uninstall-data-recursive \ -install-exec-recursive uninstall-exec-recursive installdirs-recursive \ -uninstalldirs-recursive all-recursive check-recursive \ -installcheck-recursive info-recursive dvi-recursive \ -mostlyclean-recursive distclean-recursive clean-recursive \ -maintainer-clean-recursive tags tags-recursive mostlyclean-tags \ -distclean-tags clean-tags maintainer-clean-tags distdir info-am info \ -dvi-am dvi check check-am installcheck-am installcheck install-exec-am \ -install-exec install-data-am install-data install-am install \ -uninstall-am uninstall all-redirect all-am all installdirs-am \ -installdirs mostlyclean-generic distclean-generic clean-generic \ -maintainer-clean-generic clean mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. 
-.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_mod/aclocal.m4 b/forester/archive/RIO/others/puzzle_mod/aclocal.m4 deleted file mode 100644 index 9f8add8..0000000 --- a/forester/archive/RIO/others/puzzle_mod/aclocal.m4 +++ /dev/null @@ -1,104 +0,0 @@ -dnl aclocal.m4 generated automatically by aclocal 1.4 - -dnl Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -dnl This file is free software; the Free Software Foundation -dnl gives unlimited permission to copy and/or distribute it, -dnl with or without modifications, as long as this notice is preserved. - -dnl This program is distributed in the hope that it will be useful, -dnl but WITHOUT ANY WARRANTY, to the extent permitted by law; without -dnl even the implied warranty of MERCHANTABILITY or FITNESS FOR A -dnl PARTICULAR PURPOSE. - -# Do all the work for Automake. This macro actually does too much -- -# some checks are only needed if your package does certain things. -# But this isn't really a big deal. - -# serial 1 - -dnl Usage: -dnl AM_INIT_AUTOMAKE(package,version, [no-define]) - -AC_DEFUN(AM_INIT_AUTOMAKE, -[AC_REQUIRE([AC_PROG_INSTALL]) -PACKAGE=[$1] -AC_SUBST(PACKAGE) -VERSION=[$2] -AC_SUBST(VERSION) -dnl test to see if srcdir already configured -if test "`cd $srcdir && pwd`" != "`pwd`" && test -f $srcdir/config.status; then - AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) -fi -ifelse([$3],, -AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package]) -AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])) -AC_REQUIRE([AM_SANITY_CHECK]) -AC_REQUIRE([AC_ARG_PROGRAM]) -dnl FIXME This is truly gross. 
-missing_dir=`cd $ac_aux_dir && pwd` -AM_MISSING_PROG(ACLOCAL, aclocal, $missing_dir) -AM_MISSING_PROG(AUTOCONF, autoconf, $missing_dir) -AM_MISSING_PROG(AUTOMAKE, automake, $missing_dir) -AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir) -AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir) -AC_REQUIRE([AC_PROG_MAKE_SET])]) - -# -# Check to make sure that the build environment is sane. -# - -AC_DEFUN(AM_SANITY_CHECK, -[AC_MSG_CHECKING([whether build environment is sane]) -# Just in case -sleep 1 -echo timestamp > conftestfile -# Do `set' in a subshell so we don't clobber the current shell's -# arguments. Must try -L first in case configure is actually a -# symlink; some systems play weird games with the mod time of symlinks -# (eg FreeBSD returns the mod time of the symlink's containing -# directory). -if ( - set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null` - if test "[$]*" = "X"; then - # -L didn't work. - set X `ls -t $srcdir/configure conftestfile` - fi - if test "[$]*" != "X $srcdir/configure conftestfile" \ - && test "[$]*" != "X conftestfile $srcdir/configure"; then - - # If neither matched, then we have a broken ls. This can happen - # if, for instance, CONFIG_SHELL is bash and it inherits a - # broken ls alias from the environment. This has actually - # happened. Such a system could not be considered "sane". - AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken -alias in your environment]) - fi - - test "[$]2" = conftestfile - ) -then - # Ok. - : -else - AC_MSG_ERROR([newly created file is older than distributed files! -Check your system clock]) -fi -rm -f conftest* -AC_MSG_RESULT(yes)]) - -dnl AM_MISSING_PROG(NAME, PROGRAM, DIRECTORY) -dnl The program must properly implement --version. -AC_DEFUN(AM_MISSING_PROG, -[AC_MSG_CHECKING(for working $2) -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. 
-# Redirect stdin to placate older versions of autoconf. Sigh. -if ($2 --version) < /dev/null > /dev/null 2>&1; then - $1=$2 - AC_MSG_RESULT(found) -else - $1="$3/missing $2" - AC_MSG_RESULT(missing) -fi -AC_SUBST($1)]) - diff --git a/forester/archive/RIO/others/puzzle_mod/config.status b/forester/archive/RIO/others/puzzle_mod/config.status deleted file mode 100755 index da58b56..0000000 --- a/forester/archive/RIO/others/puzzle_mod/config.status +++ /dev/null @@ -1,179 +0,0 @@ -#! /bin/sh -# Generated automatically by configure. -# Run this file to recreate the current configuration. -# This directory was configured as follows, -# on host forester.wustl.edu: -# -# ./configure -# -# Compiler output produced by configure, useful for debugging -# configure, is in ./config.log if it exists. - -ac_cs_usage="Usage: ./config.status [--recheck] [--version] [--help]" -for ac_option -do - case "$ac_option" in - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - echo "running ${CONFIG_SHELL-/bin/sh} ./configure --no-create --no-recursion" - exec ${CONFIG_SHELL-/bin/sh} ./configure --no-create --no-recursion ;; - -version | --version | --versio | --versi | --vers | --ver | --ve | --v) - echo "./config.status generated by autoconf version 2.13" - exit 0 ;; - -help | --help | --hel | --he | --h) - echo "$ac_cs_usage"; exit 0 ;; - *) echo "$ac_cs_usage"; exit 1 ;; - esac -done - -ac_given_srcdir=. -ac_given_INSTALL="/usr/bin/install -c" - -trap 'rm -fr Makefile src/Makefile src/test doc/Makefile data/Makefile conftest*; exit 1' 1 2 15 - -# Protect against being on the right side of a sed subst in config.status. 
-sed 's/%@/@@/; s/@%/@@/; s/%g$/@g/; /@g$/s/[\\&%]/\\&/g; - s/@@/%@/; s/@@/@%/; s/@g$/%g/' > conftest.subs <<\CEOF -/^[ ]*VPATH[ ]*=[^:]*$/d - -s%@SHELL@%/bin/sh%g -s%@CFLAGS@%-g -O2%g -s%@CPPFLAGS@%%g -s%@CXXFLAGS@%%g -s%@FFLAGS@%%g -s%@DEFS@% -DPACKAGE=\"tree-puzzle\" -DVERSION=\"5.0\" -DHAVE_LIBM=1 -DSTDC_HEADERS=1 -DHAVE_LIMITS_H=1 %g -s%@LDFLAGS@%%g -s%@LIBS@%-lm %g -s%@exec_prefix@%${prefix}%g -s%@prefix@%/usr/local%g -s%@program_transform_name@%s,x,x,%g -s%@bindir@%${exec_prefix}/bin%g -s%@sbindir@%${exec_prefix}/sbin%g -s%@libexecdir@%${exec_prefix}/libexec%g -s%@datadir@%${prefix}/share%g -s%@sysconfdir@%${prefix}/etc%g -s%@sharedstatedir@%${prefix}/com%g -s%@localstatedir@%${prefix}/var%g -s%@libdir@%${exec_prefix}/lib%g -s%@includedir@%${prefix}/include%g -s%@oldincludedir@%/usr/include%g -s%@infodir@%${prefix}/info%g -s%@mandir@%${prefix}/man%g -s%@INSTALL_PROGRAM@%${INSTALL}%g -s%@INSTALL_SCRIPT@%${INSTALL_PROGRAM}%g -s%@INSTALL_DATA@%${INSTALL} -m 644%g -s%@PACKAGE@%tree-puzzle%g -s%@VERSION@%5.0%g -s%@ACLOCAL@%aclocal%g -s%@AUTOCONF@%autoconf%g -s%@AUTOMAKE@%automake%g -s%@AUTOHEADER@%autoheader%g -s%@MAKEINFO@%makeinfo%g -s%@SET_MAKE@%%g -s%@CC@%gcc%g -s%@MPICC0@%%g -s%@MPICC1@%%g -s%@MPICC2@%%g -s%@MPICC3@%%g -s%@MPICC4@%%g -s%@MPICC5@%%g -s%@MPICC@%%g -s%@MPILIBS@%%g -s%@MPIDEFS@%%g -s%@MPICFLAGS@%%g -s%@PPUZZLE@%%g -s%@CPP@%gcc -E%g - -CEOF - -# Split the substitutions into bite-sized pieces for seds with -# small command number limits, like on Digital OSF/1 and HP-UX. -ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. -ac_file=1 # Number of current file. -ac_beg=1 # First line for current file. -ac_end=$ac_max_sed_cmds # Line after last line for current file. -ac_more_lines=: -ac_sed_cmds="" -while $ac_more_lines; do - if test $ac_beg -gt 1; then - sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file - else - sed "${ac_end}q" conftest.subs > conftest.s$ac_file - fi - if test ! 
-s conftest.s$ac_file; then - ac_more_lines=false - rm -f conftest.s$ac_file - else - if test -z "$ac_sed_cmds"; then - ac_sed_cmds="sed -f conftest.s$ac_file" - else - ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" - fi - ac_file=`expr $ac_file + 1` - ac_beg=$ac_end - ac_end=`expr $ac_end + $ac_max_sed_cmds` - fi -done -if test -z "$ac_sed_cmds"; then - ac_sed_cmds=cat -fi - -CONFIG_FILES=${CONFIG_FILES-"Makefile src/Makefile src/test doc/Makefile data/Makefile"} -for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then - # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". - case "$ac_file" in - *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` - ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; - *) ac_file_in="${ac_file}.in" ;; - esac - - # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. - - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` - if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then - # The file is in a subdirectory. - test ! -d "$ac_dir" && mkdir "$ac_dir" - ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" - # A "../" for each directory in $ac_dir_suffix. - ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` - else - ac_dir_suffix= ac_dots= - fi - - case "$ac_given_srcdir" in - .) srcdir=. - if test -z "$ac_dots"; then top_srcdir=. - else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; - /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; - *) # Relative path. - srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" - top_srcdir="$ac_dots$ac_given_srcdir" ;; - esac - - case "$ac_given_INSTALL" in - [/$]*) INSTALL="$ac_given_INSTALL" ;; - *) INSTALL="$ac_dots$ac_given_INSTALL" ;; - esac - - echo creating "$ac_file" - rm -f "$ac_file" - configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." 
- case "$ac_file" in - *Makefile*) ac_comsub="1i\\ -# $configure_input" ;; - *) ac_comsub= ;; - esac - - ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` - sed -e "$ac_comsub -s%@configure_input@%$configure_input%g -s%@srcdir@%$srcdir%g -s%@top_srcdir@%$top_srcdir%g -s%@INSTALL@%$INSTALL%g -" $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file -fi; done -rm -f conftest.s* - - - -exit 0 diff --git a/forester/archive/RIO/others/puzzle_mod/configure b/forester/archive/RIO/others/puzzle_mod/configure deleted file mode 100755 index 5d4db41..0000000 --- a/forester/archive/RIO/others/puzzle_mod/configure +++ /dev/null @@ -1,2265 +0,0 @@ -#! /bin/sh - -# Guess values for system-dependent variables and create Makefiles. -# Generated automatically using autoconf version 2.13 -# Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. -# -# This configure script is free software; the Free Software Foundation -# gives unlimited permission to copy, distribute and modify it. - -# Defaults: -ac_help= -ac_default_prefix=/usr/local -# Any additions from configure.in: - -# Initialize some variables set by options. -# The variables have the same names as the options, with -# dashes changed to underlines. -build=NONE -cache_file=./config.cache -exec_prefix=NONE -host=NONE -no_create= -nonopt=NONE -no_recursion= -prefix=NONE -program_prefix=NONE -program_suffix=NONE -program_transform_name=s,x,x, -silent= -site= -srcdir= -target=NONE -verbose= -x_includes=NONE -x_libraries=NONE -bindir='${exec_prefix}/bin' -sbindir='${exec_prefix}/sbin' -libexecdir='${exec_prefix}/libexec' -datadir='${prefix}/share' -sysconfdir='${prefix}/etc' -sharedstatedir='${prefix}/com' -localstatedir='${prefix}/var' -libdir='${exec_prefix}/lib' -includedir='${prefix}/include' -oldincludedir='/usr/include' -infodir='${prefix}/info' -mandir='${prefix}/man' - -# Initialize some other variables. 
-subdirs= -MFLAGS= MAKEFLAGS= -SHELL=${CONFIG_SHELL-/bin/sh} -# Maximum number of lines to put in a shell here document. -ac_max_here_lines=12 - -ac_prev= -for ac_option -do - - # If the previous option needs an argument, assign it. - if test -n "$ac_prev"; then - eval "$ac_prev=\$ac_option" - ac_prev= - continue - fi - - case "$ac_option" in - -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) ac_optarg= ;; - esac - - # Accept the important Cygnus configure options, so we can diagnose typos. - - case "$ac_option" in - - -bindir | --bindir | --bindi | --bind | --bin | --bi) - ac_prev=bindir ;; - -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) - bindir="$ac_optarg" ;; - - -build | --build | --buil | --bui | --bu) - ac_prev=build ;; - -build=* | --build=* | --buil=* | --bui=* | --bu=*) - build="$ac_optarg" ;; - - -cache-file | --cache-file | --cache-fil | --cache-fi \ - | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) - ac_prev=cache_file ;; - -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ - | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) - cache_file="$ac_optarg" ;; - - -datadir | --datadir | --datadi | --datad | --data | --dat | --da) - ac_prev=datadir ;; - -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ - | --da=*) - datadir="$ac_optarg" ;; - - -disable-* | --disable-*) - ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - eval "enable_${ac_feature}=no" ;; - - -enable-* | --enable-*) - ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. 
- if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "enable_${ac_feature}='$ac_optarg'" ;; - - -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ - | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ - | --exec | --exe | --ex) - ac_prev=exec_prefix ;; - -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ - | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ - | --exec=* | --exe=* | --ex=*) - exec_prefix="$ac_optarg" ;; - - -gas | --gas | --ga | --g) - # Obsolete; use --with-gas. - with_gas=yes ;; - - -help | --help | --hel | --he) - # Omit some internal or obsolete options to make the list less imposing. - # This message is too long to be a string in the A/UX 3.1 sh. - cat << EOF -Usage: configure [options] [host] -Options: [defaults in brackets after descriptions] -Configuration: - --cache-file=FILE cache test results in FILE - --help print this message - --no-create do not create output files - --quiet, --silent do not print \`checking...' 
messages - --version print the version of autoconf that created configure -Directory and file names: - --prefix=PREFIX install architecture-independent files in PREFIX - [$ac_default_prefix] - --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX - [same as prefix] - --bindir=DIR user executables in DIR [EPREFIX/bin] - --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] - --libexecdir=DIR program executables in DIR [EPREFIX/libexec] - --datadir=DIR read-only architecture-independent data in DIR - [PREFIX/share] - --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] - --sharedstatedir=DIR modifiable architecture-independent data in DIR - [PREFIX/com] - --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] - --libdir=DIR object code libraries in DIR [EPREFIX/lib] - --includedir=DIR C header files in DIR [PREFIX/include] - --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] - --infodir=DIR info documentation in DIR [PREFIX/info] - --mandir=DIR man documentation in DIR [PREFIX/man] - --srcdir=DIR find the sources in DIR [configure dir or ..] 
- --program-prefix=PREFIX prepend PREFIX to installed program names - --program-suffix=SUFFIX append SUFFIX to installed program names - --program-transform-name=PROGRAM - run sed PROGRAM on installed program names -EOF - cat << EOF -Host type: - --build=BUILD configure for building on BUILD [BUILD=HOST] - --host=HOST configure for HOST [guessed] - --target=TARGET configure for TARGET [TARGET=HOST] -Features and packages: - --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) - --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] - --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) - --x-includes=DIR X include files are in DIR - --x-libraries=DIR X library files are in DIR -EOF - if test -n "$ac_help"; then - echo "--enable and --with options recognized:$ac_help" - fi - exit 0 ;; - - -host | --host | --hos | --ho) - ac_prev=host ;; - -host=* | --host=* | --hos=* | --ho=*) - host="$ac_optarg" ;; - - -includedir | --includedir | --includedi | --included | --include \ - | --includ | --inclu | --incl | --inc) - ac_prev=includedir ;; - -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ - | --includ=* | --inclu=* | --incl=* | --inc=*) - includedir="$ac_optarg" ;; - - -infodir | --infodir | --infodi | --infod | --info | --inf) - ac_prev=infodir ;; - -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) - infodir="$ac_optarg" ;; - - -libdir | --libdir | --libdi | --libd) - ac_prev=libdir ;; - -libdir=* | --libdir=* | --libdi=* | --libd=*) - libdir="$ac_optarg" ;; - - -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ - | --libexe | --libex | --libe) - ac_prev=libexecdir ;; - -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ - | --libexe=* | --libex=* | --libe=*) - libexecdir="$ac_optarg" ;; - - -localstatedir | --localstatedir | --localstatedi | --localstated \ - | --localstate | --localstat | --localsta | 
--localst \ - | --locals | --local | --loca | --loc | --lo) - ac_prev=localstatedir ;; - -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ - | --localstate=* | --localstat=* | --localsta=* | --localst=* \ - | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) - localstatedir="$ac_optarg" ;; - - -mandir | --mandir | --mandi | --mand | --man | --ma | --m) - ac_prev=mandir ;; - -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) - mandir="$ac_optarg" ;; - - -nfp | --nfp | --nf) - # Obsolete; use --without-fp. - with_fp=no ;; - - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) - no_create=yes ;; - - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) - no_recursion=yes ;; - - -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ - | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ - | --oldin | --oldi | --old | --ol | --o) - ac_prev=oldincludedir ;; - -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ - | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ - | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) - oldincludedir="$ac_optarg" ;; - - -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) - ac_prev=prefix ;; - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) - prefix="$ac_optarg" ;; - - -program-prefix | --program-prefix | --program-prefi | --program-pref \ - | --program-pre | --program-pr | --program-p) - ac_prev=program_prefix ;; - -program-prefix=* | --program-prefix=* | --program-prefi=* \ - | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) - program_prefix="$ac_optarg" ;; - - -program-suffix | --program-suffix | --program-suffi | --program-suff \ - | --program-suf | --program-su | --program-s) - ac_prev=program_suffix ;; - -program-suffix=* | 
--program-suffix=* | --program-suffi=* \ - | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) - program_suffix="$ac_optarg" ;; - - -program-transform-name | --program-transform-name \ - | --program-transform-nam | --program-transform-na \ - | --program-transform-n | --program-transform- \ - | --program-transform | --program-transfor \ - | --program-transfo | --program-transf \ - | --program-trans | --program-tran \ - | --progr-tra | --program-tr | --program-t) - ac_prev=program_transform_name ;; - -program-transform-name=* | --program-transform-name=* \ - | --program-transform-nam=* | --program-transform-na=* \ - | --program-transform-n=* | --program-transform-=* \ - | --program-transform=* | --program-transfor=* \ - | --program-transfo=* | --program-transf=* \ - | --program-trans=* | --program-tran=* \ - | --progr-tra=* | --program-tr=* | --program-t=*) - program_transform_name="$ac_optarg" ;; - - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - silent=yes ;; - - -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) - ac_prev=sbindir ;; - -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ - | --sbi=* | --sb=*) - sbindir="$ac_optarg" ;; - - -sharedstatedir | --sharedstatedir | --sharedstatedi \ - | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ - | --sharedst | --shareds | --shared | --share | --shar \ - | --sha | --sh) - ac_prev=sharedstatedir ;; - -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ - | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ - | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ - | --sha=* | --sh=*) - sharedstatedir="$ac_optarg" ;; - - -site | --site | --sit) - ac_prev=site ;; - -site=* | --site=* | --sit=*) - site="$ac_optarg" ;; - - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) - ac_prev=srcdir ;; - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) - 
srcdir="$ac_optarg" ;; - - -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ - | --syscon | --sysco | --sysc | --sys | --sy) - ac_prev=sysconfdir ;; - -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ - | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) - sysconfdir="$ac_optarg" ;; - - -target | --target | --targe | --targ | --tar | --ta | --t) - ac_prev=target ;; - -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) - target="$ac_optarg" ;; - - -v | -verbose | --verbose | --verbos | --verbo | --verb) - verbose=yes ;; - - -version | --version | --versio | --versi | --vers) - echo "configure generated by autoconf version 2.13" - exit 0 ;; - - -with-* | --with-*) - ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "with_${ac_package}='$ac_optarg'" ;; - - -without-* | --without-*) - ac_package=`echo $ac_option|sed -e 's/-*without-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - eval "with_${ac_package}=no" ;; - - --x) - # Obsolete; use --with-x. 
- with_x=yes ;; - - -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ - | --x-incl | --x-inc | --x-in | --x-i) - ac_prev=x_includes ;; - -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ - | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) - x_includes="$ac_optarg" ;; - - -x-libraries | --x-libraries | --x-librarie | --x-librari \ - | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) - ac_prev=x_libraries ;; - -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ - | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) - x_libraries="$ac_optarg" ;; - - -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } - ;; - - *) - if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then - echo "configure: warning: $ac_option: invalid host type" 1>&2 - fi - if test "x$nonopt" != xNONE; then - { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } - fi - nonopt="$ac_option" - ;; - - esac -done - -if test -n "$ac_prev"; then - { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } -fi - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -# File descriptor usage: -# 0 standard input -# 1 file creation -# 2 errors and warnings -# 3 some systems may open it to /dev/tty -# 4 used on the Kubota Titan -# 6 checking for... messages and results -# 5 compiler messages saved in config.log -if test "$silent" = yes; then - exec 6>/dev/null -else - exec 6>&1 -fi -exec 5>./config.log - -echo "\ -This file contains any messages produced by compilers while -running configure, to aid debugging if configure makes a mistake. -" 1>&5 - -# Strip out --no-create and --no-recursion so they do not pile up. -# Also quote any args containing shell metacharacters. 
-ac_configure_args= -for ac_arg -do - case "$ac_arg" in - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) ;; - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; - *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) - ac_configure_args="$ac_configure_args '$ac_arg'" ;; - *) ac_configure_args="$ac_configure_args $ac_arg" ;; - esac -done - -# NLS nuisances. -# Only set these to C if already set. These must not be set unconditionally -# because not all systems understand e.g. LANG=C (notably SCO). -# Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! -# Non-C LC_CTYPE values break the ctype check. -if test "${LANG+set}" = set; then LANG=C; export LANG; fi -if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi -if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi -if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi - -# confdefs.h avoids OS command line length limits that DEFS can exceed. -rm -rf conftest* confdefs.h -# AIX cpp loses on an empty file, so make sure it contains at least a newline. -echo > confdefs.h - -# A filename unique to this package, relative to the directory that -# configure is in, which we can look for to find out if srcdir is correct. -ac_unique_file=src/ml.h - -# Find the source files, if location was not specified. -if test -z "$srcdir"; then - ac_srcdir_defaulted=yes - # Try the directory containing this script, then its parent. - ac_prog=$0 - ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` - test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. - srcdir=$ac_confdir - if test ! -r $srcdir/$ac_unique_file; then - srcdir=.. - fi -else - ac_srcdir_defaulted=no -fi -if test ! -r $srcdir/$ac_unique_file; then - if test "$ac_srcdir_defaulted" = yes; then - { echo "configure: error: can not find sources in $ac_confdir or .." 
1>&2; exit 1; } - else - { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } - fi -fi -srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` - -# Prefer explicitly selected file to automatically selected ones. -if test -z "$CONFIG_SITE"; then - if test "x$prefix" != xNONE; then - CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" - else - CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" - fi -fi -for ac_site_file in $CONFIG_SITE; do - if test -r "$ac_site_file"; then - echo "loading site script $ac_site_file" - . "$ac_site_file" - fi -done - - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -ac_exeext= -ac_objext=o -if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then - # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. - if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then - ac_n= ac_c=' -' ac_t=' ' - else - ac_n=-n ac_c= ac_t= - fi -else - ac_n= ac_c='\c' ac_t= -fi - - - -ac_aux_dir= -for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do - if test -f $ac_dir/install-sh; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install-sh -c" - break - elif test -f $ac_dir/install.sh; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install.sh -c" - break - fi -done -if test -z "$ac_aux_dir"; then - { echo "configure: error: can not find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." 1>&2; exit 1; } -fi -ac_config_guess=$ac_aux_dir/config.guess -ac_config_sub=$ac_aux_dir/config.sub -ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. - -# Find a good install program. We prefer a C program (faster), -# so one script is as good as another. 
But avoid the broken or -# incompatible versions: -# SysV /etc/install, /usr/sbin/install -# SunOS /usr/etc/install -# IRIX /sbin/install -# AIX /bin/install -# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag -# AFS /usr/afsws/bin/install, which mishandles nonexistent args -# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" -# ./install, which can be erroneously created by make from ./install.sh. -echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 -echo "configure:550: checking for a BSD compatible install" >&5 -if test -z "$INSTALL"; then -if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" - for ac_dir in $PATH; do - # Account for people who put trailing slashes in PATH elements. - case "$ac_dir/" in - /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; - *) - # OSF1 and SCO ODT 3.0 have their own names for install. - # Don't use installbsd from OSF since it installs stuff as root - # by default. - for ac_prog in ginstall scoinst install; do - if test -f $ac_dir/$ac_prog; then - if test $ac_prog = install && - grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then - # AIX install. It has an incompatible calling convention. - : - else - ac_cv_path_install="$ac_dir/$ac_prog -c" - break 2 - fi - fi - done - ;; - esac - done - IFS="$ac_save_IFS" - -fi - if test "${ac_cv_path_install+set}" = set; then - INSTALL="$ac_cv_path_install" - else - # As a last resort, use the slow shell script. We don't cache a - # path for INSTALL within a source directory, because that will - # break other packages using the cache if that directory is - # removed, or if the path is relative. - INSTALL="$ac_install_sh" - fi -fi -echo "$ac_t""$INSTALL" 1>&6 - -# Use test -z because SunOS4 sh mishandles braces in ${var-val}. -# It thinks the first close brace ends the variable substitution. 
-test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' - -test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' - -test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' - -echo $ac_n "checking whether build environment is sane""... $ac_c" 1>&6 -echo "configure:603: checking whether build environment is sane" >&5 -# Just in case -sleep 1 -echo timestamp > conftestfile -# Do `set' in a subshell so we don't clobber the current shell's -# arguments. Must try -L first in case configure is actually a -# symlink; some systems play weird games with the mod time of symlinks -# (eg FreeBSD returns the mod time of the symlink's containing -# directory). -if ( - set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null` - if test "$*" = "X"; then - # -L didn't work. - set X `ls -t $srcdir/configure conftestfile` - fi - if test "$*" != "X $srcdir/configure conftestfile" \ - && test "$*" != "X conftestfile $srcdir/configure"; then - - # If neither matched, then we have a broken ls. This can happen - # if, for instance, CONFIG_SHELL is bash and it inherits a - # broken ls alias from the environment. This has actually - # happened. Such a system could not be considered "sane". - { echo "configure: error: ls -t appears to fail. Make sure there is not a broken -alias in your environment" 1>&2; exit 1; } - fi - - test "$2" = conftestfile - ) -then - # Ok. - : -else - { echo "configure: error: newly created file is older than distributed files! -Check your system clock" 1>&2; exit 1; } -fi -rm -f conftest* -echo "$ac_t""yes" 1>&6 -if test "$program_transform_name" = s,x,x,; then - program_transform_name= -else - # Double any \ or $. echo might interpret backslashes. 
- cat <<\EOF_SED > conftestsed -s,\\,\\\\,g; s,\$,$$,g -EOF_SED - program_transform_name="`echo $program_transform_name|sed -f conftestsed`" - rm -f conftestsed -fi -test "$program_prefix" != NONE && - program_transform_name="s,^,${program_prefix},; $program_transform_name" -# Use a double $ so make ignores it. -test "$program_suffix" != NONE && - program_transform_name="s,\$\$,${program_suffix},; $program_transform_name" - -# sed with no file args requires a program. -test "$program_transform_name" = "" && program_transform_name="s,x,x," - -echo $ac_n "checking whether ${MAKE-make} sets \${MAKE}""... $ac_c" 1>&6 -echo "configure:660: checking whether ${MAKE-make} sets \${MAKE}" >&5 -set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_prog_make_${ac_make}_set'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftestmake <<\EOF -all: - @echo 'ac_maketemp="${MAKE}"' -EOF -# GNU make sometimes prints "make[1]: Entering...", which would confuse us. -eval `${MAKE-make} -f conftestmake 2>/dev/null | grep temp=` -if test -n "$ac_maketemp"; then - eval ac_cv_prog_make_${ac_make}_set=yes -else - eval ac_cv_prog_make_${ac_make}_set=no -fi -rm -f conftestmake -fi -if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then - echo "$ac_t""yes" 1>&6 - SET_MAKE= -else - echo "$ac_t""no" 1>&6 - SET_MAKE="MAKE=${MAKE-make}" -fi - - -PACKAGE=tree-puzzle - -VERSION=5.0 - -if test "`cd $srcdir && pwd`" != "`pwd`" && test -f $srcdir/config.status; then - { echo "configure: error: source directory already configured; run "make distclean" there first" 1>&2; exit 1; } -fi -cat >> confdefs.h <> confdefs.h <&6 -echo "configure:706: checking for working aclocal" >&5 -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. -# Redirect stdin to placate older versions of autoconf. Sigh. 
-if (aclocal --version) < /dev/null > /dev/null 2>&1; then - ACLOCAL=aclocal - echo "$ac_t""found" 1>&6 -else - ACLOCAL="$missing_dir/missing aclocal" - echo "$ac_t""missing" 1>&6 -fi - -echo $ac_n "checking for working autoconf""... $ac_c" 1>&6 -echo "configure:719: checking for working autoconf" >&5 -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. -# Redirect stdin to placate older versions of autoconf. Sigh. -if (autoconf --version) < /dev/null > /dev/null 2>&1; then - AUTOCONF=autoconf - echo "$ac_t""found" 1>&6 -else - AUTOCONF="$missing_dir/missing autoconf" - echo "$ac_t""missing" 1>&6 -fi - -echo $ac_n "checking for working automake""... $ac_c" 1>&6 -echo "configure:732: checking for working automake" >&5 -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. -# Redirect stdin to placate older versions of autoconf. Sigh. -if (automake --version) < /dev/null > /dev/null 2>&1; then - AUTOMAKE=automake - echo "$ac_t""found" 1>&6 -else - AUTOMAKE="$missing_dir/missing automake" - echo "$ac_t""missing" 1>&6 -fi - -echo $ac_n "checking for working autoheader""... $ac_c" 1>&6 -echo "configure:745: checking for working autoheader" >&5 -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. -# Redirect stdin to placate older versions of autoconf. Sigh. -if (autoheader --version) < /dev/null > /dev/null 2>&1; then - AUTOHEADER=autoheader - echo "$ac_t""found" 1>&6 -else - AUTOHEADER="$missing_dir/missing autoheader" - echo "$ac_t""missing" 1>&6 -fi - -echo $ac_n "checking for working makeinfo""... $ac_c" 1>&6 -echo "configure:758: checking for working makeinfo" >&5 -# Run test in a subshell; some versions of sh will print an error if -# an executable is not found, even if stderr is redirected. 
-# Redirect stdin to placate older versions of autoconf. Sigh. -if (makeinfo --version) < /dev/null > /dev/null 2>&1; then - MAKEINFO=makeinfo - echo "$ac_t""found" 1>&6 -else - MAKEINFO="$missing_dir/missing makeinfo" - echo "$ac_t""missing" 1>&6 -fi - - - -# Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:775: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="gcc" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:805: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_prog_rejected=no - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - break - fi - done - IFS="$ac_save_ifs" -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# -gt 0; then - # We chose a different compiler from the bogus one. 
- # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. - shift - set dummy "$ac_dir/$ac_word" "$@" - shift - ac_cv_prog_CC="$@" - fi -fi -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - if test -z "$CC"; then - case "`uname -s`" in - *win32* | *WIN32*) - # Extract the first word of "cl", so it can be a program name with args. -set dummy cl; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:856: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="cl" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - ;; - esac - fi - test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; } -fi - -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 -echo "configure:888: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. 
-ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -cat > conftest.$ac_ext << EOF - -#line 899 "configure" -#include "confdefs.h" - -main(){return(0);} -EOF -if { (eval echo configure:904: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - ac_cv_prog_cc_works=yes - # If we can't run a trivial program, we are probably using a cross compiler. - if (./conftest; exit) 2>/dev/null; then - ac_cv_prog_cc_cross=no - else - ac_cv_prog_cc_cross=yes - fi -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - ac_cv_prog_cc_works=no -fi -rm -fr conftest* -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -echo "$ac_t""$ac_cv_prog_cc_works" 1>&6 -if test $ac_cv_prog_cc_works = no; then - { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } -fi -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 -echo "configure:930: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 -echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 -cross_compiling=$ac_cv_prog_cc_cross - -echo $ac_n "checking whether we are using GNU C""... 
$ac_c" 1>&6 -echo "configure:935: checking whether we are using GNU C" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then - ac_cv_prog_gcc=yes -else - ac_cv_prog_gcc=no -fi -fi - -echo "$ac_t""$ac_cv_prog_gcc" 1>&6 - -if test $ac_cv_prog_gcc = yes; then - GCC=yes -else - GCC= -fi - -ac_test_CFLAGS="${CFLAGS+set}" -ac_save_CFLAGS="$CFLAGS" -CFLAGS= -echo $ac_n "checking whether ${CC-cc} accepts -g""... $ac_c" 1>&6 -echo "configure:963: checking whether ${CC-cc} accepts -g" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - echo 'void f(){}' > conftest.c -if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then - ac_cv_prog_cc_g=yes -else - ac_cv_prog_cc_g=no -fi -rm -f conftest* - -fi - -echo "$ac_t""$ac_cv_prog_cc_g" 1>&6 -if test "$ac_test_CFLAGS" = set; then - CFLAGS="$ac_save_CFLAGS" -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi - -if test "x$CC" != xcc; then - echo $ac_n "checking whether $CC and cc understand -c and -o together""... $ac_c" 1>&6 -echo "configure:996: checking whether $CC and cc understand -c and -o together" >&5 -else - echo $ac_n "checking whether cc understands -c and -o together""... $ac_c" 1>&6 -echo "configure:999: checking whether cc understands -c and -o together" >&5 -fi -set dummy $CC; ac_cc="`echo $2 | - sed -e 's/[^a-zA-Z0-9_]/_/g' -e 's/^[0-9]/_/'`" -if eval "test \"`echo '$''{'ac_cv_prog_cc_${ac_cc}_c_o'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - echo 'foo(){}' > conftest.c -# Make sure it works both with $CC and with simple cc. -# We do the test twice because some compilers refuse to overwrite an -# existing .o file with -o, though they will create one. 
-ac_try='${CC-cc} -c conftest.c -o conftest.o 1>&5' -if { (eval echo configure:1011: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } && - test -f conftest.o && { (eval echo configure:1012: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; -then - eval ac_cv_prog_cc_${ac_cc}_c_o=yes - if test "x$CC" != xcc; then - # Test first that cc exists at all. - if { ac_try='cc -c conftest.c 1>&5'; { (eval echo configure:1017: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; }; then - ac_try='cc -c conftest.c -o conftest.o 1>&5' - if { (eval echo configure:1019: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } && - test -f conftest.o && { (eval echo configure:1020: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; - then - # cc works too. - : - else - # cc exists but doesn't like -o. - eval ac_cv_prog_cc_${ac_cc}_c_o=no - fi - fi - fi -else - eval ac_cv_prog_cc_${ac_cc}_c_o=no -fi -rm -f conftest* - -fi -if eval "test \"`echo '$ac_cv_prog_cc_'${ac_cc}_c_o`\" = yes"; then - echo "$ac_t""yes" 1>&6 -else - echo "$ac_t""no" 1>&6 - cat >> confdefs.h <<\EOF -#define NO_MINUS_C_MINUS_O 1 -EOF - -fi - -# Find a good install program. We prefer a C program (faster), -# so one script is as good as another. But avoid the broken or -# incompatible versions: -# SysV /etc/install, /usr/sbin/install -# SunOS /usr/etc/install -# IRIX /sbin/install -# AIX /bin/install -# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag -# AFS /usr/afsws/bin/install, which mishandles nonexistent args -# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" -# ./install, which can be erroneously created by make from ./install.sh. -echo $ac_n "checking for a BSD compatible install""... 
$ac_c" 1>&6 -echo "configure:1058: checking for a BSD compatible install" >&5 -if test -z "$INSTALL"; then -if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" - for ac_dir in $PATH; do - # Account for people who put trailing slashes in PATH elements. - case "$ac_dir/" in - /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; - *) - # OSF1 and SCO ODT 3.0 have their own names for install. - # Don't use installbsd from OSF since it installs stuff as root - # by default. - for ac_prog in ginstall scoinst install; do - if test -f $ac_dir/$ac_prog; then - if test $ac_prog = install && - grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then - # AIX install. It has an incompatible calling convention. - : - else - ac_cv_path_install="$ac_dir/$ac_prog -c" - break 2 - fi - fi - done - ;; - esac - done - IFS="$ac_save_IFS" - -fi - if test "${ac_cv_path_install+set}" = set; then - INSTALL="$ac_cv_path_install" - else - # As a last resort, use the slow shell script. We don't cache a - # path for INSTALL within a source directory, because that will - # break other packages using the cache if that directory is - # removed, or if the path is relative. - INSTALL="$ac_install_sh" - fi -fi -echo "$ac_t""$INSTALL" 1>&6 - -# Use test -z because SunOS4 sh mishandles braces in ${var-val}. -# It thinks the first close brace ends the variable substitution. -test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' - -test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' - -test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' - -echo $ac_n "checking whether ${MAKE-make} sets \${MAKE}""... 
$ac_c" 1>&6 -echo "configure:1111: checking whether ${MAKE-make} sets \${MAKE}" >&5 -set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_prog_make_${ac_make}_set'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftestmake <<\EOF -all: - @echo 'ac_maketemp="${MAKE}"' -EOF -# GNU make sometimes prints "make[1]: Entering...", which would confuse us. -eval `${MAKE-make} -f conftestmake 2>/dev/null | grep temp=` -if test -n "$ac_maketemp"; then - eval ac_cv_prog_make_${ac_make}_set=yes -else - eval ac_cv_prog_make_${ac_make}_set=no -fi -rm -f conftestmake -fi -if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then - echo "$ac_t""yes" 1>&6 - SET_MAKE= -else - echo "$ac_t""no" 1>&6 - SET_MAKE="MAKE=${MAKE-make}" -fi - - - - - -if test "$MPICC" != "" ; then - # Extract the first word of "$MPICC", so it can be a program name with args. -set dummy $MPICC; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1145: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC0'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC0" in - /*) - ac_cv_path_MPICC0="$MPICC0" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC0="$MPICC0" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC0="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC0="$ac_cv_path_MPICC0" -if test -n "$MPICC0"; then - echo "$ac_t""$MPICC0" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -fi -# Extract the first word of "mpcc", so it can be a program name with args. -set dummy mpcc; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:1181: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC1'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC1" in - /*) - ac_cv_path_MPICC1="$MPICC1" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC1="$MPICC1" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC1="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC1="$ac_cv_path_MPICC1" -if test -n "$MPICC1"; then - echo "$ac_t""$MPICC1" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -# Extract the first word of "hcc", so it can be a program name with args. -set dummy hcc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1216: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC2'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC2" in - /*) - ac_cv_path_MPICC2="$MPICC2" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC2="$MPICC2" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC2="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC2="$ac_cv_path_MPICC2" -if test -n "$MPICC2"; then - echo "$ac_t""$MPICC2" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -# Extract the first word of "mpicc", so it can be a program name with args. -set dummy mpicc; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:1251: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC3'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC3" in - /*) - ac_cv_path_MPICC3="$MPICC3" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC3="$MPICC3" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC3="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC3="$ac_cv_path_MPICC3" -if test -n "$MPICC3"; then - echo "$ac_t""$MPICC3" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -# Extract the first word of "mpicc_lam", so it can be a program name with args. -set dummy mpicc_lam; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1286: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC4'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC4" in - /*) - ac_cv_path_MPICC4="$MPICC4" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC4="$MPICC4" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC4="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC4="$ac_cv_path_MPICC4" -if test -n "$MPICC4"; then - echo "$ac_t""$MPICC4" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -# Extract the first word of "mpicc_mpich", so it can be a program name with args. -set dummy mpicc_mpich; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:1321: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MPICC5'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MPICC5" in - /*) - ac_cv_path_MPICC5="$MPICC5" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_MPICC5="$MPICC5" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_MPICC5="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - ;; -esac -fi -MPICC5="$ac_cv_path_MPICC5" -if test -n "$MPICC5"; then - echo "$ac_t""$MPICC5" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - - if test "$MPICC0" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC0 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1371: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1382: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? 
= 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - if test "$MPICC1" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC1 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1419: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1430: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - if test "$MPICC2" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC2 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1467: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1478: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? 
= 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - if test "$MPICC3" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC3 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1515: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1526: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - if test "$MPICC4" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC4 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1563: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1574: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? 
= 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - if test "$MPICC5" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$MPICC5 - - if test "$MPICC" != "" ; then - echo $ac_n "checking whether $MPICC works as MPI compiler""... $ac_c" 1>&6 -echo "configure:1611: checking whether $MPICC works as MPI compiler" >&5 - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - echo $ac_n "checking whether $MPICC needs -lmpi""... $ac_c" 1>&6 -echo "configure:1622: checking whether $MPICC needs -lmpi" >&5 - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? = 0 ; then - echo "$ac_t""yes" 1>&6 - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - echo "$ac_t""no" 1>&6 - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi - -ac_cv_prog_MPICC=$MPICC - - - - - - - -echo $ac_n "checking for main in -lm""... 
$ac_c" 1>&6 -echo "configure:1652: checking for main in -lm" >&5 -ac_lib_var=`echo m'_'main | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lm $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo m | sed -e 's/[^a-zA-Z0-9_]/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - - -echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 -echo "configure:1696: checking how to run the C preprocessor" >&5 -# On Suns, sometimes $CPP names a directory. -if test -n "$CPP" && test -d "$CPP"; then - CPP= -fi -if test -z "$CPP"; then -if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - # This must be in double quotes, not single quotes, because CPP may get - # substituted into the Makefile and "${CC-cc}" will confuse make. - CPP="${CC-cc} -E" - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. 
- cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1717: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -E -traditional-cpp" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1734: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -nologo -E" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1751: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP=/lib/cpp -fi -rm -f conftest* -fi -rm -f conftest* -fi -rm -f conftest* - ac_cv_prog_CPP="$CPP" -fi - CPP="$ac_cv_prog_CPP" -else - ac_cv_prog_CPP="$CPP" -fi -echo "$ac_t""$CPP" 1>&6 - -echo $ac_n "checking for ANSI C header files""... 
$ac_c" 1>&6 -echo "configure:1776: checking for ANSI C header files" >&5 -if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include -#include -#include -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1789: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - ac_cv_header_stdc=yes -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -if test $ac_cv_header_stdc = yes; then - # SunOS 4.x string.h does not declare mem*, contrary to ANSI. -cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "memchr" >/dev/null 2>&1; then - : -else - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. -cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "free" >/dev/null 2>&1; then - : -else - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. -if test "$cross_compiling" = yes; then - : -else - cat > conftest.$ac_ext < -#define ISLOWER(c) ('a' <= (c) && (c) <= 'z') -#define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) -#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) -int main () { int i; for (i = 0; i < 256; i++) -if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); -exit (0); } - -EOF -if { (eval echo configure:1856: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - : -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_header_stdc=no -fi -rm -fr conftest* -fi - -fi -fi - -echo "$ac_t""$ac_cv_header_stdc" 1>&6 -if test $ac_cv_header_stdc = yes; then - cat >> confdefs.h <<\EOF -#define STDC_HEADERS 1 -EOF - -fi - -for ac_hdr in limits.h -do -ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 -echo "configure:1883: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1893: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - - - - -echo $ac_n "checking for working const""... 
$ac_c" 1>&6 -echo "configure:1923: checking for working const" >&5 -if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext <j = 5; -} -{ /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ - const int foo = 10; -} - -; return 0; } -EOF -if { (eval echo configure:1977: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_c_const=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_c_const=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_c_const" 1>&6 -if test $ac_cv_c_const = no; then - cat >> confdefs.h <<\EOF -#define const -EOF - -fi - -echo $ac_n "checking for size_t""... $ac_c" 1>&6 -echo "configure:1998: checking for size_t" >&5 -if eval "test \"`echo '$''{'ac_cv_type_size_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#if STDC_HEADERS -#include -#include -#endif -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "(^|[^a-zA-Z_0-9])size_t[^a-zA-Z_0-9]" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_type_size_t=yes -else - rm -rf conftest* - ac_cv_type_size_t=no -fi -rm -f conftest* - -fi -echo "$ac_t""$ac_cv_type_size_t" 1>&6 -if test $ac_cv_type_size_t = no; then - cat >> confdefs.h <<\EOF -#define size_t unsigned -EOF - -fi - - - -trap '' 1 2 15 - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -test "x$prefix" = xNONE && prefix=$ac_default_prefix -# Let make expand exec_prefix. -test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' - -# Any assignment to VPATH causes Sun make to only execute -# the first set of double-colon rules, so remove it if not needed. -# If there is a colon in the path, we need to keep it. -if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' -fi - -trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 - -# Transform confdefs.h into DEFS. 
-# Protect against shell expansion while executing Makefile rules. -# Protect against Makefile macro expansion. -cat > conftest.defs <<\EOF -s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%-D\1=\2%g -s%[ `~#$^&*(){}\\|;'"<>?]%\\&%g -s%\[%\\&%g -s%\]%\\&%g -s%\$%$$%g -EOF -DEFS=`sed -f conftest.defs confdefs.h | tr '\012' ' '` -rm -f conftest.defs - - -# Without the "./", some shells look in PATH for config.status. -: ${CONFIG_STATUS=./config.status} - -echo creating $CONFIG_STATUS -rm -f $CONFIG_STATUS -cat > $CONFIG_STATUS </dev/null | sed 1q`: -# -# $0 $ac_configure_args -# -# Compiler output produced by configure, useful for debugging -# configure, is in ./config.log if it exists. - -ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" -for ac_option -do - case "\$ac_option" in - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" - exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; - -version | --version | --versio | --versi | --vers | --ver | --ve | --v) - echo "$CONFIG_STATUS generated by autoconf version 2.13" - exit 0 ;; - -help | --help | --hel | --he | --h) - echo "\$ac_cs_usage"; exit 0 ;; - *) echo "\$ac_cs_usage"; exit 1 ;; - esac -done - -ac_given_srcdir=$srcdir -ac_given_INSTALL="$INSTALL" - -trap 'rm -fr `echo "Makefile src/Makefile src/test doc/Makefile data/Makefile" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 -EOF -cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF -$ac_vpsub -$extrasub -s%@SHELL@%$SHELL%g -s%@CFLAGS@%$CFLAGS%g -s%@CPPFLAGS@%$CPPFLAGS%g -s%@CXXFLAGS@%$CXXFLAGS%g -s%@FFLAGS@%$FFLAGS%g -s%@DEFS@%$DEFS%g -s%@LDFLAGS@%$LDFLAGS%g -s%@LIBS@%$LIBS%g -s%@exec_prefix@%$exec_prefix%g -s%@prefix@%$prefix%g -s%@program_transform_name@%$program_transform_name%g -s%@bindir@%$bindir%g -s%@sbindir@%$sbindir%g -s%@libexecdir@%$libexecdir%g -s%@datadir@%$datadir%g -s%@sysconfdir@%$sysconfdir%g 
-s%@sharedstatedir@%$sharedstatedir%g -s%@localstatedir@%$localstatedir%g -s%@libdir@%$libdir%g -s%@includedir@%$includedir%g -s%@oldincludedir@%$oldincludedir%g -s%@infodir@%$infodir%g -s%@mandir@%$mandir%g -s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g -s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%g -s%@INSTALL_DATA@%$INSTALL_DATA%g -s%@PACKAGE@%$PACKAGE%g -s%@VERSION@%$VERSION%g -s%@ACLOCAL@%$ACLOCAL%g -s%@AUTOCONF@%$AUTOCONF%g -s%@AUTOMAKE@%$AUTOMAKE%g -s%@AUTOHEADER@%$AUTOHEADER%g -s%@MAKEINFO@%$MAKEINFO%g -s%@SET_MAKE@%$SET_MAKE%g -s%@CC@%$CC%g -s%@MPICC0@%$MPICC0%g -s%@MPICC1@%$MPICC1%g -s%@MPICC2@%$MPICC2%g -s%@MPICC3@%$MPICC3%g -s%@MPICC4@%$MPICC4%g -s%@MPICC5@%$MPICC5%g -s%@MPICC@%$MPICC%g -s%@MPILIBS@%$MPILIBS%g -s%@MPIDEFS@%$MPIDEFS%g -s%@MPICFLAGS@%$MPICFLAGS%g -s%@PPUZZLE@%$PPUZZLE%g -s%@CPP@%$CPP%g - -CEOF -EOF - -cat >> $CONFIG_STATUS <<\EOF - -# Split the substitutions into bite-sized pieces for seds with -# small command number limits, like on Digital OSF/1 and HP-UX. -ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. -ac_file=1 # Number of current file. -ac_beg=1 # First line for current file. -ac_end=$ac_max_sed_cmds # Line after last line for current file. -ac_more_lines=: -ac_sed_cmds="" -while $ac_more_lines; do - if test $ac_beg -gt 1; then - sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file - else - sed "${ac_end}q" conftest.subs > conftest.s$ac_file - fi - if test ! -s conftest.s$ac_file; then - ac_more_lines=false - rm -f conftest.s$ac_file - else - if test -z "$ac_sed_cmds"; then - ac_sed_cmds="sed -f conftest.s$ac_file" - else - ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" - fi - ac_file=`expr $ac_file + 1` - ac_beg=$ac_end - ac_end=`expr $ac_end + $ac_max_sed_cmds` - fi -done -if test -z "$ac_sed_cmds"; then - ac_sed_cmds=cat -fi -EOF - -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF -for ac_file in .. 
$CONFIG_FILES; do if test "x$ac_file" != x..; then - # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". - case "$ac_file" in - *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` - ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; - *) ac_file_in="${ac_file}.in" ;; - esac - - # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. - - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` - if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then - # The file is in a subdirectory. - test ! -d "$ac_dir" && mkdir "$ac_dir" - ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" - # A "../" for each directory in $ac_dir_suffix. - ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` - else - ac_dir_suffix= ac_dots= - fi - - case "$ac_given_srcdir" in - .) srcdir=. - if test -z "$ac_dots"; then top_srcdir=. - else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; - /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; - *) # Relative path. - srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" - top_srcdir="$ac_dots$ac_given_srcdir" ;; - esac - - case "$ac_given_INSTALL" in - [/$]*) INSTALL="$ac_given_INSTALL" ;; - *) INSTALL="$ac_dots$ac_given_INSTALL" ;; - esac - - echo creating "$ac_file" - rm -f "$ac_file" - configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." 
- case "$ac_file" in - *Makefile*) ac_comsub="1i\\ -# $configure_input" ;; - *) ac_comsub= ;; - esac - - ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` - sed -e "$ac_comsub -s%@configure_input@%$configure_input%g -s%@srcdir@%$srcdir%g -s%@top_srcdir@%$top_srcdir%g -s%@INSTALL@%$INSTALL%g -" $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file -fi; done -rm -f conftest.s* - -EOF -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF - -exit 0 -EOF -chmod +x $CONFIG_STATUS -rm -fr confdefs* $ac_clean_files -test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 - diff --git a/forester/archive/RIO/others/puzzle_mod/configure.in b/forester/archive/RIO/others/puzzle_mod/configure.in deleted file mode 100644 index 57f0e27..0000000 --- a/forester/archive/RIO/others/puzzle_mod/configure.in +++ /dev/null @@ -1,117 +0,0 @@ - -dnl Disable caching. -define([AC_CACHE_LOAD], )dnl -define([AC_CACHE_SAVE], )dnl - -dnl Process this file with autoconf to produce a configure script. -AC_INIT(src/ml.h) - -AM_INIT_AUTOMAKE(tree-puzzle, 5.0) - -dnl Checks for programs. -AC_PROG_CC -AC_PROG_CC_C_O -AC_PROG_INSTALL -AC_PROG_MAKE_SET - - -AC_DEFUN(AC_TEST_MPICC,[dnl - if test "$1" != "" ; then - if test "$MPICCSET" = "" ; then -cat > conftest.c < -int main (int argc, char **argv) -{ -MPI_Init(&argc,&argv); -MPI_Finalize(); -exit(0); -} -EOF - - -MPICC=$1 -dnl if test "$MPICC" != "$CC" ; then -dnl -dnl fi - - if test "$MPICC" != "" ; then - AC_MSG_CHECKING(whether $MPICC works as MPI compiler) - $MPICC conftest.c -o conftest > /dev/null 2>&1 - if test $? = 0 ; then - AC_MSG_RESULT(yes) - #MPICC=$MPICC - MPILIBS= - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - AC_MSG_RESULT(no) - AC_MSG_CHECKING(whether $MPICC needs -lmpi) - $MPICC conftest.c -o conftest -lmpi > /dev/null 2>&1 - if test $? 
= 0 ; then - AC_MSG_RESULT(yes) - #MPICC=$PCC - MPILIBS=-lmpi - MPICCSET=$MPICC - PPUZZLE=ppuzzle - else - AC_MSG_RESULT(no) - MPICC= - MPILIBS= - MPICCSET= - PPUZZLE= - fi - fi - fi - rm -f conftest* - fi - fi ]) - -if test "$MPICC" != "" ; then - AC_PATH_PROG(MPICC0, $MPICC) -fi -AC_PATH_PROG(MPICC1, mpcc) -AC_PATH_PROG(MPICC2, hcc) -AC_PATH_PROG(MPICC3, mpicc) -AC_PATH_PROG(MPICC4, mpicc_lam) -AC_PATH_PROG(MPICC5, mpicc_mpich) - -AC_TEST_MPICC($MPICC0) -AC_TEST_MPICC($MPICC1) -AC_TEST_MPICC($MPICC2) -AC_TEST_MPICC($MPICC3) -AC_TEST_MPICC($MPICC4) -AC_TEST_MPICC($MPICC5) - -ac_cv_prog_MPICC=$MPICC - -AC_SUBST(MPICC) -AC_SUBST(MPILIBS) -AC_SUBST(MPIDEFS) -AC_SUBST(MPICFLAGS) -AC_SUBST(PPUZZLE) - -dnl Checks for libraries. -dnl Replace `main' with a function in -lm: -AC_CHECK_LIB(m, main) -dnl AC_CHECK_LIB(mpi, main) - -dnl Checks for header files. -AC_HEADER_STDC -AC_CHECK_HEADERS(limits.h) -dnl AC_HAVE_HEADERS(mpi.h) - -dnl AC_HAVE_HEADERS(rpc/xdr.h) - - -dnl Checks for typedefs, structures, and compiler characteristics. -AC_C_CONST -AC_TYPE_SIZE_T - -dnl Checks for library functions. -dnl AC_CHECK_FUNCS(xdr_u_char) -dnl AC_CHECK_FUNCS(xdr_double) -dnl AC_CHECK_FUNCS(xdrstdio_create) -dnl AC_CHECK_FUNCS(xdr_destroy) -dnl AC_CHECK_FUNCS(xdr_inline) - -AC_OUTPUT(Makefile src/Makefile src/test doc/Makefile data/Makefile) diff --git a/forester/archive/RIO/others/puzzle_mod/data/Makefile b/forester/archive/RIO/others/puzzle_mod/data/Makefile deleted file mode 100644 index 13d6fc1..0000000 --- a/forester/archive/RIO/others/puzzle_mod/data/Makefile +++ /dev/null @@ -1,177 +0,0 @@ -# Generated automatically from Makefile.in by configure. -# Makefile.in generated automatically by automake 1.4 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. 
- -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = /bin/sh - -srcdir = . -top_srcdir = .. -prefix = /usr/local -exec_prefix = ${prefix} - -bindir = ${exec_prefix}/bin -sbindir = ${exec_prefix}/sbin -libexecdir = ${exec_prefix}/libexec -datadir = ${prefix}/share -sysconfdir = ${prefix}/etc -sharedstatedir = ${prefix}/com -localstatedir = ${prefix}/var -libdir = ${exec_prefix}/lib -infodir = ${prefix}/info -mandir = ${prefix}/man -includedir = ${prefix}/include -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/tree-puzzle -pkglibdir = $(libdir)/tree-puzzle -pkgincludedir = $(includedir)/tree-puzzle - -top_builddir = .. - -ACLOCAL = aclocal -AUTOCONF = autoconf -AUTOMAKE = automake -AUTOHEADER = autoheader - -INSTALL = /usr/bin/install -c -INSTALL_PROGRAM = ${INSTALL} $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = ${INSTALL} -m 644 -INSTALL_SCRIPT = ${INSTALL_PROGRAM} -transform = s,x,x, - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = gcc -MAKEINFO = makeinfo -MPICC = -MPICC0 = -MPICC1 = -MPICC2 = -MPICC3 = -MPICC4 = -MPICC5 = -MPICFLAGS = -MPIDEFS = -MPILIBS = -PACKAGE = tree-puzzle -PPUZZLE = -VERSION = 5.0 - -EXTRA_DIST = atp6.a globin.a marswolf.n primates.b -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = -DIST_COMMON = Makefile.am Makefile.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = gtar -GZIP_ENV = --best -all: all-redirect -.SUFFIXES: -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps data/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= 
$(SHELL) ./config.status - -tags: TAGS -TAGS: - - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = data - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$d/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am -uninstall-am: -uninstall: uninstall-am -all-am: Makefile -all-redirect: all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-generic mostlyclean-am - -clean: clean-am - -distclean-am: distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-am - -.PHONY: tags distdir info-am info dvi-am dvi check check-am \ -installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. 
-# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_mod/data/Makefile.am b/forester/archive/RIO/others/puzzle_mod/data/Makefile.am deleted file mode 100644 index 9589f1e..0000000 --- a/forester/archive/RIO/others/puzzle_mod/data/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST = atp6.a globin.a marswolf.n primates.b diff --git a/forester/archive/RIO/others/puzzle_mod/data/Makefile.in b/forester/archive/RIO/others/puzzle_mod/data/Makefile.in deleted file mode 100644 index 47fa224..0000000 --- a/forester/archive/RIO/others/puzzle_mod/data/Makefile.in +++ /dev/null @@ -1,177 +0,0 @@ -# Makefile.in generated automatically by automake 1.4 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = @SHELL@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -bindir = @bindir@ -sbindir = @sbindir@ -libexecdir = @libexecdir@ -datadir = @datadir@ -sysconfdir = @sysconfdir@ -sharedstatedir = @sharedstatedir@ -localstatedir = @localstatedir@ -libdir = @libdir@ -infodir = @infodir@ -mandir = @mandir@ -includedir = @includedir@ -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ - -top_builddir = .. 
- -ACLOCAL = @ACLOCAL@ -AUTOCONF = @AUTOCONF@ -AUTOMAKE = @AUTOMAKE@ -AUTOHEADER = @AUTOHEADER@ - -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -transform = @program_transform_name@ - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = @CC@ -MAKEINFO = @MAKEINFO@ -MPICC = @MPICC@ -MPICC0 = @MPICC0@ -MPICC1 = @MPICC1@ -MPICC2 = @MPICC2@ -MPICC3 = @MPICC3@ -MPICC4 = @MPICC4@ -MPICC5 = @MPICC5@ -MPICFLAGS = @MPICFLAGS@ -MPIDEFS = @MPIDEFS@ -MPILIBS = @MPILIBS@ -PACKAGE = @PACKAGE@ -PPUZZLE = @PPUZZLE@ -VERSION = @VERSION@ - -EXTRA_DIST = atp6.a globin.a marswolf.n primates.b -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = -DIST_COMMON = Makefile.am Makefile.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = gtar -GZIP_ENV = --best -all: all-redirect -.SUFFIXES: -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps data/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -tags: TAGS -TAGS: - - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = data - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$d/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) 
$(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am -uninstall-am: -uninstall: uninstall-am -all-am: Makefile -all-redirect: all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-generic mostlyclean-am - -clean: clean-am - -distclean-am: distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-am - -.PHONY: tags distdir info-am info dvi-am dvi check check-am \ -installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_mod/doc/Makefile b/forester/archive/RIO/others/puzzle_mod/doc/Makefile deleted file mode 100644 index 008b529..0000000 --- a/forester/archive/RIO/others/puzzle_mod/doc/Makefile +++ /dev/null @@ -1,177 +0,0 @@ -# Generated automatically from Makefile.in by configure. -# Makefile.in generated automatically by automake 1.4-p5 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc. 
-# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = /bin/sh - -srcdir = . -top_srcdir = .. -prefix = /usr/local -exec_prefix = ${prefix} - -bindir = ${exec_prefix}/bin -sbindir = ${exec_prefix}/sbin -libexecdir = ${exec_prefix}/libexec -datadir = ${prefix}/share -sysconfdir = ${prefix}/etc -sharedstatedir = ${prefix}/com -localstatedir = ${prefix}/var -libdir = ${exec_prefix}/lib -infodir = ${prefix}/info -mandir = ${prefix}/man -includedir = ${prefix}/include -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/tree-puzzle -pkglibdir = $(libdir)/tree-puzzle -pkgincludedir = $(includedir)/tree-puzzle - -top_builddir = .. 
- -ACLOCAL = aclocal -AUTOCONF = autoconf -AUTOMAKE = automake -AUTOHEADER = autoheader - -INSTALL = /usr/bin/install -c -INSTALL_PROGRAM = ${INSTALL} $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = ${INSTALL} -m 644 -INSTALL_SCRIPT = ${INSTALL_PROGRAM} -transform = s,x,x, - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = gcc -MAKEINFO = makeinfo -MPICC = -MPICC0 = -MPICC1 = -MPICC2 = -MPICC3 = -MPICC4 = -MPICC5 = -MPICFLAGS = -MPIDEFS = -MPILIBS = -PACKAGE = tree-puzzle -PPUZZLE = -VERSION = 5.0 - -EXTRA_DIST = manual.html ppuzzle.gif puzzle.gif -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = -DIST_COMMON = Makefile.am Makefile.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = gtar -GZIP_ENV = --best -all: all-redirect -.SUFFIXES: -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps doc/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -tags: TAGS -TAGS: - - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = doc - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$d/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am -uninstall-am: -uninstall: uninstall-am -all-am: Makefile -all-redirect: 
all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-generic mostlyclean-am - -clean: clean-am - -distclean-am: distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-am - -.PHONY: tags distdir info-am info dvi-am dvi check check-am \ -installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_mod/doc/Makefile.am b/forester/archive/RIO/others/puzzle_mod/doc/Makefile.am deleted file mode 100644 index 3cb95e6..0000000 --- a/forester/archive/RIO/others/puzzle_mod/doc/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST = manual.html ppuzzle.gif puzzle.gif diff --git a/forester/archive/RIO/others/puzzle_mod/doc/Makefile.in b/forester/archive/RIO/others/puzzle_mod/doc/Makefile.in deleted file mode 100644 index b5588c3..0000000 --- a/forester/archive/RIO/others/puzzle_mod/doc/Makefile.in +++ /dev/null @@ -1,177 +0,0 @@ -# Makefile.in generated automatically by automake 1.4-p5 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc. 
-# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = @SHELL@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -bindir = @bindir@ -sbindir = @sbindir@ -libexecdir = @libexecdir@ -datadir = @datadir@ -sysconfdir = @sysconfdir@ -sharedstatedir = @sharedstatedir@ -localstatedir = @localstatedir@ -libdir = @libdir@ -infodir = @infodir@ -mandir = @mandir@ -includedir = @includedir@ -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ - -top_builddir = .. 
- -ACLOCAL = @ACLOCAL@ -AUTOCONF = @AUTOCONF@ -AUTOMAKE = @AUTOMAKE@ -AUTOHEADER = @AUTOHEADER@ - -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -transform = @program_transform_name@ - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = @CC@ -MAKEINFO = @MAKEINFO@ -MPICC = @MPICC@ -MPICC0 = @MPICC0@ -MPICC1 = @MPICC1@ -MPICC2 = @MPICC2@ -MPICC3 = @MPICC3@ -MPICC4 = @MPICC4@ -MPICC5 = @MPICC5@ -MPICFLAGS = @MPICFLAGS@ -MPIDEFS = @MPIDEFS@ -MPILIBS = @MPILIBS@ -PACKAGE = @PACKAGE@ -PPUZZLE = @PPUZZLE@ -VERSION = @VERSION@ - -EXTRA_DIST = manual.html ppuzzle.gif puzzle.gif -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = -DIST_COMMON = Makefile.am Makefile.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = gtar -GZIP_ENV = --best -all: all-redirect -.SUFFIXES: -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps doc/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -tags: TAGS -TAGS: - - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = doc - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$d/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) 
install-exec-am install-data-am -install: install-am -uninstall-am: -uninstall: uninstall-am -all-am: Makefile -all-redirect: all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-generic mostlyclean-am - -clean: clean-am - -distclean-am: distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-am - -.PHONY: tags distdir info-am info dvi-am dvi check check-am \ -installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_mod/install-sh b/forester/archive/RIO/others/puzzle_mod/install-sh deleted file mode 100755 index e9de238..0000000 --- a/forester/archive/RIO/others/puzzle_mod/install-sh +++ /dev/null @@ -1,251 +0,0 @@ -#!/bin/sh -# -# install - install a program, script, or datafile -# This comes from X11R5 (mit/util/scripts/install.sh). 
-# -# Copyright 1991 by the Massachusetts Institute of Technology -# -# Permission to use, copy, modify, distribute, and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in advertising or -# publicity pertaining to distribution of the software without specific, -# written prior permission. M.I.T. makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. It can only install one file at a time, a restriction -# shared with many OS's install programs. - - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" - - -# put in absolute paths if you don't have them in your path; or use env. vars. 
- -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -transformbasename="" -transform_arg="" -instcmd="$mvprog" -chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd="$cpprog" - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd="$stripprog" - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "install: no input file specified" - exit 1 -else - true -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d $dst ]; then - instcmd=: - chmodcmd="" - else - instcmd=mkdir - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. 
- - if [ -f $src -o -d $src ] - then - true - else - echo "install: $src does not exist" - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "install: no destination specified" - exit 1 - else - true - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d $dst ] - then - dst="$dst"/`basename $src` - else - true - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' -' -IFS="${IFS-${defaultIFS}}" - -oIFS="${IFS}" -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS="${oIFS}" - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp="${pathcomp}${1}" - shift - - if [ ! -d "${pathcomp}" ] ; - then - $mkdirprog "${pathcomp}" - else - true - fi - - pathcomp="${pathcomp}/" -done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd $dst && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi -else - -# If we're going to rename the final executable, determine the name now. - - if [ x"$transformarg" = x ] - then - dstfile=`basename $dst` - else - dstfile=`basename $dst $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename $dst` - else - true - fi - -# Make a temp file name in the proper directory. 
- - dsttmp=$dstdir/#inst.$$# - -# Move or copy the file name to the temp name - - $doit $instcmd $src $dsttmp && - - trap "rm -f ${dsttmp}" 0 && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && - -# Now rename the file to the real destination. - - $doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile - -fi && - - -exit 0 diff --git a/forester/archive/RIO/others/puzzle_mod/missing b/forester/archive/RIO/others/puzzle_mod/missing deleted file mode 100755 index 7789652..0000000 --- a/forester/archive/RIO/others/puzzle_mod/missing +++ /dev/null @@ -1,190 +0,0 @@ -#! /bin/sh -# Common stub for a few missing GNU programs while installing. -# Copyright (C) 1996, 1997 Free Software Foundation, Inc. -# Franc,ois Pinard , 1996. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -# 02111-1307, USA. 
- -if test $# -eq 0; then - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 -fi - -case "$1" in - - -h|--h|--he|--hel|--help) - echo "\ -$0 [OPTION]... PROGRAM [ARGUMENT]... - -Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an -error status if there is no known handling for PROGRAM. - -Options: - -h, --help display this help and exit - -v, --version output version information and exit - -Supported PROGRAM values: - aclocal touch file \`aclocal.m4' - autoconf touch file \`configure' - autoheader touch file \`config.h.in' - automake touch all \`Makefile.in' files - bison create \`y.tab.[ch]', if possible, from existing .[ch] - flex create \`lex.yy.c', if possible, from existing .c - lex create \`lex.yy.c', if possible, from existing .c - makeinfo touch the output file - yacc create \`y.tab.[ch]', if possible, from existing .[ch]" - ;; - - -v|--v|--ve|--ver|--vers|--versi|--versio|--version) - echo "missing - GNU libit 0.0" - ;; - - -*) - echo 1>&2 "$0: Unknown \`$1' option" - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 - ;; - - aclocal) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`acinclude.m4' or \`configure.in'. You might want - to install the \`Automake' and \`Perl' packages. Grab them from - any GNU archive site." - touch aclocal.m4 - ;; - - autoconf) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`configure.in'. You might want to install the - \`Autoconf' and \`GNU m4' packages. Grab them from any GNU - archive site." - touch configure - ;; - - autoheader) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`acconfig.h' or \`configure.in'. You might want - to install the \`Autoconf' and \`GNU m4' packages. Grab them - from any GNU archive site." 
- files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' configure.in` - test -z "$files" && files="config.h" - touch_files= - for f in $files; do - case "$f" in - *:*) touch_files="$touch_files "`echo "$f" | - sed -e 's/^[^:]*://' -e 's/:.*//'`;; - *) touch_files="$touch_files $f.in";; - esac - done - touch $touch_files - ;; - - automake) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`Makefile.am', \`acinclude.m4' or \`configure.in'. - You might want to install the \`Automake' and \`Perl' packages. - Grab them from any GNU archive site." - find . -type f -name Makefile.am -print | - sed 's/\.am$/.in/' | - while read f; do touch "$f"; done - ;; - - bison|yacc) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a \`.y' file. You may need the \`Bison' package - in order for those modifications to take effect. You can get - \`Bison' from any GNU archive site." - rm -f y.tab.c y.tab.h - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.y) - SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.c - fi - SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.h - fi - ;; - esac - fi - if [ ! -f y.tab.h ]; then - echo >y.tab.h - fi - if [ ! -f y.tab.c ]; then - echo 'main() { return 0; }' >y.tab.c - fi - ;; - - lex|flex) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a \`.l' file. You may need the \`Flex' package - in order for those modifications to take effect. You can get - \`Flex' from any GNU archive site." - rm -f lex.yy.c - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.l) - SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" lex.yy.c - fi - ;; - esac - fi - if [ ! 
-f lex.yy.c ]; then - echo 'main() { return 0; }' >lex.yy.c - fi - ;; - - makeinfo) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a \`.texi' or \`.texinfo' file, or any other file - indirectly affecting the aspect of the manual. The spurious - call might also be the consequence of using a buggy \`make' (AIX, - DU, IRIX). You might want to install the \`Texinfo' package or - the \`GNU make' package. Grab either from any GNU archive site." - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` - file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file` - fi - touch $file - ;; - - *) - echo 1>&2 "\ -WARNING: \`$1' is needed, and you do not seem to have it handy on your - system. You might have modified some files without having the - proper tools for further handling them. Check the \`README' file, - it often tells you about the needed prerequirements for installing - this package. You may also peek at any GNU archive site, in case - some other package would contain this missing \`$1' program." - exit 1 - ;; -esac - -exit 0 diff --git a/forester/archive/RIO/others/puzzle_mod/mkinstalldirs b/forester/archive/RIO/others/puzzle_mod/mkinstalldirs deleted file mode 100755 index 1d8b882..0000000 --- a/forester/archive/RIO/others/puzzle_mod/mkinstalldirs +++ /dev/null @@ -1,40 +0,0 @@ -#! /bin/sh -# mkinstalldirs --- make directory hierarchy -# Author: Noah Friedman -# Created: 1993-05-16 -# Public domain - -# $Id: mkinstalldirs,v 1.1.1.1 2005/03/22 08:35:12 cmzmasek Exp $ - -errstatus=0 - -for file -do - set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'` - shift - - pathcomp= - for d - do - pathcomp="$pathcomp$d" - case "$pathcomp" in - -* ) pathcomp=./$pathcomp ;; - esac - - if test ! -d "$pathcomp"; then - echo "mkdir $pathcomp" - - mkdir "$pathcomp" || lasterr=$? - - if test ! 
-d "$pathcomp"; then - errstatus=$lasterr - fi - fi - - pathcomp="$pathcomp/" - done -done - -exit $errstatus - -# mkinstalldirs ends here diff --git a/forester/archive/RIO/others/puzzle_mod/src/00README b/forester/archive/RIO/others/puzzle_mod/src/00README deleted file mode 100644 index a50e005..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/00README +++ /dev/null @@ -1,97 +0,0 @@ -Modifications by Christian Zmasek ---------------------------------- - - -!WARNING: Use this modified version of TREE-PUZZLE 5.0 ONLY - together with FORESTER/RIO! - -!For all other puposes download the excellent original! - - -Changes: --------- - - -puzzle1.c: void putdistance(FILE *fp): - -remove: "/* seven in one row */ - if ((j + 1) % 7 == 0 && j+1 != Maxspc) - fprintf(fp, "\n ");" - - - - - -puzzle1.c: int main(int argc, char *argv[]): - -remove: -"FPRINTF(STDOUTFILE "Writing parameters to file %s\n", OUTFILE); - openfiletowrite(&ofp, OUTFILE, "general output"); - writeoutputfile(ofp,WRITEPARAMS); - fclose(ofp);" - -"openfiletoappend(&ofp, OUTFILE, "general output"); - writeoutputfile(ofp,WRITEREST);" - -"openfiletoappend(&ofp, OUTFILE, "general output"); - writeoutputfile(ofp,WRITEREST);" - -"openfiletoappend(&ofp, OUTFILE, "general output"); - writeoutputfile(ofp,WRITEREST);" - -"timestamp(ofp); - closefile(ofp);" - - - - -puzzle2.c: void getsizesites(FILE *ifp): - -257 -> 8000 - - - -puzzle2.c: void readid(FILE *infp, int t): - -for (i = 0; i < 10; i++) { -> for (i = 0; i < 26; i++) { - -for (i = 9; i > -1; i--) { -> for (i = 25; i > -1; i--) { - -for (j = 0; (j < 10) && (flag == TRUE); j++) -> for (j = 0; (j < 26) && (flag == TRUE); j++) - - - -puzzle2.c: void initid(int t): - -Identif = new_cmatrix(t, 10); -> Identif = new_cmatrix(t, 26); - -for (j = 0; j < 10; j++) -> for (j = 0; j < 26; j++) - - - -puzzle2.c: fputid10(FILE *ofp, int t): - -for (i = 0; i < 10; i++) -> for (i = 0; i < 26; i++) - - - -puzzle2.c: int fputid(FILE *ofp, int t): - -while (Identif[t][i] 
!= ' ' && i < 10) { -> while (Identif[t][i] != ' ' && i < 26) { - - - - -ml2.c: Node *internalnode(Tree *tr, char **chpp, int *ninode): - -char ident[100], idcomp[11]; -> char ident[100], idcomp[27]; - -idcomp[10] = '\0'; -> idcomp[26] = '\0'; - -} while (!stop && (ff != 10)); -> } while (!stop && (ff != 26)); - - - - - - diff --git a/forester/archive/RIO/others/puzzle_mod/src/Makefile b/forester/archive/RIO/others/puzzle_mod/src/Makefile deleted file mode 100644 index 9c6d4c0..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/Makefile +++ /dev/null @@ -1,356 +0,0 @@ -# Generated automatically from Makefile.in by configure. -# Makefile.in generated automatically by automake 1.4 from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = /bin/sh - -srcdir = . -top_srcdir = .. -prefix = /usr/local -exec_prefix = ${prefix} - -bindir = ${exec_prefix}/bin -sbindir = ${exec_prefix}/sbin -libexecdir = ${exec_prefix}/libexec -datadir = ${prefix}/share -sysconfdir = ${prefix}/etc -sharedstatedir = ${prefix}/com -localstatedir = ${prefix}/var -libdir = ${exec_prefix}/lib -infodir = ${prefix}/info -mandir = ${prefix}/man -includedir = ${prefix}/include -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/tree-puzzle -pkglibdir = $(libdir)/tree-puzzle -pkgincludedir = $(includedir)/tree-puzzle - -top_builddir = .. 
- -ACLOCAL = aclocal -AUTOCONF = autoconf -AUTOMAKE = automake -AUTOHEADER = autoheader - -INSTALL = /usr/bin/install -c -INSTALL_PROGRAM = ${INSTALL} $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = ${INSTALL} -m 644 -INSTALL_SCRIPT = ${INSTALL_PROGRAM} -transform = s,x,x, - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = gcc -MAKEINFO = makeinfo -MPICC = -MPICC0 = -MPICC1 = -MPICC2 = -MPICC3 = -MPICC4 = -MPICC5 = -MPICFLAGS = -MPIDEFS = -MPILIBS = -PACKAGE = tree-puzzle -PPUZZLE = -VERSION = 5.0 - -bin_PROGRAMS = puzzle -EXTRA_PROGRAMS = ppuzzle - -puzzle_SOURCES = gamma.c ml1.c ml2.c ml3.c model1.c model2.c puzzle1.c puzzle2.c util.c ml.h util.h puzzle.h gamma.h -puzzle_LDADD = sgamma.o sml1.o sml2.o sml3.o smodel1.o smodel2.o spuzzle1.o spuzzle2.o sutil.o - -SDEFS = -SCFLAGS = -SLDFLAGS = -lm - -SCOMPILE = $(CC) $(SDEFS) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(SCFLAGS) $(CFLAGS) -SCCLD = $(CC) -SLINK = $(SCCLD) $(AM_CFLAGS) $(CFLAGS) $(SLDFLAGS) $(LDFLAGS) - -ppuzzle_SOURCES = gamma.c ml1.c ml2.c ml3.c model1.c model2.c puzzle1.c puzzle2.c sched.c util.c ppuzzle.c ml.h util.h puzzle.h gamma.h ppuzzle.h sched.h -ppuzzle_LDADD = pgamma.o pml1.o pml2.o pml3.o pmodel1.o pmodel2.o ppuzzle1.o ppuzzle2.o psched.o putil.o ppuzzle.o - -PCC = -PDEFS = -DPARALLEL -PCFLAGS = -PLDFLAGS = -lm - -PCOMPILE = $(PCC) $(PDEFS) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(PCFLAGS) $(CFLAGS) -PCCLD = $(PCC) -PLINK = $(PCCLD) $(AM_CFLAGS) $(PCFLAGS) $(CFLAGS) $(PLDFLAGS) $(LDFLAGS) -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = test -PROGRAMS = $(bin_PROGRAMS) - - -DEFS = -DPACKAGE=\"tree-puzzle\" -DVERSION=\"5.0\" -DHAVE_LIBM=1 -DSTDC_HEADERS=1 -DHAVE_LIMITS_H=1 -I. 
-I$(srcdir) -CPPFLAGS = -LDFLAGS = -LIBS = -lm -ppuzzle_OBJECTS = gamma.o ml1.o ml2.o ml3.o model1.o model2.o puzzle1.o \ -puzzle2.o sched.o util.o ppuzzle.o -ppuzzle_DEPENDENCIES = pgamma.o pml1.o pml2.o pml3.o pmodel1.o \ -pmodel2.o ppuzzle1.o ppuzzle2.o psched.o putil.o ppuzzle.o -ppuzzle_LDFLAGS = -puzzle_OBJECTS = gamma.o ml1.o ml2.o ml3.o model1.o model2.o puzzle1.o \ -puzzle2.o util.o -puzzle_DEPENDENCIES = sgamma.o sml1.o sml2.o sml3.o smodel1.o smodel2.o \ -spuzzle1.o spuzzle2.o sutil.o -puzzle_LDFLAGS = -CFLAGS = -g -O2 -COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ -DIST_COMMON = README Makefile.am Makefile.in test.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = gtar -GZIP_ENV = --best -SOURCES = $(ppuzzle_SOURCES) $(puzzle_SOURCES) -OBJECTS = $(ppuzzle_OBJECTS) $(puzzle_OBJECTS) - -all: all-redirect -.SUFFIXES: -.SUFFIXES: .S .c .o .s -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps src/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -test: $(top_builddir)/config.status test.in - cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -mostlyclean-binPROGRAMS: - -clean-binPROGRAMS: - -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) - -distclean-binPROGRAMS: - -maintainer-clean-binPROGRAMS: - -install-binPROGRAMS: $(bin_PROGRAMS) - @$(NORMAL_INSTALL) - $(mkinstalldirs) $(DESTDIR)$(bindir) - @list='$(bin_PROGRAMS)'; for p in $$list; do \ - if test -f $$p; then \ - echo " $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`"; \ - $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 
's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \ - else :; fi; \ - done - -uninstall-binPROGRAMS: - @$(NORMAL_UNINSTALL) - list='$(bin_PROGRAMS)'; for p in $$list; do \ - rm -f $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \ - done - -.c.o: - $(COMPILE) -c $< - -.s.o: - $(COMPILE) -c $< - -.S.o: - $(COMPILE) -c $< - -mostlyclean-compile: - -rm -f *.o core *.core - -clean-compile: - -distclean-compile: - -rm -f *.tab.c - -maintainer-clean-compile: - -tags: TAGS - -ID: $(HEADERS) $(SOURCES) $(LISP) - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - here=`pwd` && cd $(srcdir) \ - && mkid -f$$here/ID $$unique $(LISP) - -TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ - || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS) - -mostlyclean-tags: - -clean-tags: - -distclean-tags: - -rm -f TAGS ID - -maintainer-clean-tags: - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = src - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$d/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: install-binPROGRAMS -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am 
-uninstall-am: uninstall-binPROGRAMS -uninstall: uninstall-am -all-am: Makefile $(PROGRAMS) -all-redirect: all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - $(mkinstalldirs) $(DESTDIR)$(bindir) - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-binPROGRAMS mostlyclean-compile \ - mostlyclean-tags mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-binPROGRAMS clean-compile clean-tags clean-generic \ - mostlyclean-am - -clean: clean-am - -distclean-am: distclean-binPROGRAMS distclean-compile distclean-tags \ - distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-binPROGRAMS \ - maintainer-clean-compile maintainer-clean-tags \ - maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." 
- -maintainer-clean: maintainer-clean-am - -.PHONY: mostlyclean-binPROGRAMS distclean-binPROGRAMS clean-binPROGRAMS \ -maintainer-clean-binPROGRAMS uninstall-binPROGRAMS install-binPROGRAMS \ -mostlyclean-compile distclean-compile clean-compile \ -maintainer-clean-compile tags mostlyclean-tags distclean-tags \ -clean-tags maintainer-clean-tags distdir info-am info dvi-am dvi check \ -check-am installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -puzzle: $(puzzle_LDADD) $(puzzle_SOURCES) - $(SLINK) $(puzzle_LDADD) -o $@ - -sml1.o: ml1.c ml.h util.h - $(SCOMPILE) -c ml1.c && mv ml1.o $@ -sml2.o: ml2.c ml.h util.h - $(SCOMPILE) -c ml2.c && mv ml2.o $@ -sml3.o: ml3.c ml.h util.h gamma.h - $(SCOMPILE) -c ml3.c && mv ml3.o $@ -smodel1.o: model1.c ml.h util.h - $(SCOMPILE) -c model1.c && mv model1.o $@ -smodel2.o: model2.c ml.h util.h - $(SCOMPILE) -c model2.c && mv model2.o $@ -spuzzle1.o: puzzle1.c ml.h util.h puzzle.h gamma.h ppuzzle.h - $(SCOMPILE) -c puzzle1.c && mv puzzle1.o $@ -spuzzle2.o: puzzle2.c ml.h util.h puzzle.h ppuzzle.h - $(SCOMPILE) -c puzzle2.c && mv puzzle2.o $@ -sutil.o: util.c util.h - $(SCOMPILE) -c util.c && mv util.o $@ -sgamma.o: gamma.c gamma.h util.h - $(SCOMPILE) -c gamma.c && mv gamma.o $@ - -ppuzzle: $(ppuzzle_LDADD) $(ppuzzle_SOURCES) - $(PLINK) $(ppuzzle_LDADD) -o $@ - -pml1.o: ml1.c ml.h util.h - $(PCOMPILE) -c ml1.c && mv ml1.o $@ -pml2.o: ml2.c ml.h util.h - $(PCOMPILE) -c ml2.c && mv ml2.o $@ -pml3.o: ml3.c ml.h util.h gamma.h - $(PCOMPILE) -c ml3.c && mv ml3.o $@ -pmodel1.o: model1.c ml.h util.h - $(PCOMPILE) -c model1.c && mv model1.o $@ -pmodel2.o: model2.c ml.h util.h - $(PCOMPILE) -c model2.c && mv model2.o $@ -ppuzzle1.o: puzzle1.c ml.h util.h puzzle.h gamma.h ppuzzle.h - $(PCOMPILE) 
-c puzzle1.c && mv puzzle1.o $@ -ppuzzle2.o: puzzle2.c ml.h util.h puzzle.h ppuzzle.h - $(PCOMPILE) -c puzzle2.c && mv puzzle2.o $@ -putil.o: util.c util.h - $(PCOMPILE) -c util.c && mv util.o $@ -pgamma.o: gamma.c gamma.h util.h - $(PCOMPILE) -c gamma.c && mv gamma.o $@ -psched.o: sched.c sched.h ppuzzle.h - $(PCOMPILE) -c sched.c && mv sched.o $@ -ppuzzle.o: ppuzzle.c ppuzzle.h ml.h util.h puzzle.h gamma.h sched.h - $(PCOMPILE) -c ppuzzle.c - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_mod/src/Makefile.am b/forester/archive/RIO/others/puzzle_mod/src/Makefile.am deleted file mode 100644 index 3b88a39..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/Makefile.am +++ /dev/null @@ -1,77 +0,0 @@ -bin_PROGRAMS = puzzle @PPUZZLE@ -EXTRA_PROGRAMS = ppuzzle - -puzzle_SOURCES = gamma.c ml1.c ml2.c ml3.c model1.c model2.c puzzle1.c puzzle2.c util.c ml.h util.h puzzle.h gamma.h -puzzle_LDADD = sgamma.o sml1.o sml2.o sml3.o smodel1.o smodel2.o spuzzle1.o spuzzle2.o sutil.o - -SDEFS = -SCFLAGS = -SLDFLAGS = @LIBS@ - -SCOMPILE = $(CC) $(SDEFS) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(SCFLAGS) $(CFLAGS) -SCCLD = $(CC) -SLINK = $(SCCLD) $(AM_CFLAGS) $(CFLAGS) $(SLDFLAGS) $(LDFLAGS) - -ppuzzle_SOURCES = gamma.c ml1.c ml2.c ml3.c model1.c model2.c puzzle1.c puzzle2.c sched.c util.c ppuzzle.c ml.h util.h puzzle.h gamma.h ppuzzle.h sched.h -ppuzzle_LDADD = pgamma.o pml1.o pml2.o pml3.o pmodel1.o pmodel2.o ppuzzle1.o ppuzzle2.o psched.o putil.o ppuzzle.o - -PCC = @MPICC@ -PDEFS = -DPARALLEL -PCFLAGS = -PLDFLAGS = @LIBS@ @MPILIBS@ - -PCOMPILE = $(PCC) $(PDEFS) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(PCFLAGS) $(CFLAGS) -PCCLD = $(PCC) -PLINK = $(PCCLD) $(AM_CFLAGS) $(PCFLAGS) $(CFLAGS) $(PLDFLAGS) $(LDFLAGS) - - -puzzle: $(puzzle_LDADD) $(puzzle_SOURCES) - $(SLINK) $(puzzle_LDADD) -o $@ - 
-sml1.o: ml1.c ml.h util.h - $(SCOMPILE) -c ml1.c && mv ml1.o $@ -sml2.o: ml2.c ml.h util.h - $(SCOMPILE) -c ml2.c && mv ml2.o $@ -sml3.o: ml3.c ml.h util.h gamma.h - $(SCOMPILE) -c ml3.c && mv ml3.o $@ -smodel1.o: model1.c ml.h util.h - $(SCOMPILE) -c model1.c && mv model1.o $@ -smodel2.o: model2.c ml.h util.h - $(SCOMPILE) -c model2.c && mv model2.o $@ -spuzzle1.o: puzzle1.c ml.h util.h puzzle.h gamma.h ppuzzle.h - $(SCOMPILE) -c puzzle1.c && mv puzzle1.o $@ -spuzzle2.o: puzzle2.c ml.h util.h puzzle.h ppuzzle.h - $(SCOMPILE) -c puzzle2.c && mv puzzle2.o $@ -sutil.o: util.c util.h - $(SCOMPILE) -c util.c && mv util.o $@ -sgamma.o: gamma.c gamma.h util.h - $(SCOMPILE) -c gamma.c && mv gamma.o $@ - - - -ppuzzle: $(ppuzzle_LDADD) $(ppuzzle_SOURCES) - $(PLINK) $(ppuzzle_LDADD) -o $@ - -pml1.o: ml1.c ml.h util.h - $(PCOMPILE) -c ml1.c && mv ml1.o $@ -pml2.o: ml2.c ml.h util.h - $(PCOMPILE) -c ml2.c && mv ml2.o $@ -pml3.o: ml3.c ml.h util.h gamma.h - $(PCOMPILE) -c ml3.c && mv ml3.o $@ -pmodel1.o: model1.c ml.h util.h - $(PCOMPILE) -c model1.c && mv model1.o $@ -pmodel2.o: model2.c ml.h util.h - $(PCOMPILE) -c model2.c && mv model2.o $@ -ppuzzle1.o: puzzle1.c ml.h util.h puzzle.h gamma.h ppuzzle.h - $(PCOMPILE) -c puzzle1.c && mv puzzle1.o $@ -ppuzzle2.o: puzzle2.c ml.h util.h puzzle.h ppuzzle.h - $(PCOMPILE) -c puzzle2.c && mv puzzle2.o $@ -putil.o: util.c util.h - $(PCOMPILE) -c util.c && mv util.o $@ -pgamma.o: gamma.c gamma.h util.h - $(PCOMPILE) -c gamma.c && mv gamma.o $@ -psched.o: sched.c sched.h ppuzzle.h - $(PCOMPILE) -c sched.c && mv sched.o $@ -ppuzzle.o: ppuzzle.c ppuzzle.h ml.h util.h puzzle.h gamma.h sched.h - $(PCOMPILE) -c ppuzzle.c - diff --git a/forester/archive/RIO/others/puzzle_mod/src/Makefile.in b/forester/archive/RIO/others/puzzle_mod/src/Makefile.in deleted file mode 100644 index ab15dd4..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/Makefile.in +++ /dev/null @@ -1,356 +0,0 @@ -# Makefile.in generated automatically by automake 1.4 
from Makefile.am - -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - - -SHELL = @SHELL@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -bindir = @bindir@ -sbindir = @sbindir@ -libexecdir = @libexecdir@ -datadir = @datadir@ -sysconfdir = @sysconfdir@ -sharedstatedir = @sharedstatedir@ -localstatedir = @localstatedir@ -libdir = @libdir@ -infodir = @infodir@ -mandir = @mandir@ -includedir = @includedir@ -oldincludedir = /usr/include - -DESTDIR = - -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ - -top_builddir = .. 
- -ACLOCAL = @ACLOCAL@ -AUTOCONF = @AUTOCONF@ -AUTOMAKE = @AUTOMAKE@ -AUTOHEADER = @AUTOHEADER@ - -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -transform = @program_transform_name@ - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -CC = @CC@ -MAKEINFO = @MAKEINFO@ -MPICC = @MPICC@ -MPICC0 = @MPICC0@ -MPICC1 = @MPICC1@ -MPICC2 = @MPICC2@ -MPICC3 = @MPICC3@ -MPICC4 = @MPICC4@ -MPICC5 = @MPICC5@ -MPICFLAGS = @MPICFLAGS@ -MPIDEFS = @MPIDEFS@ -MPILIBS = @MPILIBS@ -PACKAGE = @PACKAGE@ -PPUZZLE = @PPUZZLE@ -VERSION = @VERSION@ - -bin_PROGRAMS = puzzle @PPUZZLE@ -EXTRA_PROGRAMS = ppuzzle - -puzzle_SOURCES = gamma.c ml1.c ml2.c ml3.c model1.c model2.c puzzle1.c puzzle2.c util.c ml.h util.h puzzle.h gamma.h -puzzle_LDADD = sgamma.o sml1.o sml2.o sml3.o smodel1.o smodel2.o spuzzle1.o spuzzle2.o sutil.o - -SDEFS = -SCFLAGS = -SLDFLAGS = @LIBS@ - -SCOMPILE = $(CC) $(SDEFS) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(SCFLAGS) $(CFLAGS) -SCCLD = $(CC) -SLINK = $(SCCLD) $(AM_CFLAGS) $(CFLAGS) $(SLDFLAGS) $(LDFLAGS) - -ppuzzle_SOURCES = gamma.c ml1.c ml2.c ml3.c model1.c model2.c puzzle1.c puzzle2.c sched.c util.c ppuzzle.c ml.h util.h puzzle.h gamma.h ppuzzle.h sched.h -ppuzzle_LDADD = pgamma.o pml1.o pml2.o pml3.o pmodel1.o pmodel2.o ppuzzle1.o ppuzzle2.o psched.o putil.o ppuzzle.o - -PCC = @MPICC@ -PDEFS = -DPARALLEL -PCFLAGS = -PLDFLAGS = @LIBS@ @MPILIBS@ - -PCOMPILE = $(PCC) $(PDEFS) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(PCFLAGS) $(CFLAGS) -PCCLD = $(PCC) -PLINK = $(PCCLD) $(AM_CFLAGS) $(PCFLAGS) $(CFLAGS) $(PLDFLAGS) $(LDFLAGS) -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_CLEAN_FILES = test -PROGRAMS = $(bin_PROGRAMS) - - -DEFS = @DEFS@ -I. 
-I$(srcdir) -CPPFLAGS = @CPPFLAGS@ -LDFLAGS = @LDFLAGS@ -LIBS = @LIBS@ -ppuzzle_OBJECTS = gamma.o ml1.o ml2.o ml3.o model1.o model2.o puzzle1.o \ -puzzle2.o sched.o util.o ppuzzle.o -ppuzzle_DEPENDENCIES = pgamma.o pml1.o pml2.o pml3.o pmodel1.o \ -pmodel2.o ppuzzle1.o ppuzzle2.o psched.o putil.o ppuzzle.o -ppuzzle_LDFLAGS = -puzzle_OBJECTS = gamma.o ml1.o ml2.o ml3.o model1.o model2.o puzzle1.o \ -puzzle2.o util.o -puzzle_DEPENDENCIES = sgamma.o sml1.o sml2.o sml3.o smodel1.o smodel2.o \ -spuzzle1.o spuzzle2.o sutil.o -puzzle_LDFLAGS = -CFLAGS = @CFLAGS@ -COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ -DIST_COMMON = README Makefile.am Makefile.in test.in - - -DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) - -TAR = gtar -GZIP_ENV = --best -SOURCES = $(ppuzzle_SOURCES) $(puzzle_SOURCES) -OBJECTS = $(ppuzzle_OBJECTS) $(puzzle_OBJECTS) - -all: all-redirect -.SUFFIXES: -.SUFFIXES: .S .c .o .s -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps src/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -test: $(top_builddir)/config.status test.in - cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -mostlyclean-binPROGRAMS: - -clean-binPROGRAMS: - -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) - -distclean-binPROGRAMS: - -maintainer-clean-binPROGRAMS: - -install-binPROGRAMS: $(bin_PROGRAMS) - @$(NORMAL_INSTALL) - $(mkinstalldirs) $(DESTDIR)$(bindir) - @list='$(bin_PROGRAMS)'; for p in $$list; do \ - if test -f $$p; then \ - echo " $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`"; \ - $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/`echo 
$$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \ - else :; fi; \ - done - -uninstall-binPROGRAMS: - @$(NORMAL_UNINSTALL) - list='$(bin_PROGRAMS)'; for p in $$list; do \ - rm -f $(DESTDIR)$(bindir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \ - done - -.c.o: - $(COMPILE) -c $< - -.s.o: - $(COMPILE) -c $< - -.S.o: - $(COMPILE) -c $< - -mostlyclean-compile: - -rm -f *.o core *.core - -clean-compile: - -distclean-compile: - -rm -f *.tab.c - -maintainer-clean-compile: - -tags: TAGS - -ID: $(HEADERS) $(SOURCES) $(LISP) - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - here=`pwd` && cd $(srcdir) \ - && mkid -f$$here/ID $$unique $(LISP) - -TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS)'; \ - unique=`for i in $$list; do echo $$i; done | \ - awk ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ - || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS) - -mostlyclean-tags: - -clean-tags: - -distclean-tags: - -rm -f TAGS ID - -maintainer-clean-tags: - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -subdir = src - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pr $$d/$$file $(distdir)/$$file; \ - else \ - test -f $(distdir)/$$file \ - || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ - || cp -p $$d/$$file $(distdir)/$$file || :; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: install-binPROGRAMS -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am 
-uninstall-am: uninstall-binPROGRAMS -uninstall: uninstall-am -all-am: Makefile $(PROGRAMS) -all-redirect: all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install -installdirs: - $(mkinstalldirs) $(DESTDIR)$(bindir) - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: -mostlyclean-am: mostlyclean-binPROGRAMS mostlyclean-compile \ - mostlyclean-tags mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-binPROGRAMS clean-compile clean-tags clean-generic \ - mostlyclean-am - -clean: clean-am - -distclean-am: distclean-binPROGRAMS distclean-compile distclean-tags \ - distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-binPROGRAMS \ - maintainer-clean-compile maintainer-clean-tags \ - maintainer-clean-generic distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." 
- -maintainer-clean: maintainer-clean-am - -.PHONY: mostlyclean-binPROGRAMS distclean-binPROGRAMS clean-binPROGRAMS \ -maintainer-clean-binPROGRAMS uninstall-binPROGRAMS install-binPROGRAMS \ -mostlyclean-compile distclean-compile clean-compile \ -maintainer-clean-compile tags mostlyclean-tags distclean-tags \ -clean-tags maintainer-clean-tags distdir info-am info dvi-am dvi check \ -check-am installcheck-am installcheck install-exec-am install-exec \ -install-data-am install-data install-am install uninstall-am uninstall \ -all-redirect all-am all installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -puzzle: $(puzzle_LDADD) $(puzzle_SOURCES) - $(SLINK) $(puzzle_LDADD) -o $@ - -sml1.o: ml1.c ml.h util.h - $(SCOMPILE) -c ml1.c && mv ml1.o $@ -sml2.o: ml2.c ml.h util.h - $(SCOMPILE) -c ml2.c && mv ml2.o $@ -sml3.o: ml3.c ml.h util.h gamma.h - $(SCOMPILE) -c ml3.c && mv ml3.o $@ -smodel1.o: model1.c ml.h util.h - $(SCOMPILE) -c model1.c && mv model1.o $@ -smodel2.o: model2.c ml.h util.h - $(SCOMPILE) -c model2.c && mv model2.o $@ -spuzzle1.o: puzzle1.c ml.h util.h puzzle.h gamma.h ppuzzle.h - $(SCOMPILE) -c puzzle1.c && mv puzzle1.o $@ -spuzzle2.o: puzzle2.c ml.h util.h puzzle.h ppuzzle.h - $(SCOMPILE) -c puzzle2.c && mv puzzle2.o $@ -sutil.o: util.c util.h - $(SCOMPILE) -c util.c && mv util.o $@ -sgamma.o: gamma.c gamma.h util.h - $(SCOMPILE) -c gamma.c && mv gamma.o $@ - -ppuzzle: $(ppuzzle_LDADD) $(ppuzzle_SOURCES) - $(PLINK) $(ppuzzle_LDADD) -o $@ - -pml1.o: ml1.c ml.h util.h - $(PCOMPILE) -c ml1.c && mv ml1.o $@ -pml2.o: ml2.c ml.h util.h - $(PCOMPILE) -c ml2.c && mv ml2.o $@ -pml3.o: ml3.c ml.h util.h gamma.h - $(PCOMPILE) -c ml3.c && mv ml3.o $@ -pmodel1.o: model1.c ml.h util.h - $(PCOMPILE) -c model1.c && mv model1.o $@ -pmodel2.o: model2.c ml.h util.h - $(PCOMPILE) -c model2.c && mv model2.o $@ -ppuzzle1.o: puzzle1.c ml.h util.h puzzle.h gamma.h ppuzzle.h - $(PCOMPILE) 
-c puzzle1.c && mv puzzle1.o $@ -ppuzzle2.o: puzzle2.c ml.h util.h puzzle.h ppuzzle.h - $(PCOMPILE) -c puzzle2.c && mv puzzle2.o $@ -putil.o: util.c util.h - $(PCOMPILE) -c util.c && mv util.o $@ -pgamma.o: gamma.c gamma.h util.h - $(PCOMPILE) -c gamma.c && mv gamma.o $@ -psched.o: sched.c sched.h ppuzzle.h - $(PCOMPILE) -c sched.c && mv sched.o $@ -ppuzzle.o: ppuzzle.c ppuzzle.h ml.h util.h puzzle.h gamma.h sched.h - $(PCOMPILE) -c ppuzzle.c - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/forester/archive/RIO/others/puzzle_mod/src/README b/forester/archive/RIO/others/puzzle_mod/src/README deleted file mode 100644 index 9c89883..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/README +++ /dev/null @@ -1 +0,0 @@ -Sources of the TREE-PUZZLE package diff --git a/forester/archive/RIO/others/puzzle_mod/src/gamma.c b/forester/archive/RIO/others/puzzle_mod/src/gamma.c deleted file mode 100644 index ee1f6df..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/gamma.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - * gamma.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - -#include -#include "util.h" -#include "gamma.h" - -/* private prototypes */ -static double IncompleteGamma (double x, double alpha, double ln_gamma_alpha); -static double PointNormal (double prob); -static double PointChi2 (double prob, double v); - -/* Gamma density function */ -double densityGamma (double x, double shape) -{ - return pow (shape, shape) * pow (x, shape-1) / - exp (shape*x + LnGamma(shape)); -} - -/* Gamma cdf */ -double cdfGamma (double x, double shape) -{ - double result; - - result = IncompleteGamma (shape*x, shape, LnGamma(shape)); - - return result; -} - -/* Gamma inverse cdf */ -double icdfGamma (double y, double shape) -{ - double result; - - result = PointChi2 (y, 2.0*shape)/(2.0*shape); - - /* to avoid -1.0 */ - if (result < 0.0) - { - result = 0.0; - } - - return result; -} - -/* Gamma n-th moment */ -double momentGamma (int n, double shape) -{ - int i; - double tmp = 1.0; - - for (i = 1; i < n; i++) - { - tmp *= (shape + i)/shape; - } - - return tmp; -} - -/* The following code comes from tools.c in Yang's PAML package */ - -double LnGamma (double alpha) -{ -/* returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places. - Stirling's formula is used for the central polynomial part of the procedure. - Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function. - Communications of the Association for Computing Machinery, 9:684 -*/ - double x=alpha, f=0, z; - - if (x<7) { - f=1; z=x-1; - while (++z<7) f*=z; - x=z; f=-log(f); - } - z = 1/(x*x); - return f + (x-0.5)*log(x) - x + .918938533204673 - + (((-.000595238095238*z+.000793650793651)*z-.002777777777778)*z - +.083333333333333)/x; -} - -static double IncompleteGamma (double x, double alpha, double ln_gamma_alpha) -{ -/* returns the incomplete gamma ratio I(x,alpha) where x is the upper - limit of the integration and alpha is the shape parameter. 
- returns (-1) if in error - (1) series expansion if (alpha>x || x<=1) - (2) continued fraction otherwise - RATNEST FORTRAN by - Bhattacharjee GP (1970) The incomplete gamma integral. Applied Statistics, - 19: 285-287 (AS32) -*/ - int i; - double p=alpha, g=ln_gamma_alpha; - double accurate=1e-8, overflow=1e30; - double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6]; - - if (x==0) return (0); - if (x<0 || p<=0) return (-1); - - factor=exp(p*log(x)-x-g); - if (x>1 && x>=p) goto l30; - /* (1) series expansion */ - gin=1; term=1; rn=p; - l20: - rn++; - term*=x/rn; gin+=term; - - if (term > accurate) goto l20; - gin*=factor/p; - goto l50; - l30: - /* (2) continued fraction */ - a=1-p; b=a+x+1; term=0; - pn[0]=1; pn[1]=x; pn[2]=x+1; pn[3]=x*b; - gin=pn[2]/pn[3]; - l32: - a++; b+=2; term++; an=a*term; - for (i=0; i<2; i++) pn[i+4]=b*pn[i+2]-an*pn[i]; - if (pn[5] == 0) goto l35; - rn=pn[4]/pn[5]; dif=fabs(gin-rn); - if (dif>accurate) goto l34; - if (dif<=accurate*rn) goto l42; - l34: - gin=rn; - l35: - for (i=0; i<4; i++) pn[i]=pn[i+2]; - if (fabs(pn[4]) < overflow) goto l32; - for (i=0; i<4; i++) pn[i]/=overflow; - goto l32; - l42: - gin=1-factor*gin; - - l50: - return (gin); -} - - -/* functions concerning the CDF and percentage points of the gamma and - Chi2 distribution -*/ -static double PointNormal (double prob) -{ -/* returns z so that Prob{x.999998 || v<=0) return (-1); - - g = LnGamma (v/2); - xx=v/2; c=xx-1; - if (v >= -1.24*log(p)) goto l1; - - ch=pow((p*xx*exp(g+xx*aa)), 1/xx); - if (ch-e<0) return (ch); - goto l4; -l1: - if (v>.32) goto l3; - ch=0.4; a=log(1-p); -l2: - q=ch; p1=1+ch*(4.67+ch); p2=ch*(6.73+ch*(6.66+ch)); - t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2; - ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t; - if (fabs(q/ch-1)-.01 <= 0) goto l4; - else goto l2; - -l3: - x=PointNormal (p); - p1=0.222222/v; ch=v*pow((x*sqrt(p1)+1-p1), 3.0); - if (ch>2.2*v+6) ch=-2*(log(1-p)-c*log(.5*ch)+g); -l4: - - do - { - q=ch; p1=.5*ch; - if ((t=IncompleteGamma 
(p1, xx, g))<0) { - return (-1); - } - p2=p-t; - t=p2*exp(xx*aa+g+p1-c*log(ch)); - b=t/ch; a=0.5*t-b*c; - - s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420; - s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520; - s3=(210+a*(462+a*(707+932*a)))/2520; - s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040; - s5=(84+264*a+c*(175+606*a))/2520; - s6=(120+c*(346+127*c))/5040; - ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6)))))); - } - while (fabs(q/ch-1) > e); - - return (ch); -} - - -/* Incomplete Gamma function Q(a,x) - - this is a cleanroom implementation of NRs gammq(a,x) -*/ -double IncompleteGammaQ (double a, double x) -{ - return 1.0-IncompleteGamma (x, a, LnGamma(a)); -} - - -/* probability that the observed chi-square - exceeds chi2 even if model is correct */ -double chi2prob (int deg, double chi2) -{ - return IncompleteGammaQ (0.5*deg, 0.5*chi2); -} - - - -/* chi square test - ef expected frequencies (sum up to 1 !!) - of observed frequencies (sum up to the number of samples) - numcat number of categories - returns critical significance level */ -double chi2test(double *ef, int *of, int numcat, int *chi2fail) -{ - double chi2, criticals, efn; - int i, below1, below5, reducedcat; - int samples; - - *chi2fail = FALSE; - reducedcat = numcat; - below1 = 0; - below5 = 0; - - /* compute number of samples */ - samples = 0; - for (i = 0; i < numcat; i++) - samples = samples + of[i]; - - /* compute chi square */ - chi2 = 0; - for (i = 0; i < numcat; i++) { - efn = ef[i]*((double) samples); - if (efn < 1.0) below1++; - if (efn < 5.0) below5++; - if (efn == 0.0) { - reducedcat--; - fprintf(stdout, "FPE error: samples=%d, ef[%d]=%f, of[%d]=%d, efn=%f, nc=%d, rc=%d\n", - samples, i, ef[i], i, of[i], efn, numcat, reducedcat); - fprintf(stdout, "PLEASE REPORT THIS ERROR TO DEVELOPERS !!!\n"); - fflush(stdout); - } else chi2 = chi2 + ((double) of[i]-efn)*((double) of[i]-efn)/efn; - } - - /* compute significance */ - criticals = chi2prob (numcat-1, chi2); - - /* no 
expected frequency category (sum up to # samples) below 1.0 */ - if (below1 > 0) *chi2fail = TRUE; - /* no more than 1/5 of the frequency categories below 5.0 */ - if (below5 > (int) floor(samples/5.0)) *chi2fail = TRUE; - - return criticals; -} - - -/* chi square test - ef expected frequencies (sum up to 1 !!) - of observed frequencies (sum up to the number of samples) - numcat number of categories - returns critical significance level */ -double altchi2test(double *ef, int *of, int numcat, int *chi2fail) -{ - double chi2, criticals, efn; - int i, below1, below5; - int samples; - - *chi2fail = FALSE; - below1 = 0; - below5 = 0; - - /* compute number of samples */ - samples = 0; - for (i = 0; i < numcat; i++) - samples = samples + of[i]; - - /* compute chi square */ - chi2 = 0; - for (i = 0; i < numcat; i++) { - efn = ef[i]*((double) samples); - if (efn < 1.0) below1++; - if (efn < 5.0) below5++; - chi2 = chi2 + ((double) of[i]-efn)*((double) of[i]-efn)/efn; - } - - /* compute significance */ - criticals = chi2prob (numcat-1, chi2); - - /* no expected frequency category (sum up to # samples) below 1.0 */ - if (below1 > 0) *chi2fail = TRUE; - /* no more than 1/5 of the frequency categories below 5.0 */ - if (below5 > (int) floor(samples/5.0)) *chi2fail = TRUE; - - return criticals; -} diff --git a/forester/archive/RIO/others/puzzle_mod/src/gamma.h b/forester/archive/RIO/others/puzzle_mod/src/gamma.h deleted file mode 100644 index 975f4ee..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/gamma.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * gamma.h - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - -#ifndef _GAMMA_ -#define _GAMMA_ - -double densityGamma (double, double); -double cdfGamma (double, double); -double icdfGamma (double, double); -double momentGamma (int, double); - -double LnGamma (double); -double IncompleteGammaQ (double, double); - -double chi2prob (int, double); -double chi2test (double *, int *, int , int *); - - -#endif /* _GAMMA_ */ diff --git a/forester/archive/RIO/others/puzzle_mod/src/ml.h b/forester/archive/RIO/others/puzzle_mod/src/ml.h deleted file mode 100644 index a0aa981..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/ml.h +++ /dev/null @@ -1,279 +0,0 @@ -/* - * ml.h - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -#ifndef _ML_ -#define _ML_ - -/* definitions */ - -#define MINTS 0.20 /* Ts/Tv parameter */ -#define MAXTS 30.0 -#define MINYR 0.10 /* Y/R Ts parameter */ -#define MAXYR 6.00 -#define MINFI 0.00 /* fraction invariable sites */ -#define MAXFI 0.99 /* only for input */ -#define MINGE 0.01 /* rate heterogeneity parameter */ -#define MAXGE 0.99 -#define MINCAT 4 /* discrete Gamma categories */ -#define MAXCAT 16 - -#define RMHROOT 5.0 /* upper relative bound for height of root */ -#define MAXARC 900.0 /* upper limit on branch length (PAM) = 6.0 */ -#define MINARC 0.001 /* lower limit on branch length (PAM) = 0.00001 */ -#define EPSILON 0.0001 /* error in branch length (PAM) = 0.000001 */ -#define HEPSILON 0.0001 /* error in node and root heights */ -#define MAXIT 100 /* maximum number of iterates of smoothing */ -#define MINFDIFF 0.00002 /* lower limit on base frequency differences */ -#define MINFREQ 0.0001 /* lower limit on base frequencies = 0.01% */ -#define NUMQBRNCH 5 /* number of 
branches in a quartet */ -#define NUMQIBRNCH 1 /* number of internal branches in a quartet */ -#define NUMQSPC 4 /* number of sequences in a quartet */ - -/* 2D minimisation */ -#define PEPS1 0.01 /* epsilon substitution process estimation */ -#define PEPS2 0.01 /* epsilon rate heterogeneity estimation */ - -/* quartet series */ -#define MINPERTAXUM 2 -#define MAXPERTAXUM 6 -#define TSDIFF 0.20 -#define YRDIFF 0.10 - -/* type definitions */ - -typedef struct node -{ - struct node *isop; - struct node *kinp; - int descen; - int number; - double length; - double lengthc; - double varlen; - double height; - double varheight; - ivector paths; - cvector eprob; - dcube partials; /* partial likelihoods */ - char *label; /* internal labels */ -} Node; - -typedef struct tree -{ - Node *rootp; - Node **ebrnchp; /* list of pointers to external branches */ - Node **ibrnchp; /* list of pointers to internal branches */ - double lklhd; /* total log-likelihood */ - double lklhdc; /* total log-likelihood clock */ - dmatrix condlkl; /* likelihoods for each pattern and non-zero rate */ - double rssleast; -} Tree; - - -/* global variables */ - -EXTERN Node *chep; /* pointer to current height node */ -EXTERN Node *rootbr; /* pointer to root branch */ -EXTERN Node **heights; /* pointer to height nodes in unrooted tree */ -EXTERN int Numhts; /* number of height nodes in unrooted tree */ -EXTERN double hroot; /* height of root */ -EXTERN double varhroot; /* variance of height of root */ -EXTERN double maxhroot; /* maximal height of root */ -EXTERN int locroot; /* location of root */ -EXTERN int numbestroot; /* number of best locations for root */ -EXTERN int clockmode; /* clocklike vs. 
nonclocklike computation */ -EXTERN cmatrix Identif; /* sequence names */ -EXTERN cmatrix Seqchar; /* ML sequence data */ -EXTERN cmatrix Seqpat; /* ordered site patterns */ -EXTERN ivector constpat; /* indicates constant site patterns */ -EXTERN cvector seqchi; -EXTERN cvector seqchj; -EXTERN dcube partiali; -EXTERN dcube partialj; -EXTERN dcube ltprobr; /* transition probabilites (for all non-zero rates */ -EXTERN dmatrix Distanmat; /* matrix with maximum likelihood distances */ -EXTERN dmatrix Evec; /* Eigenvectors */ -EXTERN dmatrix Ievc; /* Inverse eigenvectors */ -EXTERN double TSparam; /* Ts/Tv parameter */ -EXTERN double tsmean, yrmean; -EXTERN double YRparam; /* Y/R Ts parameter */ -EXTERN double geerr; /* estimated error of rate heterogeneity */ -EXTERN double Geta; /* rate heterogeneity parameter */ -EXTERN double fracconst; /* fraction of constant sites */ -EXTERN double fracconstpat;/* fraction of constant patterns */ -EXTERN double Proportion; /* for tree drawing */ -EXTERN double tserr; /* estimated error of TSparam */ -EXTERN double yrerr; /* estimated error of YRparam */ -EXTERN double fracinv; /* fraction of invariable sites */ -EXTERN double fierr; /* estimated error of fracinv */ -EXTERN dvector Brnlength; -EXTERN dvector Distanvec; -EXTERN dvector Eval; /* Eigenvalues of 1 PAM rate matrix */ -EXTERN dvector Freqtpm; /* base frequencies */ -EXTERN dvector Rates; /* rate of each of the categories */ -EXTERN dmatrix iexp; -EXTERN imatrix Basecomp; /* base composition of each taxon */ -EXTERN ivector usedtaxa; /* list needed in the input treefile procedure */ -EXTERN int numtc; /* auxiliary variable for printing rooted tree */ -EXTERN int qcalg_optn; /* use quartet subsampling algorithm */ -EXTERN int approxp_optn; /* approximate parameter estimation */ -EXTERN int chi2fail; /* flag for chi2 test */ -EXTERN int Converg; /* flag for ML convergence (no clock) */ -EXTERN int Convergc; /* flag for ML convergence (clock) */ -EXTERN int data_optn; /* 
type of sequence input data */ -EXTERN int Dayhf_optn; /* Dayhoff model */ -EXTERN int HKY_optn; /* use HKY model */ -EXTERN int Jtt_optn; /* JTT model */ -EXTERN int blosum62_optn; /* BLOSUM 62 model */ -EXTERN int mtrev_optn; /* mtREV model */ -EXTERN int cprev_optn; /* cpREV model */ -EXTERN int vtmv_optn; /* VT model */ -EXTERN int wag_optn; /* WAG model */ -EXTERN int Maxsite; /* number of ML characters per taxum */ -EXTERN int Maxspc; /* number of sequences */ -EXTERN int mlmode; /* quartet ML or user defined tree ML */ -EXTERN int nuc_optn; /* nucleotide (4x4) models */ -EXTERN int Numbrnch; /* number of branches of current tree */ -EXTERN int numcats; /* number of rate categories */ -EXTERN int Numconst; /* number of constant sites */ -EXTERN int Numconstpat; /* number of constant patterns */ -EXTERN int Numibrnch; /* number of internal branches of current tree */ -EXTERN int Numitc; /* number of ML iterations assumning clock */ -EXTERN int Numit; /* number of ML iterations if there is convergence */ -EXTERN int Numptrn; /* number of site patterns */ -EXTERN int Numspc; /* number of sequences of current tree */ -EXTERN int optim_optn; /* optimize model parameters */ -EXTERN int grate_optim; /* optimize Gamma rate heterogeneity parameter */ -EXTERN int SH_optn; /* SH nucleotide (16x16) model */ -EXTERN int TN_optn; /* use TN model */ -EXTERN int tpmradix; /* number of different states */ -EXTERN int fracinv_optim; /* optimize fraction of invariable sites */ -EXTERN int typ_optn; /* type of PUZZLE analysis */ -EXTERN ivector Weight; /* weight of each site pattern */ -EXTERN Tree *Ctree; /* pointer to current tree */ -EXTERN ulivector badtaxon; /* involment of each taxon in a bad quartet */ -EXTERN int qca, qcb, qcc, qcd; /* quartet currently optimized */ -EXTERN ivector Alias; /* link site -> corresponding site pattern */ -EXTERN ivector bestrate; /* optimal assignment of rates to sequence sites */ - -EXTERN int bestratefound; - -/* function prototypes of all 
ml function */ - -void convfreq(dvector); -void radixsort(cmatrix, ivector, int, int, int *); -void condenceseq(cmatrix, ivector, cmatrix, ivector, int, int, int); -void countconstantsites(cmatrix, ivector, int, int, int *, int*); -void evaluateseqs(void); -void elmhes(dmatrix, ivector, int); -void eltran(dmatrix, dmatrix, ivector, int); -void mcdiv(double, double, double, double, double *, double *); -void hqr2(int, int, int, dmatrix, dmatrix, dvector, dvector); -void onepamratematrix(dmatrix); -void eigensystem(dvector, dmatrix); -void luinverse(dmatrix, dmatrix, int); -void checkevector(dmatrix, dmatrix, int); -void tranprobmat(void); -void tprobmtrx(double, dmatrix); -double comptotloglkl(dmatrix); -void allsitelkl(dmatrix, dvector); -double pairlkl(double); -double mldistance(int, int); -void initdistan(void); -void computedistan(void); -void productpartials(Node *); -void partialsinternal(Node *); -void partialsexternal(Node *); -void initpartials(Tree *); -double intlkl(double); -void optinternalbranch(Node *); -double extlkl(double); -void optexternalbranch(Node *); -void finishlkl(Node *); -double optlkl(Tree *); -double treelkl(Tree *); -void luequation(dmatrix, dvector, int); -void lslength(Tree *, dvector, int, int, dvector); - -void getusertree(FILE *, cvector, int); -Node *internalnode(Tree *, char **, int *); -void constructtree(Tree *, cvector); -void removebasalbif(cvector); -void makeusertree(FILE *); -Tree *new_tree(int, int, cmatrix); -Tree *new_quartet(int, cmatrix); -void free_tree(Tree *, int); -void make_quartet(int, int, int, int); -void changedistan(dmatrix, dvector, int); -double quartet_lklhd(int, int, int, int); -double quartet_alklhd(int, int, int, int); -void readusertree(FILE *); -double usertree_lklhd(void); -double usertree_alklhd(void); -void mlstart(void); -void distupdate(int, int, int, int); -void mlfinish(void); -void prbranch(Node *, int, int, int, ivector, ivector, FILE *); -void getproportion(double *, dvector, int); -void 
prtopology(FILE *); -void fputphylogeny(FILE *); -void resulttree(FILE *); -void njtree(FILE *); -void njdistantree(Tree *); -void findbestratecombination(void); -void printbestratecombination(FILE *); -int checkedge(int); -void fputsubstree(FILE *, Node *); -void fputrooted(FILE *, int); -void findheights(Node *); -void initclock(int); -double clock_alklhd(int); -double heightlkl(double); -void optheight(void); -double rheightlkl(double); -void optrheight(void); -double clock_lklhd(int); -int findrootedge(void); -void resultheights(FILE *); - -double homogentest(int); -void YangDiscreteGamma(double, int, double *); -void updaterates(void); -void computestat(double *, int, double *, double *); -double quartetml(int, int, int, int); -double opttsq(double); -double optyrq(double); -void optimseqevolparamsq(void); -double opttst(double); -double optyrt(double); -void optimseqevolparamst(void); -double optfi(double); -double optge(double); -void optimrateparams(void); - -int gettpmradix(void); -void rtfdata(dmatrix, double *); -int code2int(cvector); -char *int2code(int); - -void jttdata(dmatrix, double *); -void dyhfdata(dmatrix, double *); -void mtrevdata(dmatrix, double *); -void cprev45data(dmatrix, double *); -void blosum62data(dmatrix, double *); -void vtmvdata(dmatrix, double *); -void wagdata(dmatrix, double *); - -#endif diff --git a/forester/archive/RIO/others/puzzle_mod/src/ml1.c b/forester/archive/RIO/others/puzzle_mod/src/ml1.c deleted file mode 100644 index 0e905ef..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/ml1.c +++ /dev/null @@ -1,1734 +0,0 @@ -/* - * ml1.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - - -/******************************************************************************/ -/* definitions and prototypes */ -/******************************************************************************/ - -#define EXTERN extern - -/* prototypes */ -#include -#include -#include -#include -#include "util.h" -#include "ml.h" - -#define STDOUT stdout -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUTFILE STDOUT, -#endif - - -/******************************************************************************/ -/* compacting sequence data information */ -/******************************************************************************/ - - -/***************************** internal functions *****************************/ - - -/* make all frequencies a little different */ -void convfreq(dvector freqemp) -{ - int i, j, maxi=0; - double freq, maxfreq, sum; - - - sum = 0.0; - maxfreq = 0.0; - for (i = 0; i < tpmradix; i++) { - freq = freqemp[i]; - if (freq < MINFREQ) freqemp[i] = MINFREQ; - if (freq > maxfreq) { - maxfreq = freq; - maxi = i; - } - sum += freqemp[i]; - } - freqemp[maxi] += 1.0 - sum; - - for (i = 0; i < tpmradix - 1; i++) { - for (j = i + 1; j < tpmradix; j++) { - if (freqemp[i] == freqemp[j]) { - freqemp[i] += MINFDIFF/2.0; - freqemp[j] -= MINFDIFF/2.0; - } - } - } -} - -/* sort site patters of original input data */ -void radixsort(cmatrix seqchar, ivector ali, int maxspc, int maxsite, - int *numptrn) -{ - int i, j, k, l, n, pass; - int *awork; - int *count; - - - awork = new_ivector(maxsite); - count = new_ivector(tpmradix+1); - for (i = 0; i < maxsite; i++) - ali[i] = i; - for (pass = maxspc - 1; pass >= 0; pass--) { - for (j = 0; j < tpmradix+1; j++) - count[j] = 0; - for (i = 0; i < maxsite; i++) - count[(int) seqchar[pass][ali[i]]]++; - for (j = 1; j < tpmradix+1; j++) - count[j] += 
count[j-1]; - for (i = maxsite-1; i >= 0; i--) - awork[ --count[(int) seqchar[pass][ali[i]]] ] = ali[i]; - for (i = 0; i < maxsite; i++) - ali[i] = awork[i]; - } - free_ivector(awork); - free_ivector(count); - n = 1; - for (j = 1; j < maxsite; j++) { - k = ali[j]; - l = ali[j-1]; - for (i = 0; i < maxspc; i++) { - if (seqchar[i][l] != seqchar[i][k]) { - n++; - break; - } - } - } - *numptrn = n; -} - - -void condenceseq(cmatrix seqchar, ivector ali, cmatrix seqconint, - ivector weight, int maxspc, int maxsite, int numptrn) -{ - int i, j, k, n; - int agree_flag; /* boolean */ - - - n = 0; - k = ali[n]; - for (i = 0; i < maxspc; i++) { - seqconint[i][n] = seqchar[i][k]; - } - weight[n] = 1; - Alias[k] = 0; - for (j = 1; j < maxsite; j++) { - k = ali[j]; - agree_flag = TRUE; - for (i = 0; i < maxspc; i++) { - if (seqconint[i][n] != seqchar[i][k]) { - agree_flag = FALSE; - break; - } - } - if (agree_flag == FALSE) { - n++; - for (i = 0; i < maxspc; i++) { - seqconint[i][n] = seqchar[i][k]; - } - weight[n] = 1; - Alias[k] = n; - } else { - weight[n]++; - Alias[k] = n; - } - } - n++; - if (numptrn != n) { - /* Problem in condenceseq */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR A TO DEVELOPERS\n\n\n"); - exit(1); - } -} - -void countconstantsites(cmatrix seqpat, ivector weight, int maxspc, int numptrn, - int *numconst, int *numconstpat) -{ - int character, s, i, constflag; - - *numconst = 0; - *numconstpat = 0; - for (s = 0; s < numptrn; s++) { /* check all patterns */ - constpat[s] = FALSE; - constflag = TRUE; - character = seqpat[0][s]; - for (i = 1; i < maxspc; i++) { - if (seqpat[i][s] != character) { - constflag = FALSE; - break; - } - } - if (character != tpmradix && constflag) { - (*numconst) = (*numconst) + weight[s]; - (*numconstpat)++; - constpat[s] = TRUE; - } - } -} - -/***************************** exported functions *****************************/ - - -void evaluateseqs() -{ - ivector ali; - - convfreq(Freqtpm); /* make all frequencies slightly 
different */ - ali = new_ivector(Maxsite); - radixsort(Seqchar, ali, Maxspc, Maxsite, &Numptrn); - Seqpat = new_cmatrix(Maxspc, Numptrn); - constpat = new_ivector(Numptrn); - Weight = new_ivector(Numptrn); - condenceseq(Seqchar, ali, Seqpat, Weight, Maxspc, Maxsite, Numptrn); - free_ivector(ali); - countconstantsites(Seqpat, Weight, Maxspc, Numptrn, &Numconst, &Numconstpat); - fracconstpat = (double) Numconstpat / (double) Numptrn; - fracconst = (double) Numconst / (double) Maxsite; -} - - -/******************************************************************************/ -/* computation of Pij(t) */ -/******************************************************************************/ - - -/***************************** internal functions *****************************/ - - -void elmhes(dmatrix a, ivector ordr, int n) -{ - int m, j, i; - double y, x; - - - for (i = 0; i < n; i++) - ordr[i] = 0; - for (m = 2; m < n; m++) { - x = 0.0; - i = m; - for (j = m; j <= n; j++) { - if (fabs(a[j - 1][m - 2]) > fabs(x)) { - x = a[j - 1][m - 2]; - i = j; - } - } - ordr[m - 1] = i; /* vector */ - if (i != m) { - for (j = m - 2; j < n; j++) { - y = a[i - 1][j]; - a[i - 1][j] = a[m - 1][j]; - a[m - 1][j] = y; - } - for (j = 0; j < n; j++) { - y = a[j][i - 1]; - a[j][i - 1] = a[j][m - 1]; - a[j][m - 1] = y; - } - } - if (x != 0.0) { - for (i = m; i < n; i++) { - y = a[i][m - 2]; - if (y != 0.0) { - y /= x; - a[i][m - 2] = y; - for (j = m - 1; j < n; j++) - a[i][j] -= y * a[m - 1][j]; - for (j = 0; j < n; j++) - a[j][m - 1] += y * a[j][i]; - } - } - } - } -} - - -void eltran(dmatrix a, dmatrix zz, ivector ordr, int n) -{ - int i, j, m; - - - for (i = 0; i < n; i++) { - for (j = i + 1; j < n; j++) { - zz[i][j] = 0.0; - zz[j][i] = 0.0; - } - zz[i][i] = 1.0; - } - if (n <= 2) - return; - for (m = n - 1; m >= 2; m--) { - for (i = m; i < n; i++) - zz[i][m - 1] = a[i][m - 2]; - i = ordr[m - 1]; - if (i != m) { - for (j = m - 1; j < n; j++) { - zz[m - 1][j] = zz[i - 1][j]; - zz[i - 1][j] = 0.0; - 
} - zz[i - 1][m - 1] = 1.0; - } - } -} - - -void mcdiv(double ar, double ai, double br, double bi, - double *cr, double *ci) -{ - double s, ars, ais, brs, bis; - - - s = fabs(br) + fabs(bi); - ars = ar / s; - ais = ai / s; - brs = br / s; - bis = bi / s; - s = brs * brs + bis * bis; - *cr = (ars * brs + ais * bis) / s; - *ci = (ais * brs - ars * bis) / s; -} - - -void hqr2(int n, int low, int hgh, dmatrix h, - dmatrix zz, dvector wr, dvector wi) -{ - int i, j, k, l=0, m, en, na, itn, its; - double p=0, q=0, r=0, s=0, t, w, x=0, y, ra, sa, vi, vr, z=0, norm, tst1, tst2; - int notlas; /* boolean */ - - - norm = 0.0; - k = 1; - /* store isolated roots and compute matrix norm */ - for (i = 0; i < n; i++) { - for (j = k - 1; j < n; j++) - norm += fabs(h[i][j]); - k = i + 1; - if (i + 1 < low || i + 1 > hgh) { - wr[i] = h[i][i]; - wi[i] = 0.0; - } - } - en = hgh; - t = 0.0; - itn = n * 30; - while (en >= low) { /* search for next eigenvalues */ - its = 0; - na = en - 1; - while (en >= 1) { - /* look for single small sub-diagonal element */ - for (l = en; l > low; l--) { - s = fabs(h[l - 2][l - 2]) + fabs(h[l - 1][l - 1]); - if (s == 0.0) - s = norm; - tst1 = s; - tst2 = tst1 + fabs(h[l - 1][l - 2]); - if (tst2 == tst1) - goto L100; - } - l = low; - L100: - x = h[en - 1][en - 1]; /* form shift */ - if (l == en || l == na) - break; - if (itn == 0) { - /* all eigenvalues have not converged */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR B TO DEVELOPERS\n\n\n"); - exit(1); - } - y = h[na - 1][na - 1]; - w = h[en - 1][na - 1] * h[na - 1][en - 1]; - /* form exceptional shift */ - if (its == 10 || its == 20) { - t += x; - for (i = low - 1; i < en; i++) - h[i][i] -= x; - s = fabs(h[en - 1][na - 1]) + fabs(h[na - 1][en - 3]); - x = 0.75 * s; - y = x; - w = -0.4375 * s * s; - } - its++; - itn--; - /* look for two consecutive small sub-diagonal elements */ - for (m = en - 2; m >= l; m--) { - z = h[m - 1][m - 1]; - r = x - z; - s = y - z; - p = (r * s - w) / h[m][m - 1] + 
h[m - 1][m]; - q = h[m][m] - z - r - s; - r = h[m + 1][m]; - s = fabs(p) + fabs(q) + fabs(r); - p /= s; - q /= s; - r /= s; - if (m == l) - break; - tst1 = fabs(p) * - (fabs(h[m - 2][m - 2]) + fabs(z) + fabs(h[m][m])); - tst2 = tst1 + fabs(h[m - 1][m - 2]) * (fabs(q) + fabs(r)); - if (tst2 == tst1) - break; - } - for (i = m + 2; i <= en; i++) { - h[i - 1][i - 3] = 0.0; - if (i != m + 2) - h[i - 1][i - 4] = 0.0; - } - for (k = m; k <= na; k++) { - notlas = (k != na); - if (k != m) { - p = h[k - 1][k - 2]; - q = h[k][k - 2]; - r = 0.0; - if (notlas) - r = h[k + 1][k - 2]; - x = fabs(p) + fabs(q) + fabs(r); - if (x != 0.0) { - p /= x; - q /= x; - r /= x; - } - } - if (x != 0.0) { - if (p < 0.0) /* sign */ - s = - sqrt(p * p + q * q + r * r); - else - s = sqrt(p * p + q * q + r * r); - if (k != m) - h[k - 1][k - 2] = -s * x; - else { - if (l != m) - h[k - 1][k - 2] = -h[k - 1][k - 2]; - } - p += s; - x = p / s; - y = q / s; - z = r / s; - q /= p; - r /= p; - if (!notlas) { - for (j = k - 1; j < n; j++) { /* row modification */ - p = h[k - 1][j] + q * h[k][j]; - h[k - 1][j] -= p * x; - h[k][j] -= p * y; - } - j = (en < (k + 3)) ? en : (k + 3); /* min */ - for (i = 0; i < j; i++) { /* column modification */ - p = x * h[i][k - 1] + y * h[i][k]; - h[i][k - 1] -= p; - h[i][k] -= p * q; - } - /* accumulate transformations */ - for (i = low - 1; i < hgh; i++) { - p = x * zz[i][k - 1] + y * zz[i][k]; - zz[i][k - 1] -= p; - zz[i][k] -= p * q; - } - } else { - for (j = k - 1; j < n; j++) { /* row modification */ - p = h[k - 1][j] + q * h[k][j] + r * h[k + 1][j]; - h[k - 1][j] -= p * x; - h[k][j] -= p * y; - h[k + 1][j] -= p * z; - } - j = (en < (k + 3)) ? 
en : (k + 3); /* min */ - for (i = 0; i < j; i++) { /* column modification */ - p = x * h[i][k - 1] + y * h[i][k] + z * h[i][k + 1]; - h[i][k - 1] -= p; - h[i][k] -= p * q; - h[i][k + 1] -= p * r; - } - /* accumulate transformations */ - for (i = low - 1; i < hgh; i++) { - p = x * zz[i][k - 1] + y * zz[i][k] + - z * zz[i][k + 1]; - zz[i][k - 1] -= p; - zz[i][k] -= p * q; - zz[i][k + 1] -= p * r; - } - } - } - } /* for k */ - } /* while infinite loop */ - if (l == en) { /* one root found */ - h[en - 1][en - 1] = x + t; - wr[en - 1] = h[en - 1][en - 1]; - wi[en - 1] = 0.0; - en = na; - continue; - } - y = h[na - 1][na - 1]; - w = h[en - 1][na - 1] * h[na - 1][en - 1]; - p = (y - x) / 2.0; - q = p * p + w; - z = sqrt(fabs(q)); - h[en - 1][en - 1] = x + t; - x = h[en - 1][en - 1]; - h[na - 1][na - 1] = y + t; - if (q >= 0.0) { /* real pair */ - if (p < 0.0) /* sign */ - z = p - fabs(z); - else - z = p + fabs(z); - wr[na - 1] = x + z; - wr[en - 1] = wr[na - 1]; - if (z != 0.0) - wr[en - 1] = x - w / z; - wi[na - 1] = 0.0; - wi[en - 1] = 0.0; - x = h[en - 1][na - 1]; - s = fabs(x) + fabs(z); - p = x / s; - q = z / s; - r = sqrt(p * p + q * q); - p /= r; - q /= r; - for (j = na - 1; j < n; j++) { /* row modification */ - z = h[na - 1][j]; - h[na - 1][j] = q * z + p * h[en - 1][j]; - h[en - 1][j] = q * h[en - 1][j] - p * z; - } - for (i = 0; i < en; i++) { /* column modification */ - z = h[i][na - 1]; - h[i][na - 1] = q * z + p * h[i][en - 1]; - h[i][en - 1] = q * h[i][en - 1] - p * z; - } - /* accumulate transformations */ - for (i = low - 1; i < hgh; i++) { - z = zz[i][na - 1]; - zz[i][na - 1] = q * z + p * zz[i][en - 1]; - zz[i][en - 1] = q * zz[i][en - 1] - p * z; - } - } else { /* complex pair */ - wr[na - 1] = x + p; - wr[en - 1] = x + p; - wi[na - 1] = z; - wi[en - 1] = -z; - } - en -= 2; - } /* while en >= low */ - /* backsubstitute to find vectors of upper triangular form */ - if (norm != 0.0) { - for (en = n; en >= 1; en--) { - p = wr[en - 1]; - q = wi[en - 1]; - 
na = en - 1; - if (q == 0.0) {/* real vector */ - m = en; - h[en - 1][en - 1] = 1.0; - if (na != 0) { - for (i = en - 2; i >= 0; i--) { - w = h[i][i] - p; - r = 0.0; - for (j = m - 1; j < en; j++) - r += h[i][j] * h[j][en - 1]; - if (wi[i] < 0.0) { - z = w; - s = r; - } else { - m = i + 1; - if (wi[i] == 0.0) { - t = w; - if (t == 0.0) { - tst1 = norm; - t = tst1; - do { - t = 0.01 * t; - tst2 = norm + t; - } while (tst2 > tst1); - } - h[i][en - 1] = -(r / t); - } else { /* solve real equations */ - x = h[i][i + 1]; - y = h[i + 1][i]; - q = (wr[i] - p) * (wr[i] - p) + wi[i] * wi[i]; - t = (x * s - z * r) / q; - h[i][en - 1] = t; - if (fabs(x) > fabs(z)) - h[i + 1][en - 1] = (-r - w * t) / x; - else - h[i + 1][en - 1] = (-s - y * t) / z; - } - /* overflow control */ - t = fabs(h[i][en - 1]); - if (t != 0.0) { - tst1 = t; - tst2 = tst1 + 1.0 / tst1; - if (tst2 <= tst1) { - for (j = i; j < en; j++) - h[j][en - 1] /= t; - } - } - } - } - } - } else if (q > 0.0) { - m = na; - if (fabs(h[en - 1][na - 1]) > fabs(h[na - 1][en - 1])) { - h[na - 1][na - 1] = q / h[en - 1][na - 1]; - h[na - 1][en - 1] = (p - h[en - 1][en - 1]) / - h[en - 1][na - 1]; - } else - mcdiv(0.0, -h[na - 1][en - 1], h[na - 1][na - 1] - p, q, - &h[na - 1][na - 1], &h[na - 1][en - 1]); - h[en - 1][na - 1] = 0.0; - h[en - 1][en - 1] = 1.0; - if (en != 2) { - for (i = en - 3; i >= 0; i--) { - w = h[i][i] - p; - ra = 0.0; - sa = 0.0; - for (j = m - 1; j < en; j++) { - ra += h[i][j] * h[j][na - 1]; - sa += h[i][j] * h[j][en - 1]; - } - if (wi[i] < 0.0) { - z = w; - r = ra; - s = sa; - } else { - m = i + 1; - if (wi[i] == 0.0) - mcdiv(-ra, -sa, w, q, &h[i][na - 1], - &h[i][en - 1]); - else { /* solve complex equations */ - x = h[i][i + 1]; - y = h[i + 1][i]; - vr = (wr[i] - p) * (wr[i] - p); - vr = vr + wi[i] * wi[i] - q * q; - vi = (wr[i] - p) * 2.0 * q; - if (vr == 0.0 && vi == 0.0) { - tst1 = norm * (fabs(w) + fabs(q) + fabs(x) + - fabs(y) + fabs(z)); - vr = tst1; - do { - vr = 0.01 * vr; - tst2 = tst1 + 
vr; - } while (tst2 > tst1); - } - mcdiv(x * r - z * ra + q * sa, - x * s - z * sa - q * ra, vr, vi, - &h[i][na - 1], &h[i][en - 1]); - if (fabs(x) > fabs(z) + fabs(q)) { - h[i + 1] - [na - 1] = (q * h[i][en - 1] - - w * h[i][na - 1] - ra) / x; - h[i + 1][en - 1] = (-sa - w * h[i][en - 1] - - q * h[i][na - 1]) / x; - } else - mcdiv(-r - y * h[i][na - 1], - -s - y * h[i][en - 1], z, q, - &h[i + 1][na - 1], &h[i + 1][en - 1]); - } - /* overflow control */ - t = (fabs(h[i][na - 1]) > fabs(h[i][en - 1])) ? - fabs(h[i][na - 1]) : fabs(h[i][en - 1]); - if (t != 0.0) { - tst1 = t; - tst2 = tst1 + 1.0 / tst1; - if (tst2 <= tst1) { - for (j = i; j < en; j++) { - h[j][na - 1] /= t; - h[j][en - 1] /= t; - } - } - } - } - } - } - } - } - /* end back substitution. vectors of isolated roots */ - for (i = 0; i < n; i++) { - if (i + 1 < low || i + 1 > hgh) { - for (j = i; j < n; j++) - zz[i][j] = h[i][j]; - } - } - /* multiply by transformation matrix to give vectors of - * original full matrix. */ - for (j = n - 1; j >= low - 1; j--) { - m = ((j + 1) < hgh) ? (j + 1) : hgh; /* min */ - for (i = low - 1; i < hgh; i++) { - z = 0.0; - for (k = low - 1; k < m; k++) - z += zz[i][k] * h[k][j]; - zz[i][j] = z; - } - } - } - return; -} - - -/* make rate matrix with 0.01 expected substitutions per unit time */ -void onepamratematrix(dmatrix a) -{ - int i, j; - double delta, temp, sum; - dvector m; - - for (i = 0; i < tpmradix; i++) - { - for (j = 0; j < tpmradix; j++) - { - a[i][j] = Freqtpm[j]*a[i][j]; - } - } - - m = new_dvector(tpmradix); - for (i = 0, sum = 0.0; i < tpmradix; i++) - { - for (j = 0, temp = 0.0; j < tpmradix; j++) - temp += a[i][j]; - m[i] = temp; /* row sum */ - sum += temp*Freqtpm[i]; /* exp. rate */ - } - delta = 0.01 / sum; /* 0.01 subst. 
per unit time */ - for (i = 0; i < tpmradix; i++) { - for (j = 0; j < tpmradix; j++) { - if (i != j) - a[i][j] = delta * a[i][j]; - else - a[i][j] = delta * (-m[i]); - } - } - free_dvector(m); -} - - -void eigensystem(dvector eval, dmatrix evec) -{ - dvector evali, forg; - dmatrix a, b; - ivector ordr; - int i, j, k, error; - double zero; - - - ordr = new_ivector(tpmradix); - evali = new_dvector(tpmradix); - forg = new_dvector(tpmradix); - a = new_dmatrix(tpmradix,tpmradix); - b = new_dmatrix(tpmradix,tpmradix); - - rtfdata(a, forg); /* get relative transition matrix and frequencies */ - - onepamratematrix(a); /* make 1 PAM rate matrix */ - - /* copy a to b */ - for (i = 0; i < tpmradix; i++) - for (j = 0; j < tpmradix; j++) - b[i][j] = a[i][j]; - - elmhes(a, ordr, tpmradix); /* compute eigenvalues and eigenvectors */ - eltran(a, evec, ordr, tpmradix); - hqr2(tpmradix, 1, tpmradix, a, evec, eval, evali); - - /* check eigenvalue equation */ - error = FALSE; - for (j = 0; j < tpmradix; j++) { - for (i = 0, zero = 0.0; i < tpmradix; i++) { - for (k = 0; k < tpmradix; k++) zero += b[i][k] * evec[k][j]; - zero -= eval[j] * evec[i][j]; - if (fabs(zero) > 1.0e-5) - error = TRUE; - } - } - if (error) - FPRINTF(STDOUTFILE "\nWARNING: Eigensystem doesn't satisfy eigenvalue equation!\n"); - - free_ivector(ordr); - free_dvector(evali); - free_dvector(forg); - free_dmatrix(a); - free_dmatrix(b); -} - - -void luinverse(dmatrix inmat, dmatrix imtrx, int size) -{ - double eps = 1.0e-20; /* ! 
*/ - int i, j, k, l, maxi=0, idx, ix, jx; - double sum, tmp, maxb, aw; - ivector index; - double *wk; - dmatrix omtrx; - - - index = new_ivector(tpmradix); - omtrx = new_dmatrix(tpmradix,tpmradix); - - /* copy inmat to omtrx */ - for (i = 0; i < tpmradix; i++) - for (j = 0; j < tpmradix; j++) - omtrx[i][j] = inmat[i][j]; - - wk = (double *) malloc((unsigned)size * sizeof(double)); - aw = 1.0; - for (i = 0; i < size; i++) { - maxb = 0.0; - for (j = 0; j < size; j++) { - if (fabs(omtrx[i][j]) > maxb) - maxb = fabs(omtrx[i][j]); - } - if (maxb == 0.0) { - /* Singular matrix */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR C TO DEVELOPERS\n\n\n"); - exit(1); - } - wk[i] = 1.0 / maxb; - } - for (j = 0; j < size; j++) { - for (i = 0; i < j; i++) { - sum = omtrx[i][j]; - for (k = 0; k < i; k++) - sum -= omtrx[i][k] * omtrx[k][j]; - omtrx[i][j] = sum; - } - maxb = 0.0; - for (i = j; i < size; i++) { - sum = omtrx[i][j]; - for (k = 0; k < j; k++) - sum -= omtrx[i][k] * omtrx[k][j]; - omtrx[i][j] = sum; - tmp = wk[i] * fabs(sum); - if (tmp >= maxb) { - maxb = tmp; - maxi = i; - } - } - if (j != maxi) { - for (k = 0; k < size; k++) { - tmp = omtrx[maxi][k]; - omtrx[maxi][k] = omtrx[j][k]; - omtrx[j][k] = tmp; - } - aw = -aw; - wk[maxi] = wk[j]; - } - index[j] = maxi; - if (omtrx[j][j] == 0.0) - omtrx[j][j] = eps; - if (j != size - 1) { - tmp = 1.0 / omtrx[j][j]; - for (i = j + 1; i < size; i++) - omtrx[i][j] *= tmp; - } - } - for (jx = 0; jx < size; jx++) { - for (ix = 0; ix < size; ix++) - wk[ix] = 0.0; - wk[jx] = 1.0; - l = -1; - for (i = 0; i < size; i++) { - idx = index[i]; - sum = wk[idx]; - wk[idx] = wk[i]; - if (l != -1) { - for (j = l; j < i; j++) - sum -= omtrx[i][j] * wk[j]; - } else if (sum != 0.0) - l = i; - wk[i] = sum; - } - for (i = size - 1; i >= 0; i--) { - sum = wk[i]; - for (j = i + 1; j < size; j++) - sum -= omtrx[i][j] * wk[j]; - wk[i] = sum / omtrx[i][i]; - } - for (ix = 0; ix < size; ix++) - imtrx[ix][jx] = wk[ix]; - } - free((char *)wk); - wk = 
NULL; - free_ivector(index); - free_dmatrix(omtrx); -} - - -void checkevector(dmatrix evec, dmatrix ivec, int nn) -{ - int i, j, ia, ib, ic, error; - dmatrix matx; - double sum; - - - matx = new_dmatrix(nn, nn); - /* multiply matrix of eigenvectors and its inverse */ - for (ia = 0; ia < nn; ia++) { - for (ic = 0; ic < nn; ic++) { - sum = 0.0; - for (ib = 0; ib < nn; ib++) sum += evec[ia][ib] * ivec[ib][ic]; - matx[ia][ic] = sum; - } - } - /* check whether the unitary matrix is obtained */ - error = FALSE; - for (i = 0; i < nn; i++) { - for (j = 0; j < nn; j++) { - if (i == j) { - if (fabs(matx[i][j] - 1.0) > 1.0e-5) - error = TRUE; - } else { - if (fabs(matx[i][j]) > 1.0e-5) - error = TRUE; - } - } - } - if (error) { - FPRINTF(STDOUTFILE "\nWARNING: Inversion of eigenvector matrix not perfect!\n"); - } - free_dmatrix(matx); -} - - -/***************************** exported functions *****************************/ - - -/* compute 1 PAM rate matrix, its eigensystem, and the inverse matrix thereof */ -void tranprobmat() -{ - eigensystem(Eval, Evec); /* eigensystem of 1 PAM rate matrix */ - luinverse(Evec, Ievc, tpmradix); /* inverse eigenvectors are in Ievc */ - checkevector(Evec, Ievc, tpmradix); /* check whether inversion was OK */ -} - - -/* compute P(t) */ -void tprobmtrx(double arc, dmatrix tpr) -{ - register int i, j, k; - register double temp; - - - for (k = 0; k < tpmradix; k++) { - temp = exp(arc * Eval[k]); - for (j = 0; j < tpmradix; j++) - iexp[k][j] = Ievc[k][j] * temp; - } - for (i = 0; i < tpmradix; i++) { - for (j = 0; j < tpmradix; j++) { - temp = 0.0; - for (k = 0; k < tpmradix; k++) - temp += Evec[i][k] * iexp[k][j]; - tpr[i][j] = fabs(temp); - } - } -} - - -/******************************************************************************/ -/* estimation of maximum likelihood distances */ -/******************************************************************************/ - -/* compute total log-likelihood - input: likelihoods for each site and non-zero 
rate - output: total log-likelihood (incl. zero rate category) */ -double comptotloglkl(dmatrix cdl) -{ - int k, r; - double loglkl, fv, fv2, sitelkl; - - loglkl = 0.0; - fv = 1.0-fracinv; - fv2 = (1.0-fracinv)/(double) numcats; - - if (numcats == 1) { - - for (k = 0; k < Numptrn; k++) { - - /* compute likelihood for pattern k */ - sitelkl = cdl[0][k]*fv; - if (constpat[k] == TRUE) - sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]]; - - /* total log-likelihood */ - loglkl += log(sitelkl)*Weight[k]; - - } - - } else { - - for (k = 0; k < Numptrn; k++) { - - /* this general routine works always but it's better - to run it only when it's really necessary */ - - /* compute likelihood for pattern k */ - sitelkl = 0.0; - for (r = 0; r < numcats; r++) - sitelkl += cdl[r][k]; - sitelkl = fv2*sitelkl; - if (constpat[k] == TRUE) - sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]]; - - /* total log-likelihood */ - loglkl += log(sitelkl)*Weight[k]; - - } - - } - - return loglkl; -} - - -/* computes the site log-likelihoods - input: likelihoods for each site and non-zero rate - output: log-likelihood for each site */ -void allsitelkl(dmatrix cdl, dvector aslkl) -{ - int k, r; - double fv, fv2, sitelkl; - - fv = 1.0-fracinv; - fv2 = (1.0-fracinv)/(double) numcats; - - if (numcats == 1) { - - for (k = 0; k < Numptrn; k++) { - - /* compute likelihood for pattern k */ - sitelkl = cdl[0][k]*fv; - if (constpat[k] == TRUE) - sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]]; - - /* site log-likelihood */ - aslkl[k] = log(sitelkl); - } - - } else { - - for (k = 0; k < Numptrn; k++) { - - /* this general routine works always but it's better - to run it only when it's really necessary */ - - /* compute likelihood for pattern k */ - sitelkl = 0.0; - for (r = 0; r < numcats; r++) - sitelkl += cdl[r][k]; - sitelkl = fv2*sitelkl; - if (constpat[k] == TRUE) - sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]]; - - /* total log-likelihood */ - aslkl[k] = log(sitelkl); - - } - } -} - - 
-/***************************** internal functions *****************************/ - -/* compute negative log-likelihood of distance arc between sequences seqchi/j */ -double pairlkl(double arc) -{ - int k, r, ci, cj; - double loglkl, fv, sitelkl; - - - /* compute tpms */ - for (r = 0; r < numcats; r++) - /* compute tpm for rate category r */ - tprobmtrx(arc*Rates[r], ltprobr[r]); - - loglkl = 0.0; - fv = 1.0-fracinv; - - if (numcats == 1) { - - for (k = 0; k < Numptrn; k++) { - - /* compute likelihood for site k */ - ci = seqchi[k]; - cj = seqchj[k]; - if (ci != tpmradix && cj != tpmradix) - sitelkl = ltprobr[0][ci][cj]*fv; - else - sitelkl = fv; - if (ci == cj && ci != tpmradix) - sitelkl += fracinv*Freqtpm[ci]; - - /* total log-likelihood */ - loglkl += log(sitelkl)*Weight[k]; - - } - - } else { - - for (k = 0; k < Numptrn; k++) { - - /* this general routine works always but it's better - to run it only when it's really necessary */ - - /* compute likelihood for site k */ - ci = seqchi[k]; - cj = seqchj[k]; - if (ci != tpmradix && cj != tpmradix) { - sitelkl = 0.0; - for (r = 0; r < numcats; r++) - sitelkl += ltprobr[r][ci][cj]; - sitelkl = fv*sitelkl/(double) numcats; - } else - sitelkl = fv; - if (ci == cj && ci != tpmradix) - sitelkl += fracinv*Freqtpm[ci]; - - /* total log-likelihood */ - loglkl += log(sitelkl)*Weight[k]; - - } - - } - - /* return negative log-likelihood as we use a minimizing procedure */ - return -loglkl; -} - - -/***************************** exported functions *****************************/ - - -/* maximum likelihood distance between sequence i and j */ -double mldistance(int i, int j) -{ - double dist, fx, f2x; - - if (i == j) return 0.0; - - /* use old distance as start value */ - dist = Distanmat[i][j]; - - if (dist == 0.0) return 0.0; - - seqchi = Seqpat[i]; - seqchj = Seqpat[j]; - - if (dist <= MINARC) dist = MINARC+1.0; - if (dist >= MAXARC) dist = MAXARC-1.0; - - dist = onedimenmin(MINARC, dist, MAXARC, pairlkl, EPSILON, &fx, 
&f2x); - - return dist; -} - - -/* initialize distance matrix */ -void initdistan() -{ - int i, j, k, diff, x, y; - double obs, temp; - - for (i = 0; i < Maxspc; i++) { - Distanmat[i][i] = 0.0; - for (j = i + 1; j < Maxspc; j++) { - seqchi = Seqpat[i]; - seqchj = Seqpat[j]; - - /* count observed differences */ - diff = 0; - for (k = 0; k < Numptrn; k++) { - x = seqchi[k]; - y = seqchj[k]; - if (x != y && - x != tpmradix && - y != tpmradix) - diff += Weight[k]; - } - if (diff == 0) - Distanmat[i][j] = 0.0; - else { - /* use generalized JC correction to get first estimate - (for the SH model the observed distance is used) */ - /* observed distance */ - obs = (double) diff / (double) Maxsite; - temp = 1.0 - (double) obs*tpmradix/(tpmradix-1.0); - if (temp > 0.0 && !(data_optn == 0 && SH_optn)) - /* use JC corrected distance */ - Distanmat[i][j] = -100.0*(tpmradix-1.0)/tpmradix * log(temp); - else - /* use observed distance */ - Distanmat[i][j] = obs * 100.0; - if (Distanmat[i][j] < MINARC) Distanmat[i][j] = MINARC; - if (Distanmat[i][j] > MAXARC) Distanmat[i][j] = MAXARC; - } - Distanmat[j][i] = Distanmat[i][j]; - } - } -} - -/* compute distance matrix */ -void computedistan() -{ - int i, j; - - for (i = 0; i < Maxspc; i++) - for (j = i; j < Maxspc; j++) { - Distanmat[i][j] = mldistance(i, j); - Distanmat[j][i] = Distanmat[i][j]; - } -} - - -/******************************************************************************/ -/* computation of maximum likelihood edge lengths for a given tree */ -/******************************************************************************/ - - -/***************************** internal functions *****************************/ - - -/* multiply partial likelihoods */ -void productpartials(Node *op) -{ - Node *cp; - int i, j, r; - dcube opc, cpc; - - cp = op; - opc = op->partials; - while (cp->isop->isop != op) { - cp = cp->isop; - cpc = cp->partials; - for (r = 0; r < numcats; r++) - for (i = 0; i < Numptrn; i++) - for (j = 0; j < tpmradix; 
j++) - opc[r][i][j] *= cpc[r][i][j]; - } -} - - -/* compute internal partial likelihoods */ -void partialsinternal(Node *op) -{ - int i, j, k, r; - double sum; - dcube oprob, cprob; - - if (clockmode == 1) { /* clocklike branch lengths */ - for (r = 0; r < numcats; r++) { - tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]); - } - } else { /* non-clocklike branch lengths */ - for (r = 0; r < numcats; r++) { - tprobmtrx((op->length)*Rates[r], ltprobr[r]); - } - } - - oprob = op->partials; - cprob = op->kinp->isop->partials; - for (r = 0; r < numcats; r++) { - for (k = 0; k < Numptrn; k++) { - for (i = 0; i < tpmradix; i++) { - sum = 0.0; - for (j = 0; j < tpmradix; j++) - sum += ltprobr[r][i][j] * cprob[r][k][j]; - oprob[r][k][i] = sum; - } - } - } -} - - -/* compute external partial likelihoods */ -void partialsexternal(Node *op) -{ - int i, j, k, r; - dcube oprob; - cvector dseqi; - - if (clockmode == 1) { /* clocklike branch lengths */ - for (r = 0; r < numcats; r++) { - tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]); - } - } else { /* nonclocklike branch lengths */ - for (r = 0; r < numcats; r++) { - tprobmtrx((op->length)*Rates[r], ltprobr[r]); - } - } - - oprob = op->partials; - dseqi = op->kinp->eprob; - for (r = 0; r < numcats; r++) { - for (k = 0; k < Numptrn; k++) { - if ((j = dseqi[k]) == tpmradix) { - for (i = 0; i < tpmradix; i++) - oprob[r][k][i] = 1.0; - } else { - for (i = 0; i < tpmradix; i++) - oprob[r][k][i] = ltprobr[r][i][j]; - } - } - } -} - - -/* compute all partial likelihoods */ -void initpartials(Tree *tr) -{ - Node *cp, *rp; - - cp = rp = tr->rootp; - do { - cp = cp->isop->kinp; - if (cp->isop == NULL) { /* external node */ - cp = cp->kinp; /* not descen */ - partialsexternal(cp); - } else { /* internal node */ - if (!cp->descen) { - productpartials(cp->kinp->isop); - partialsinternal(cp); - } - } - } while (cp != rp); -} - - -/* compute log-likelihood given internal branch with length arc - between partials partiali and partials partialj */ 
-double intlkl(double arc) -{ - double sumlk, slk; - int r, s, i, j; - dmatrix cdl; - - cdl = Ctree->condlkl; - for (r = 0; r < numcats; r++) { - tprobmtrx(arc*Rates[r], ltprobr[r]); - } - for (r = 0; r < numcats; r++) { - for (s = 0; s < Numptrn; s++) { - sumlk = 0.0; - for (i = 0; i < tpmradix; i++) { - slk = 0.0; - for (j = 0; j < tpmradix; j++) - slk += partialj[r][s][j] * ltprobr[r][i][j]; - sumlk += Freqtpm[i] * partiali[r][s][i] * slk; - } - cdl[r][s] = sumlk; - } - } - - /* compute total log-likelihood for current tree */ - Ctree->lklhd = comptotloglkl(cdl); - - return -(Ctree->lklhd); /* we use a minimizing procedure */ -} - - -/* optimize internal branch */ -void optinternalbranch(Node *op) -{ - double arc, fx, f2x; - - partiali = op->isop->partials; - partialj = op->kinp->isop->partials; - arc = op->length; /* nonclocklike branch lengths */ - if (arc <= MINARC) arc = MINARC+1.0; - if (arc >= MAXARC) arc = MAXARC-1.0; - arc = onedimenmin(MINARC, arc, MAXARC, intlkl, EPSILON, &fx, &f2x); - op->kinp->length = arc; - op->length = arc; - - /* variance of branch length */ - f2x = fabs(f2x); - if (1.0/(MAXARC*MAXARC) < f2x) - op->varlen = 1.0/f2x; - else - op->varlen = MAXARC*MAXARC; -} - - -/* compute log-likelihood given external branch with length arc - between partials partiali and sequence seqchi */ -double extlkl(double arc) -{ - double sumlk; - int r, s, i, j; - dvector opb; - dmatrix cdl; - - cdl = Ctree->condlkl; - for (r = 0; r < numcats; r++) { - tprobmtrx(arc*Rates[r], ltprobr[r]); - } - for (r = 0; r < numcats; r++) { - for (s = 0; s < Numptrn; s++) { - opb = partiali[r][s]; - sumlk = 0.0; - if ((j = seqchi[s]) != tpmradix) { - for (i = 0; i < tpmradix; i++) - sumlk += (Freqtpm[i] * (opb[i] * ltprobr[r][i][j])); - } else { - for (i = 0; i < tpmradix; i++) - sumlk += Freqtpm[i] * opb[i]; - } - cdl[r][s] = sumlk; - } - } - - /* compute total log-likelihood for current tree */ - Ctree->lklhd = comptotloglkl(cdl); - - return -(Ctree->lklhd); /* we use 
a minimizing procedure */ -} - -/* optimize external branch */ -void optexternalbranch(Node *op) -{ - double arc, fx, f2x; - - partiali = op->isop->partials; - seqchi = op->kinp->eprob; - arc = op->length; /* nonclocklike branch lengths */ - if (arc <= MINARC) arc = MINARC+1.0; - if (arc >= MAXARC) arc = MAXARC-1.0; - arc = onedimenmin(MINARC, arc, MAXARC, extlkl, EPSILON, &fx, &f2x); - op->kinp->length = arc; - op->length = arc; - - /* variance of branch length */ - f2x = fabs(f2x); - if (1.0/(MAXARC*MAXARC) < f2x) - op->varlen = 1.0/f2x; - else - op->varlen = MAXARC*MAXARC; -} - - -/* finish likelihoods for each rate and site */ -void finishlkl(Node *op) -{ - int r, k, i, j; - double arc, sumlk, slk; - dmatrix cdl; - - partiali = op->isop->partials; - partialj = op->kinp->isop->partials; - cdl = Ctree->condlkl; - arc = op->length; /* nonclocklike branch lengths */ - for (r = 0; r < numcats; r++) { - tprobmtrx(arc*Rates[r], ltprobr[r]); - } - for (r = 0; r < numcats; r++) { - for (k = 0; k < Numptrn; k++) { - sumlk = 0.0; - for (i = 0; i < tpmradix; i++) { - slk = 0.0; - for (j = 0; j < tpmradix; j++) - slk += partialj[r][k][j] * ltprobr[r][i][j]; - sumlk += Freqtpm[i] * partiali[r][k][i] * slk; - } - cdl[r][k] = sumlk; - } - } -} - - -/***************************** exported functions *****************************/ - - -/* optimize branch lengths to get maximum likelihood (nonclocklike branchs) */ -double optlkl(Tree *tr) -{ - Node *cp, *rp; - int nconv; - double lendiff; - - clockmode = 0; /* nonclocklike branch lengths */ - nconv = 0; - Converg = FALSE; - initpartials(tr); - for (Numit = 1; (Numit <= MAXIT) && (!Converg); Numit++) { - - cp = rp = tr->rootp; - do { - cp = cp->isop->kinp; - productpartials(cp->kinp->isop); - if (cp->isop == NULL) { /* external node */ - cp = cp->kinp; /* not descen */ - - lendiff = cp->length; - optexternalbranch(cp); - lendiff = fabs(lendiff - cp->length); - if (lendiff < EPSILON) nconv++; - else nconv = 0; - - 
partialsexternal(cp); - } else { /* internal node */ - if (cp->descen) { - partialsinternal(cp); - } else { - - lendiff = cp->length; - optinternalbranch(cp); - lendiff = fabs(lendiff - cp->length); - if (lendiff < EPSILON) nconv++; - else nconv = 0; - - /* eventually compute likelihoods for each site */ - if ((cp->number == Numibrnch-1 && lendiff < EPSILON) || - Numit == MAXIT-1) finishlkl(cp); - - partialsinternal(cp); - } - } - if (nconv >= Numbrnch) { /* convergence */ - Converg = TRUE; - cp = rp; /* get out of here */ - } - } while (cp != rp); - } - - /* compute total log-likelihood for current tree */ - return comptotloglkl(tr->condlkl); -} - - -/* compute likelihood of tree for given branch lengths */ -double treelkl(Tree *tr) -{ - int i, k, r; - Node *cp; - dmatrix cdl; - dcube prob1, prob2; - double sumlk; - - /* compute for each site and rate log-likelihoods */ - initpartials(tr); - cp = tr->rootp; - productpartials(cp->isop); - prob1 = cp->partials; - prob2 = cp->isop->partials; - cdl = tr->condlkl; - for (r = 0; r < numcats; r++) { - for (k = 0; k < Numptrn; k++) { - sumlk = 0.0; - for (i = 0; i < tpmradix; i++) - sumlk += Freqtpm[i] * (prob1[r][k][i] * prob2[r][k][i]); - cdl[r][k] = sumlk; - } - } - - /* return total log-likelihood for current tree */ - return comptotloglkl(cdl); -} - - -/******************************************************************************/ -/* least-squares estimate of branch lengths */ -/******************************************************************************/ - - -/***************************** internal functions *****************************/ - - -void luequation(dmatrix amat, dvector yvec, int size) -{ - double eps = 1.0e-20; /* ! 
*/ - int i, j, k, l, maxi=0, idx; - double sum, tmp, maxb, aw; - dvector wk; - ivector index; - - - wk = new_dvector(size); - index = new_ivector(size); - aw = 1.0; - for (i = 0; i < size; i++) { - maxb = 0.0; - for (j = 0; j < size; j++) { - if (fabs(amat[i][j]) > maxb) - maxb = fabs(amat[i][j]); - } - if (maxb == 0.0) { - /* Singular matrix */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR D TO DEVELOPERS\n\n\n"); - exit(1); - } - wk[i] = 1.0 / maxb; - } - for (j = 0; j < size; j++) { - for (i = 0; i < j; i++) { - sum = amat[i][j]; - for (k = 0; k < i; k++) - sum -= amat[i][k] * amat[k][j]; - amat[i][j] = sum; - } - maxb = 0.0; - for (i = j; i < size; i++) { - sum = amat[i][j]; - for (k = 0; k < j; k++) - sum -= amat[i][k] * amat[k][j]; - amat[i][j] = sum; - tmp = wk[i] * fabs(sum); - if (tmp >= maxb) { - maxb = tmp; - maxi = i; - } - } - if (j != maxi) { - for (k = 0; k < size; k++) { - tmp = amat[maxi][k]; - amat[maxi][k] = amat[j][k]; - amat[j][k] = tmp; - } - aw = -aw; - wk[maxi] = wk[j]; - } - index[j] = maxi; - if (amat[j][j] == 0.0) - amat[j][j] = eps; - if (j != size - 1) { - tmp = 1.0 / amat[j][j]; - for (i = j + 1; i < size; i++) - amat[i][j] *= tmp; - } - } - l = -1; - for (i = 0; i < size; i++) { - idx = index[i]; - sum = yvec[idx]; - yvec[idx] = yvec[i]; - if (l != -1) { - for (j = l; j < i; j++) - sum -= amat[i][j] * yvec[j]; - } else if (sum != 0.0) - l = i; - yvec[i] = sum; - } - for (i = size - 1; i >= 0; i--) { - sum = yvec[i]; - for (j = i + 1; j < size; j++) - sum -= amat[i][j] * yvec[j]; - yvec[i] = sum / amat[i][i]; - } - free_ivector(index); - free_dvector(wk); -} - - -/* least square estimation of branch lengths - used for the approximate ML and as starting point - in the calculation of the exact value of the ML */ -void lslength(Tree *tr, dvector distanvec, int numspc, int numibrnch, dvector Brnlength) -{ - int i, i1, j, j1, j2, k, numbrnch, numpair; - double sum, leng, alllen, rss; - ivector pths; - dmatrix atmt, atamt; - Node 
**ebp, **ibp; - - numbrnch = numspc + numibrnch; - numpair = (numspc * (numspc - 1)) / 2; - atmt = new_dmatrix(numbrnch, numpair); - atamt = new_dmatrix(numbrnch, numbrnch); - ebp = tr->ebrnchp; - ibp = tr->ibrnchp; - for (i = 0; i < numspc; i++) { - for (j1 = 1, j = 0; j1 < numspc; j1++) { - if (j1 == i) { - for (j2 = 0; j2 < j1; j2++, j++) { - atmt[i][j] = 1.0; - } - } else { - for (j2 = 0; j2 < j1; j2++, j++) { - if (j2 == i) - atmt[i][j] = 1.0; - else - atmt[i][j] = 0.0; - } - } - } - } - for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) { - pths = ibp[i1]->paths; - for (j1 = 1, j = 0; j1 < numspc; j1++) { - for (j2 = 0; j2 < j1; j2++, j++) { - if (pths[j1] != pths[j2]) - atmt[i][j] = 1.0; - else - atmt[i][j] = 0.0; - } - } - } - for (i = 0; i < numbrnch; i++) { - for (j = 0; j <= i; j++) { - for (k = 0, sum = 0.0; k < numpair; k++) - sum += atmt[i][k] * atmt[j][k]; - atamt[i][j] = sum; - atamt[j][i] = sum; - } - } - for (i = 0; i < numbrnch; i++) { - for (k = 0, sum = 0.0; k < numpair; k++) - sum += atmt[i][k] * distanvec[k]; - Brnlength[i] = sum; - } - luequation(atamt, Brnlength, numbrnch); - for (i = 0, rss = 0.0; i < numpair; i++) { - sum = distanvec[i]; - for (j = 0; j < numbrnch; j++) { - if (atmt[j][i] == 1.0 && Brnlength[j] > 0.0) - sum -= Brnlength[j]; - } - rss += sum * sum; - } - tr->rssleast = sqrt(rss); - alllen = 0.0; - for (i = 0; i < numspc; i++) { - leng = Brnlength[i]; - alllen += leng; - if (leng < MINARC) leng = MINARC; - if (leng > MAXARC) leng = MAXARC; - if (clockmode) { /* clock */ - ebp[i]->lengthc = leng; - ebp[i]->kinp->lengthc = leng; - } else { /* no clock */ - ebp[i]->length = leng; - ebp[i]->kinp->length = leng; - } - Brnlength[i] = leng; - } - for (i = 0, j = numspc; i < numibrnch; i++, j++) { - leng = Brnlength[j]; - alllen += leng; - if (leng < MINARC) leng = MINARC; - if (leng > MAXARC) leng = MAXARC; - if (clockmode) { /* clock */ - ibp[i]->lengthc = leng; - ibp[i]->kinp->lengthc = leng; - } else { /* no clock */ - 
ibp[i]->length = leng; - ibp[i]->kinp->length = leng; - } - Brnlength[j] = leng; - } - free_dmatrix(atmt); - free_dmatrix(atamt); -} diff --git a/forester/archive/RIO/others/puzzle_mod/src/ml2.c b/forester/archive/RIO/others/puzzle_mod/src/ml2.c deleted file mode 100644 index 7ad780d..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/ml2.c +++ /dev/null @@ -1,1871 +0,0 @@ -/* - * ml2.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -/* Modified by Christian Zmasek to: - - Names of 26 chars. - - !WARNING: Use ONLY together with FORESTER/RIO! - !For all other puposes download the excellent original! - - last modification: 05/22/01 - - - Node *internalnode(Tree *tr, char **chpp, int *ninode): - - char ident[100], idcomp[11]; -> char ident[100], idcomp[27]; - - idcomp[10] = '\0'; -> idcomp[26] = '\0'; - - } while (!stop && (ff != 10)); -> } while (!stop && (ff != 26)); - - - -*/ - - - -#define EXTERN extern - -/* prototypes */ -#include -#include -#include -#include -#include -#include "util.h" -#include "ml.h" - -#define STDOUT stdout -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUTFILE STDOUT, -#endif - -/* prototypes for two functions of puzzle2.c */ -void fputid10(FILE *, int); -int fputid(FILE *, int); - - -/******************************************************************************/ -/* user tree input */ -/******************************************************************************/ - -/* read user tree, drop all blanks, tabs, and newlines. 
- Drop edgelengths (after :) but keep internal - labels. Check whether all pairs of brackets match. */ -void getusertree(FILE *itfp, cvector tr, int maxlen) -{ - int n, brac, ci; - int comment = 0; - - /* look for opening bracket */ - n = 0; - brac = 0; - do { - ci = fgetc(itfp); - if (ci == EOF) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing start bracket in tree)\n\n\n"); - exit(1); - } - if (ci == '[') comment = 1; - if ((ci == ']') && comment) { - comment = 0; - ci = fgetc(itfp); - } - } while (comment || ((char) ci != '(')); - tr[n] = (char) ci; - brac++; - - do { - /* get next character (skip blanks, newlines, and tabs) */ - do { - ci = fgetc(itfp); - if (ci == EOF) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no more characters in tree)\n\n\n"); - exit(1); - } - if (ci == '[') comment = 1; - if ((ci == ']') && comment) { - comment = 0; - ci = fgetc(itfp); - } - } while (comment || (char) ci == ' ' || (char) ci == '\n' || (char) ci == '\t'); - - if ((char) ci == ':') { /* skip characters until a ,) appears */ - do { - ci = fgetc(itfp); - if (ci == EOF) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing ';' or ',' in tree)\n\n\n"); - exit(1); - } - if (ci == '[') comment = 1; - if ((ci == ']') && comment) { - comment = 0; - ci = fgetc(itfp); - } - } while (comment || ((char) ci != ',' && (char) ci != ')') ); - } - - if ((char) ci == '(') { - brac++; - } - if ((char) ci == ')') { - brac--; - } - - n++; - tr[n] = (char) ci; - - } while (((char) ci != ';') && (n != maxlen-2)); - - if (n == maxlen-2) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (tree description too long)\n\n\n"); - exit(1); - } - - if (brac != 0) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (brackets don't match in tree)\n\n\n"); - exit(1); - } - - n++; - tr[n] = '\0'; -} - - -Node *internalnode(Tree *tr, char **chpp, int *ninode) -{ - Node *xp, *np, *rp; - int i, j, dvg, ff, stop, numc; - char ident[100], idcomp[27]; /* CZ 05/22/01 */ - char *idp; - - (*chpp)++; - if 
(**chpp == '(') { /* process subgroup */ - - xp = internalnode(tr, chpp, ninode); - xp->isop = xp; - dvg = 1; - while (**chpp != ')') { - if (**chpp == '\0') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); - exit(1); - } - dvg++; - /* insert edges around node */ - np = internalnode(tr, chpp, ninode); - np->isop = xp->isop; - xp->isop = np; - xp = np; - } - /* closing bracket reached */ - - (*chpp)++; - if (dvg < 2) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (only one OTU inside pair of brackets)\n\n\n"); - exit(1); - } - - if ((*ninode) >= Maxspc-3) { /* all internal nodes already used */ - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n"); - exit(1); - } - - rp = tr->ibrnchp[*ninode]; - rp->isop = xp->isop; - xp->isop = rp; - - for (j = 0; j < Numspc; j++) - rp->paths[j] = 0; - xp = rp->isop; - while (xp != rp) { - for (j = 0; j < Numspc; j++) { - if (xp->paths[j] == 1) - rp->paths[j] = 1; - } - xp = xp->isop; - } - (*ninode)++; - - if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp; - if ((**chpp) == '\0') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); - exit(1); - } - - /* read internal label into rp->label (max. 
20 characters) */ - rp->label = new_cvector(21); - (rp->label)[0] = **chpp; - (rp->label)[1] = '\0'; - for (numc = 1; numc < 20; numc++) { - (*chpp)++; - if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp; - if ((**chpp) == '\0') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); - exit(1); - } - (rp->label)[numc] = **chpp; - (rp->label)[numc+1] = '\0'; - } - do { /* skip the rest of the internal label */ - (*chpp)++; - if ((**chpp) == '\0') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); - exit(1); - } - } while (((**chpp) != ',' && (**chpp) != ')')); - - return rp->kinp; - - } else { /* process species names */ - /* read species name */ - for (idp = ident; **chpp != ',' && - **chpp != ')' && **chpp != '\0'; (*chpp)++) { - *idp++ = **chpp; - } - *idp = '\0'; - /* look for internal number */ - idcomp[26] = '\0'; /* CZ 05/22/01 */ - - for (i = 0; i < Maxspc; i++) { - ff = 0; - stop = FALSE; - do { - idcomp[ff] = Identif[i][ff]; - ff++; - if (idcomp[ff-1] == ' ') stop = TRUE; - } while (!stop && (ff != 26)); /* CZ 05/22/01 */ - if (stop) idcomp[ff-1] = '\0'; - - if (!strcmp(ident, idcomp)) { - if (usedtaxa[i]) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence '"); - FPRINTF(STDOUTFILE "%s' in tree)\n\n\n", ident); - exit(1); - } - usedtaxa[i] = TRUE; - return tr->ebrnchp[i]->kinp; - } - } - - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unknown sequence '%s' in tree)\n\n\n", ident); - exit(1); - } - return NULL; /* never returned but without some compilers complain */ -} - -/* make tree structure, the tree description may contain internal - labels but no edge lengths */ -void constructtree(Tree *tr, cvector strtree) -{ - char *chp; - int ninode, i; - int dvg, numc; - Node *xp, *np; - - ninode = 0; - chp = strtree; - usedtaxa = new_ivector(Maxspc); - for (i = 0; i < Maxspc; i++) usedtaxa[i] = FALSE; - - xp = internalnode(tr, &chp, &ninode); - xp->isop = xp; - dvg = 1; - 
while (*chp != ')') { /* look for closing bracket */ - if (*chp == '\0') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); - exit(1); - } - dvg++; - /* insert edges around node */ - np = internalnode(tr, &chp, &ninode); - np->isop = xp->isop; - xp->isop = np; - xp = np; - } - - for (i = 0; i < Maxspc; i++) - if (usedtaxa[i] == FALSE) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (sequences missing in tree)\n\n\n"); - exit(1); - } - - /* closing bracket reached */ - if (dvg < 3) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n"); - exit(1); - } - tr->rootp = xp; - Numibrnch = ninode; - Numbrnch = Numspc + ninode; - - chp++; - if (*chp == ';' || *chp == '\0') { - free_ivector(usedtaxa); - return; - } - - /* copy last internal label (max. 20 characters) */ - xp->label = new_cvector(21); - (xp->label)[0] = *chp; - (xp->label)[1] = '\0'; - for (numc = 1; numc < 20; numc++) { - chp++; - if (*chp == ';' || *chp == '\0') { - free_ivector(usedtaxa); - return; - } else { - (xp->label)[numc] = *chp; - (xp->label)[numc+1] = '\0'; - } - } - free_ivector(usedtaxa); - return; -} - - -/* remove possible basal bifurcation */ -void removebasalbif(cvector strtree) -{ - int n, c, brak, cutflag, h; - - /* check how many OTUs on basal level */ - n = 0; - c = 0; - brak = 0; - do { - if (strtree[n] == '(') brak++; - if (strtree[n] == ')') brak--; - - if (strtree[n] == ',' && brak == 1) c++; /* number of commas in outer bracket */ - - n++; - } while (strtree[n] != '\0'); - - /* if only 1 OTU inside outer bracket stop now */ - if (c == 0) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (Only 1 OTU inside outer bracket in tree)\n\n\n"); - exit(1); - } - - /* if only 2 OTUs inside outer bracket delete second pair of - brackets from the right to remove basal bifurcation */ - - if (c == 1) { - - n = 0; - brak = 0; - cutflag = 0; /* not yet cutted */ - h = 0; - do { - if (strtree[n] == '(') brak++; - if (strtree[n] == ')') brak--; - - if 
(brak == 2 && cutflag == 0) cutflag = 1; /* cutting */ - if (brak == 1 && cutflag == 1) { - cutflag = 2; /* cutted */ - /* leave out internal label */ - do { - h++; - } while (strtree[n+h] != ')' && strtree[n+h] != ','); - - } - - if (cutflag == 1) strtree[n] = strtree[n+1]; - if (cutflag == 2) strtree[n-1] = strtree[n+h]; - - n++; - } while (strtree[n] != '\0'); - } -} - - -void makeusertree(FILE *itfp) -{ - cvector strtree; - - strtree = new_cvector(23*Maxspc); /* for treefile */ - getusertree(itfp, strtree, 23*Maxspc); - removebasalbif(strtree); - constructtree(Ctree, strtree); - free_cvector(strtree); -} - - -/******************************************************************************/ -/* memory organisation for maximum likelihood tree */ -/******************************************************************************/ - -/* initialise new tree */ -Tree *new_tree(int maxspc, int numptrn, cmatrix seqconint) -{ - int n, i, maxibrnch; - Tree *tr; - Node *dp, *up; - - maxibrnch = maxspc - 3; - heights = (Node **) malloc((unsigned)(maxspc-2) * sizeof(Node *)); - if (heights == NULL) maerror("heights in new_tree"); - tr = (Tree *) malloc(sizeof(Tree)); - if (tr == NULL) maerror("tr in new_tree"); - tr->ebrnchp = (Node **) malloc((unsigned)maxspc * sizeof(Node *)); - if (tr->ebrnchp == NULL) maerror("ebrnchp in new_tree"); - tr->ibrnchp = (Node **) malloc((unsigned)maxibrnch * sizeof(Node *)); - if (tr->ibrnchp == NULL) maerror("ibrnchp in new_tree"); - tr->condlkl = new_dmatrix(numcats, numptrn); - for (n = 0; n < maxspc; n++) { - dp = (Node *) malloc(sizeof(Node)); - if (dp == NULL) maerror("dp in new_tree"); - up = (Node *) malloc(sizeof(Node)); - if (up == NULL) maerror("up in new_tree"); - dp->isop = NULL; - up->isop = NULL; - dp->kinp = up; - up->kinp = dp; - dp->descen = TRUE; - up->descen = FALSE; - dp->number = n; - up->number = n; - dp->length = 0.0; - up->length = 0.0; - dp->lengthc = 0.0; - up->lengthc = 0.0; - dp->varlen = 0.0; - up->varlen = 0.0; - 
dp->paths = new_ivector(maxspc); - up->paths = dp->paths; - for (i = 0; i < maxspc; i++) dp->paths[i] = 0; - dp->paths[n] = 1; - dp->eprob = seqconint[n]; - up->eprob = NULL; - dp->partials = NULL; - up->partials = new_dcube(numcats, numptrn, tpmradix); - tr->ebrnchp[n] = dp; - up->label = NULL; - dp->label = NULL; - } - for (n = 0; n < maxibrnch; n++) { - dp = (Node *) malloc(sizeof(Node)); - if (dp == NULL) maerror("dp in new_tree"); - up = (Node *) malloc(sizeof(Node)); - if (up == NULL) maerror("up in new_tree"); - dp->isop = NULL; - up->isop = NULL; - dp->kinp = up; - up->kinp = dp; - dp->descen = TRUE; - up->descen = FALSE; - dp->number = n; - up->number = n; - dp->length = 0.0; - up->length = 0.0; - dp->lengthc = 0.0; - up->lengthc = 0.0; - dp->varlen = 0.0; - up->varlen = 0.0; - dp->paths = new_ivector(maxspc); - up->paths = dp->paths; - for (i = 0; i < maxspc; i++) dp->paths[i] = 0; - dp->eprob = NULL; - up->eprob = NULL; - dp->partials = new_dcube(numcats, numptrn, tpmradix); - up->partials = new_dcube(numcats, numptrn, tpmradix); - tr->ibrnchp[n] = dp; - up->label = NULL; - dp->label = NULL; - } - tr->rootp = NULL; - - /* - * reserve memory for lengths of the tree branches - * and for the distance matrix as a vector - * (needed for LS estimation of tree branch lengths) - */ - - Brnlength = new_dvector(2 * maxspc - 3); - Distanvec = new_dvector((maxspc * (maxspc - 1)) / 2); - - return tr; -} - - -/* initialise quartet tree */ -Tree *new_quartet(int numptrn, cmatrix seqconint) -{ - int n, i; - Tree *tr; - Node *dp, *up; - - heights = (Node **) malloc((unsigned)2 * sizeof(Node *)); - if (heights == NULL) maerror("heights in new_quartet"); - /* reserve memory for tree */ - tr = (Tree *) malloc(sizeof(Tree)); - if (tr == NULL) maerror("tr in new_quartet"); - tr->ebrnchp = (Node **) malloc((unsigned) 4 * sizeof(Node *)); - if (tr->ebrnchp == NULL) maerror("ebrnchp in new_quartet"); - tr->ibrnchp = (Node **) malloc((unsigned) sizeof(Node *)); - if (tr->ibrnchp 
== NULL) maerror("ibrnchp in new_quartet"); - tr->condlkl = new_dmatrix(numcats, numptrn); - /* reserve memory for nodes */ - for (n = 0; n < 4; n++) { - dp = (Node *) malloc(sizeof(Node)); - if (dp == NULL) maerror("dp in new_quartet"); - up = (Node *) malloc(sizeof(Node)); - if (up == NULL) maerror("dp in new_quartet"); - dp->isop = NULL; - dp->kinp = up; - up->kinp = dp; - dp->descen = TRUE; - up->descen = FALSE; - dp->number = n; - up->number = n; - dp->length = 0.0; - up->length = 0.0; - dp->lengthc = 0.0; - up->lengthc = 0.0; - dp->varlen = 0.0; - up->varlen = 0.0; - dp->paths = new_ivector(4); - up->paths = dp->paths; - for (i = 0; i < 4; i++) dp->paths[i] = 0; - dp->paths[n] = 1; - dp->eprob = seqconint[n]; /* make quartet (0,1)-(2,3) as default */ - up->eprob = NULL; - dp->partials = NULL; - up->partials = new_dcube(numcats, numptrn, tpmradix); - tr->ebrnchp[n] = dp; - } - - /* reserve memory for internal branch */ - dp = (Node *) malloc(sizeof(Node)); - if (dp == NULL) maerror("dp in new_quartet"); - up = (Node *) malloc(sizeof(Node)); - if (up == NULL) maerror("dp in new_quartet"); - dp->isop = tr->ebrnchp[3]->kinp; /* connect internal branch */ - up->isop = tr->ebrnchp[0]->kinp; - dp->kinp = up; - up->kinp = dp; - dp->descen = TRUE; - up->descen = FALSE; - dp->number = 0; - up->number = 0; - dp->length = 0.0; - up->length = 0.0; - dp->lengthc = 0.0; - up->lengthc = 0.0; - dp->varlen = 0.0; - up->varlen = 0.0; - dp->paths = new_ivector(4); - up->paths = dp->paths; - up->paths[0] = 0; - up->paths[1] = 0; - up->paths[2] = 1; - up->paths[3] = 1; - dp->eprob = NULL; - up->eprob = NULL; - dp->partials = new_dcube(numcats, numptrn, tpmradix); - up->partials = new_dcube(numcats, numptrn, tpmradix); - tr->ibrnchp[0] = dp; - - /* place root */ - tr->rootp = up; - - /* connect external branches */ - tr->ebrnchp[0]->kinp->isop = tr->ebrnchp[1]->kinp; - tr->ebrnchp[1]->kinp->isop = tr->rootp; - tr->ebrnchp[3]->kinp->isop = tr->ebrnchp[2]->kinp; - 
tr->ebrnchp[2]->kinp->isop = tr->rootp->kinp; - - /* - * reserve memory for lengths of the five branches - * of a quartet and for the six possible distances - * (needed for LS estimation of branch lengths) - */ - Brnlength = new_dvector(NUMQBRNCH); - Distanvec = new_dvector(NUMQSPC*(NUMQSPC-1)/2); - - return tr; -} - - -/* free tree memory */ -void free_tree(Tree *tr, int taxa) -{ - int n; - Node *dp, *up; - - free(heights); - free_dmatrix(tr->condlkl); - for (n = 0; n < taxa; n++) { - dp = tr->ebrnchp[n]; - up = dp->kinp; - free_ivector(dp->paths); - free_dcube(up->partials); - free(dp); - free(up); - } - free(tr->ebrnchp); - for (n = 0; n < (taxa-3); n++) { - dp = tr->ibrnchp[n]; - up = dp->kinp; - free_dcube(dp->partials); - free_dcube(up->partials); - free_ivector(dp->paths); - free(dp); - free(up); - } - free(tr->ibrnchp); - free(tr); - free_dvector(Brnlength); /* branch lengths (for LS estimation) */ - free_dvector(Distanvec); /* distances (for LS estimation) */ -} - - -/* make (a,b)-(c,d) quartet - - a ---+ +--- c - +-----+ - b ---+ +--- d - - species numbers range from 0 to Maxspc - 1 */ - -void make_quartet(int a, int b, int c, int d) -{ - /* place sequences */ - Ctree->ebrnchp[0]->eprob = Seqpat[a]; - Ctree->ebrnchp[1]->eprob = Seqpat[b]; - Ctree->ebrnchp[2]->eprob = Seqpat[c]; - Ctree->ebrnchp[3]->eprob = Seqpat[d]; - - /* make distance vector */ - Distanvec[0] = Distanmat[b][a]; - Distanvec[1] = Distanmat[c][a]; - Distanvec[2] = Distanmat[c][b]; - Distanvec[3] = Distanmat[d][a]; - Distanvec[4] = Distanmat[d][b]; - Distanvec[5] = Distanmat[d][c]; -} - -/* write distance matrix as vector */ -void changedistan(dmatrix distanmat, dvector distanvec, int numspc) -{ - int i, j, k; - - for (k = 0, i = 1; i < numspc; i++) { - for (j = 0; j < i; j++, k++) - distanvec[k] = distanmat[i][j]; - } -} - - -/******************************************************************************/ -/* computation of maximum likelihood tree */ 
-/******************************************************************************/ - - -/* compute the likelihood for (a,b)-(c,d) quartet */ -double quartet_lklhd(int a, int b, int c, int d) -{ - /* reserve memory for quartet if necessary */ - if (mlmode != 1) { /* no quartet tree */ - if (Ctree != NULL) - free_tree(Ctree, Numspc); - Ctree = new_quartet(Numptrn, Seqpat); - Numbrnch = NUMQBRNCH; - Numibrnch = NUMQIBRNCH; - Numspc = NUMQSPC; - mlmode = 1; - } - - /* make (a,b)-(c,d) quartet */ - make_quartet(a,b,c,d); - - clockmode = 0; /* nonclocklike branch lengths */ - - /* least square estimate for branch length */ - lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); - - /* compute likelihood */ - Ctree->lklhd = optlkl(Ctree); - - return Ctree->lklhd; -} - - -/* compute the approximate likelihood for (a,b)-(c,d) quartet */ -double quartet_alklhd(int a, int b, int c, int d) -{ - /* reserve memory for quartet if necessary */ - if (mlmode != 1) { /* no quartet tree */ - if (Ctree != NULL) - free_tree(Ctree, Numspc); - Ctree = new_quartet(Numptrn, Seqpat); - Numbrnch = NUMQBRNCH; - Numibrnch = NUMQIBRNCH; - Numspc = NUMQSPC; - mlmode = 1; - } - - /* make (a,b)-(c,d) quartet */ - make_quartet(a,b,c,d); - - clockmode = 0; /* nonclocklike branch lengths */ - - /* least square estimate for branch length */ - lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); - - /* compute likelihood */ - Ctree->lklhd = treelkl(Ctree); - - return Ctree->lklhd; -} - - -/* read usertree from file to memory */ -void readusertree(FILE *ifp) -{ - /* reserve memory for tree if necessary */ - if (mlmode != 2) { /* no tree */ - if (Ctree != NULL) - free_tree(Ctree, Numspc); - Ctree = new_tree(Maxspc, Numptrn, Seqpat); - Numbrnch = 2*Maxspc-3; - Numibrnch = Maxspc-3; - Numspc = Maxspc; - mlmode = 2; - } - - /* read tree */ - makeusertree(ifp); -} - - -/* compute the likelihood of a usertree */ -double usertree_lklhd() -{ - /* be sure to have a usertree in memory and - to have 
pairwise distances computed */ - - clockmode = 0; /* nonclocklike branch lengths */ - - /* least square estimate for branch length */ - changedistan(Distanmat, Distanvec, Numspc); - lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); - - /* compute likelihood */ - Ctree->lklhd = optlkl(Ctree); - - return Ctree->lklhd; -} - - -/* compute the approximate likelihood of a usertree */ -double usertree_alklhd() -{ - /* be sure to have a usertree in memory and - to have pairwise distances computed */ - - clockmode = 0; /* nonclocklike branch lengths */ - - /* least square estimate for branch length */ - changedistan(Distanmat, Distanvec, Numspc); - lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); - - /* compute likelihood */ - Ctree->lklhd = treelkl(Ctree); - - return Ctree->lklhd; -} - - -/* preparation for ML analysis */ -void mlstart() -{ - /* number of states and code length */ - tpmradix = gettpmradix(); - - /* declare variables */ - Eval = new_dvector(tpmradix); - Evec = new_dmatrix(tpmradix,tpmradix); - Ievc = new_dmatrix(tpmradix,tpmradix); - iexp = new_dmatrix(tpmradix,tpmradix); - Alias = new_ivector(Maxsite); - - /* process sequence information */ - evaluateseqs(); - bestrate = new_ivector(Numptrn); - - /* compute transition probability matrix */ - tranprobmat(); - - /* non-zero rate categories */ - Rates = new_dvector(numcats); - updaterates(); - ltprobr = new_dcube(numcats, tpmradix,tpmradix); - - /* compute distance matrix */ - Distanmat = new_dmatrix(Maxspc, Maxspc); - initdistan(); - - /* initialize tree pointer for quartet tree */ - mlmode = 1; - Ctree = new_quartet(Numptrn, Seqpat); - Numbrnch = NUMQBRNCH; - Numibrnch = NUMQIBRNCH; - Numspc = NUMQSPC; - - /* computing ML distances */ - computedistan(); -} - - -/* recompute ml distances for quartet only */ -void distupdate(int a, int b, int c, int d) -{ - /* update distance matrix */ - /* consider only entries relevant to quartet */ - Distanmat[a][b] = mldistance(a, b); - Distanmat[b][a] = 
Distanmat[a][b]; - Distanmat[a][c] = mldistance(a, c); - Distanmat[c][a] = Distanmat[a][c]; - Distanmat[a][d] = mldistance(a, d); - Distanmat[d][a] = Distanmat[a][d]; - Distanmat[b][c] = mldistance(b, c); - Distanmat[c][b] = Distanmat[b][c]; - Distanmat[b][d] = mldistance(b, d); - Distanmat[d][b] = Distanmat[b][d]; - Distanmat[c][d] = mldistance(c, d); - Distanmat[d][c] = Distanmat[c][d]; -} - - -/* cleanup after ML analysis */ -void mlfinish() -{ - if (Ctree != NULL) - free_tree(Ctree, Numspc); - free_ivector(bestrate); - free_ivector(Alias); - free_cmatrix(Seqpat); - free_ivector(constpat); - free_ivector(Weight); - free_dmatrix(Distanmat); - free_dvector(Eval); - free_dmatrix(Evec); - free_dmatrix(Ievc); - free_dvector(Rates); - free_dcube(ltprobr); - free_dmatrix(iexp); -} - - -/******************************************************************************/ -/* tree output */ -/******************************************************************************/ - - -#define MAXOVER 50 -#define MAXLENG 30 -#define MAXCOLUMN 80 - - -void prbranch(Node *up, int depth, int m, int maxm, - ivector umbrella, ivector column, FILE *outfp) -{ - int i, num, n, maxn, lim; - Node *cp; - char bch; - - if ((int)((clockmode ? up->lengthc : up->length) * Proportion) >= MAXOVER) { - column[depth] = MAXLENG; - bch = '+'; - } else { - column[depth] = (int)((clockmode ? 
up->lengthc : up->length) * Proportion) + 3; - bch = '-'; - } - - if (up->isop == NULL) { /* external branch */ - num = up->number + 1; /* offset */ - if (m == 1) umbrella[depth - 1] = TRUE; - for (i = 0; i < depth; i++) { - if (umbrella[i]) - fprintf(outfp, "%*c", column[i], ':'); - else - fprintf(outfp, "%*c", column[i], ' '); - } - if (m == maxm) - umbrella[depth - 1] = FALSE; - for (i = 0, lim = column[depth] - 3; i < lim; i++) - fputc(bch, outfp); - fprintf(outfp, "-%d ", num); - - fputid(outfp, up->number); - - - fputc('\n', outfp); - fputc(' ', outfp); - return; - } - - num = up->number + 1 + Numspc; /* offset, internal branch */ - for (cp = up->isop, maxn = 0; cp != up; cp = cp->isop, maxn++) - ; - for (cp = up->isop, n = 1; cp != up; cp = cp->isop, n++) { - prbranch(cp->kinp, depth + 1, n, maxn, umbrella, column, outfp); - if (m == 1 && n == maxn / 2) umbrella[depth - 1] = TRUE; - if (n != maxn) { - for (i = 0; i < depth; i++) { - if (umbrella[i]) - fprintf(outfp, "%*c", column[i], ':'); - else - fprintf(outfp, "%*c", column[i], ' '); - } - if (n == maxn / 2) { /* internal branch */ - for (i = 0, lim = column[depth] - 3; i < lim; i++) - fputc(bch, outfp); - if (num < 10) - fprintf(outfp, "--%d", num); - else if (num < 100) - fprintf(outfp, "-%2d", num); - else - fprintf(outfp, "%3d", num); - } else { - if (umbrella[depth]) - fprintf(outfp, "%*c", column[depth], ':'); - else - fprintf(outfp, "%*c", column[depth], ' '); - } - fputc('\n', outfp); - fputc(' ', outfp); - } - if (m == maxm) umbrella[depth - 1] = FALSE; - } - return; -} - - -void getproportion(double *proportion, dvector distanvec, int numspc) -{ - int i, maxpair; - double maxdis; - - maxpair = (numspc*(numspc-1))/2; - - maxdis = 0.0; - for (i = 0; i < maxpair; i++) { - if (distanvec[i] > maxdis) { - maxdis = distanvec[i]; - } - } - *proportion = (double) MAXCOLUMN / (maxdis * 3.0); - if (*proportion > 1.0) *proportion = 1.0; -} - - -void prtopology(FILE *outfp) -{ - int n, maxn, depth; - ivector 
umbrella; - ivector column; - Node *cp, *rp; - - getproportion(&Proportion, Distanvec, Numspc); - - umbrella = new_ivector(Numspc); - column = new_ivector(Numspc); - - for (n = 0; n < Numspc; n++) { - umbrella[n] = FALSE; - column[n] = 3; - } - column[0] = 1; - - fputc(' ', outfp); - - /* original code: rp = Ctree->rootp */ - /* but we want to print the first group in the - trichotomy as outgroup at the bottom! */ - rp = Ctree->rootp->isop; - - for (maxn = 1, cp = rp->isop; cp != rp; cp = cp->isop, maxn++) - ; - depth = 1; - n = 0; - - cp = rp; - do { - cp = cp->isop; - n++; - prbranch(cp->kinp, depth, n, maxn, umbrella, column, outfp); - if (cp != rp) fprintf(outfp, "%*c\n ", column[0], ':'); - } while (cp != rp); - - free_ivector(umbrella); - free_ivector(column); -} - - -/* print unrooted tree file with branch lengths */ -void fputphylogeny(FILE *fp) -{ - Node *cp, *rp; - int n; - - cp = rp = Ctree->rootp; - putc('(', fp); - n = 1; - do { - cp = cp->isop->kinp; - if (cp->isop == NULL) { /* external node */ - if (n > 60) { - fprintf(fp, "\n"); - n = 2; - } - n += fputid(fp, cp->number); - fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01); - n += 7; - cp = cp->kinp; - } else { /* internal node */ - if (cp->descen) { - if (n > 60) { - fprintf(fp, "\n"); - n = 1; - } - putc('(', fp); - n++; - } else { - putc(')', fp); - n++; - if (n > 60) { - fprintf(fp, "\n"); - n = 1; - } - /* internal label */ - if (cp->kinp->label != NULL) { - fprintf(fp, "%s", cp->kinp->label); - n += strlen(cp->kinp->label); - } - fprintf(fp, ":%.5f", ((clockmode ? 
cp->lengthc : cp->length))*0.01); - n += 7; - } - } - if (!cp->descen && !cp->isop->descen && cp != rp) { - putc(',', fp); /* not last subtree */ - n++; - } - } while (cp != rp); - fprintf(fp, ")"); - /* internal label */ - if (cp->label != NULL) - fprintf(fp, "%s", cp->label); - fprintf(fp, ";\n"); -} - - -void resulttree(FILE *outfp) -{ - int n, ne, closeflag; - Node *ep, *ip; - double blen; - - closeflag = FALSE; - - if (clockmode) { - fprintf(outfp, "\n branch length nc/c"); - fprintf(outfp, " branch length nc/c (= non-clock/clock)\n"); - } else { - fprintf(outfp, "\n branch length S.E."); - fprintf(outfp, " branch length S.E.\n"); - } - for (n = 0; n < Numspc; n++) { - ep = Ctree->ebrnchp[n]; - ne = ep->number; - fputid10(outfp, ne); - fputs(" ", outfp); - fprintf(outfp, "%3d", ne + 1); - blen = (clockmode ? ep->lengthc : ep->length); - fprintf(outfp, "%9.5f", blen*0.01); - if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE; - if (clockmode) - fprintf(outfp, "%9.3f", (ep->length)/(ep->lengthc)); - else - fprintf(outfp, "%9.5f", 0.01*sqrt(ep->kinp->varlen)); - if (n < Numibrnch) { - ip = Ctree->ibrnchp[n]; - fprintf(outfp, "%8d", n + 1 + Numspc); - blen = (clockmode ? 
ip->lengthc : ip->length); - fprintf(outfp, "%9.5f", blen*0.01); - if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE; - if (clockmode) - fprintf(outfp, "%9.3f", (ip->length)/(ip->lengthc)); - else - fprintf(outfp, "%9.5f", 0.01*sqrt(ip->kinp->varlen)); - fputc('\n', outfp); - } else { - if (n == Numspc - 3) { - fputc('\n', outfp); - } else if (n == Numspc - 2) { - if (clockmode) { - if (!Convergc) - fprintf(outfp, " No convergence after %d iterations!\n", Numitc); - else - fprintf(outfp, " %d iterations until convergence\n", Numitc); - } else { - if (!Converg) - fprintf(outfp, " No convergence after %d iterations!\n", Numit); - else - fprintf(outfp, " %d iterations until convergence\n", Numit); - } - } else if (n == Numspc - 1) { - fprintf(outfp, " log L: %.2f\n", (clockmode ? Ctree->lklhdc : Ctree->lklhd)); - } else { - fputc('\n', outfp); - } - } - } - if(closeflag) - fprintf(outfp, "\nWARNING --- at least one branch length is close to an internal boundary!\n"); -} - - -/******************************************************************************/ -/* Neighbor-joining tree */ -/******************************************************************************/ - - -/* compute NJ tree and write to file */ -void njtree(FILE *fp) -{ - /* reserve memory for tree if necessary */ - if (mlmode != 3) { /* no tree */ - if (Ctree != NULL) - free_tree(Ctree, Numspc); - Ctree = new_tree(Maxspc, Numptrn, Seqpat); - Numbrnch = 2*Maxspc-3; - Numibrnch = Maxspc-3; - Numspc = Maxspc; - mlmode = 3; - } - - /* construct NJ tree from distance matrix */ - njdistantree(Ctree); - - fputphylogeny(fp); -} - - -/* construct NJ tree from distance matrix */ -void njdistantree(Tree *tr) -{ - int i, j, otui=0, otuj=0, otuk, nsp2, cinode, step, restsp, k; - double dij, bix, bjx, bkx, sij, smax, dnsp2; - dvector r; - dmatrix distan; - Node **psotu, *cp, *ip, *jp, *kp; - - distan = new_dmatrix(Maxspc,Maxspc); - for (i = 0; i < Maxspc; i++) - for (j = 0; j < Maxspc; j++) - distan[i][j] 
= Distanmat[i][j]; - - nsp2 = Maxspc - 2; - dnsp2 = 1.0 / nsp2; - - r = new_dvector(Maxspc); - - psotu = (Node **) malloc((unsigned)Maxspc * sizeof(Node *)); - if (psotu == NULL) maerror("psotu in njdistantree"); - - /* external branches are start OTUs */ - for (i = 0; i < Maxspc; i++) - psotu[i] = tr->ebrnchp[i]->kinp; - - restsp = Maxspc; - cinode = 0; /* counter for internal nodes */ - - for (step = 0; restsp > 3; step++) { /* NJ clustering steps */ - - for (i = 0; i < Maxspc; i++) { - if (psotu[i] != NULL) { - for (j = 0, r[i] = 0.0; j < Maxspc; j++) - if (psotu[j] != NULL) - r[i] += distan[i][j]; - } - } - - smax = -1.0; - for (i = 0; i < Maxspc-1; i++) { - if (psotu[i] != NULL) { - - for (j = i+1; j < Maxspc; j++) { - if (psotu[j] != NULL) - { - sij = ( r[i] + r[j] ) * dnsp2 - distan[i][j]; - - if (sij > smax) { - smax = sij; - otui = i; - otuj = j; - } - } - } - } - } - - /* new pair: otui and otuj */ - - dij = distan[otui][otuj]; - bix = (dij + r[otui]/nsp2 - r[otuj]/nsp2) * 0.5; - bjx = dij - bix; - - cp = tr->ibrnchp[cinode]; - - ip = psotu[otui]; - jp = psotu[otuj]; - cp->isop = ip; - ip->isop = jp; - jp->isop = cp; - ip->length = bix; - jp->length = bjx; - ip->kinp->length = ip->length; - jp->kinp->length = jp->length; - - cp = cp->kinp; - - for (k = 0; k < Maxspc; k++) - { - if (psotu[k] != NULL && k != otui && k != otuj) - { - dij = (distan[otui][k] + distan[otuj][k] - distan[otui][otuj]) * 0.5; - distan[otui][k] = dij; - distan[k][otui] = dij; - } - } - distan[otui][otui] = 0.0; - - psotu[otui] = cp; - psotu[otuj] = NULL; - - cinode++; - - restsp--; - nsp2--; - dnsp2 = 1.0 / nsp2; - } - - otui = otuj = otuk = -1; - for (i = 0; i < Maxspc; i++) - { - if (psotu[i] != NULL) { - if (otui == -1) otui = i; - else if (otuj == -1) otuj = i; - else otuk = i; - } - } - bix = (distan[otui][otuj] + distan[otui][otuk] - distan[otuj][otuk]) * 0.5; - bjx = distan[otui][otuj] - bix; - bkx = distan[otui][otuk] - bix; - ip = psotu[otui]; - jp = psotu[otuj]; - kp = 
psotu[otuk]; - ip->isop = jp; - jp->isop = kp; - kp->isop = ip; - ip->length = bix; - jp->length = bjx; - kp->length = bkx; - ip->kinp->length = ip->length; - jp->kinp->length = jp->length; - kp->kinp->length = kp->length; - - tr->rootp = kp; - - free_dvector(r); - free_dmatrix(distan); - free((Node *) psotu); -} - -/******************************************************************************/ -/* find best assignment of rate categories */ -/******************************************************************************/ - -/* find best assignment of rate categories */ -void findbestratecombination() -{ - int k, u; - double bestvalue, fv2; - dvector catprob; - dmatrix cdl; - - cdl = Ctree->condlkl; - catprob = new_dvector(numcats+1); - fv2 = (1.0-fracinv)/(double) numcats; - - for (k = 0; k < Numptrn; k++) { - /* zero rate */ - if (constpat[k] == TRUE) - catprob[0] = fracinv*Freqtpm[(int) Seqpat[0][k]]; - else - catprob[0] = 0.0; - /* non-zero-rates */ - for (u = 1; u < numcats+1; u++) - catprob[u] = fv2*cdl[u-1][k]; - /* find best */ - bestvalue = catprob[0]; - bestrate[k] = 0; - for (u = 1; u < numcats+1; u++) - if (catprob[u] >= bestvalue) { - bestvalue = catprob[u]; - bestrate[k] = u; - } - } - free_dvector(catprob); - bestratefound = 1; -} - -/* print best assignment of rate categories */ -void printbestratecombination(FILE *fp) -{ - int s, k; - - for (s = 0; s < Maxsite; s++) { - k = Alias[s]; - fprintf(fp, "%2d", bestrate[k]); - if ((s+1) % 30 == 0) - fprintf(fp, "\n"); - else if ((s+1) % 10 == 0) - fprintf(fp, " "); - } - if (s % 70 != 0) - fprintf(fp, "\n"); -} - - -/******************************************************************************/ -/* computation of clocklike branch lengths */ -/******************************************************************************/ - -/* checks wether e is a valid edge specification */ -int checkedge(int e) -{ - /* there are Numspc external branches: - 0 - Numspc-1 - there are Numibrnch internal branches: - Numspc 
- Numspc+Numibrnch-1 - */ - - if (e < 0) return FALSE; - if (e < Numspc+Numibrnch) return TRUE; - else return FALSE; -} - -/* print topology of subtree */ -void fputsubstree(FILE *fp, Node *ip) -{ - Node *cp; - - if (ip->isop == NULL) { /* terminal nodes */ - numtc += fputid(fp, ip->number); - } else { - cp = ip; - fprintf(fp, "("); - numtc += 1; - do { - cp = cp->isop->kinp; - if (cp->isop == NULL) { /* external node */ - numtc += fputid(fp, cp->number); - fprintf(fp, ":%.5f", (cp->lengthc)*0.01); - numtc += 7; - cp = cp->kinp; - } else { /* internal node */ - if (cp->height > 0.0) { - fprintf(fp, "("); - numtc += 1; - } else if (cp->height < 0.0) { - fprintf(fp, ")"); - numtc += 1; - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - /* internal label */ - if (cp->kinp->label != NULL) { - fprintf(fp, "%s", cp->kinp->label); - numtc += strlen(cp->kinp->label); - } - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - fprintf(fp, ":%.5f", (cp->lengthc)*0.01); - numtc += 6; - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - } - } - if (cp->height <= 0.0 && cp->isop->height <= 0.0 && - cp->isop != ip) { - putc(',', fp); /* not last subtree */ - numtc += 1; - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - } - } while (cp->isop != ip); - fprintf(fp, ")"); - numtc += 1; - } - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - -} - -/* print rooted tree file */ -void fputrooted(FILE *fp, int e) -{ - Node *rootbr; - - /* to be called only after clocklike branch - lengths have been computed */ - - /* pointer to root branch */ - if (e < Numspc) rootbr = Ctree->ebrnchp[e]; - else rootbr = Ctree->ibrnchp[e - Numspc]; - - fprintf(fp, "("); - numtc = 2; - fputsubstree(fp, rootbr); - /* internal label */ - if (rootbr->label != NULL) { - fprintf(fp, "%s", rootbr->label); - numtc += strlen(rootbr->label); - } - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - fprintf(fp, ":%.5f,", (hroot - rootbr->height)*0.01); - numtc += 7; - if 
(numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - fputsubstree(fp, rootbr->kinp); - /* internal label */ - if (rootbr->kinp->label != NULL) { - fprintf(fp, "%s", rootbr->kinp->label); - numtc += strlen(rootbr->kinp->label); - } - if (numtc > 60) { - fprintf(fp, "\n"); - numtc = 1; - } - fprintf(fp, ":%.5f);\n", (hroot - rootbr->kinp->height)*0.01); -} - -/* finds heights in subtree */ -void findheights(Node *ip) -{ - Node *cp, *rp; - - if (ip->isop != NULL) { /* forget terminal nodes */ - - cp = ip; - - /* initialise node */ - cp->height = 1.0; /* up */ - rp = cp; - while (rp->isop != cp) { - rp = rp->isop; - rp->height = -1.0; /* down */ - } - - do { - cp = cp->isop->kinp; - if (cp->isop == NULL) { /* external node */ - cp = cp->kinp; - } else { /* internal node */ - if (cp->height == 0.0) { /* node not yet visited */ - cp->height = 1.0; /* up */ - rp = cp; - while (rp->isop != cp) { - rp = rp->isop; - rp->height = -1.0; /* down */ - } - } else if (cp->kinp->height == 1.0) { - /* cp->kinp is next height pointer */ - heights[Numhts] = cp->kinp; - Numhts++; - } - } - } while (cp->isop != ip); - /* ip is last height pointer */ - heights[Numhts] = ip; - Numhts++; - } -} - - -/* initialise clocklike branch lengths (with root on edge e) */ -void initclock(int e) -{ - int n, h, count; - Node *cp, *rp; - double sum, minh, aveh, len; - - /* be sure to have a Ctree in memory and - to have pairwise distances computed */ - - clockmode = 1; /* clocklike branch lengths */ - - /* least square estimate for branch length */ - changedistan(Distanmat, Distanvec, Numspc); - lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); - - /* pointer to root branch */ - if (e < Numspc) rootbr = Ctree->ebrnchp[e]; - else rootbr = Ctree->ibrnchp[e - Numspc]; - - /* clear all heights */ - for (n = 0; n < Numspc; n++) { - Ctree->ebrnchp[n]->height = 0.0; - Ctree->ebrnchp[n]->kinp->height = 0.0; - Ctree->ebrnchp[n]->varheight = 0.0; - Ctree->ebrnchp[n]->kinp->varheight = 0.0; - if (n < 
Numibrnch) { - Ctree->ibrnchp[n]->height = 0.0; - Ctree->ibrnchp[n]->kinp->height = 0.0; - Ctree->ibrnchp[n]->varheight = 0.0; - Ctree->ibrnchp[n]->kinp->varheight = 0.0; - } - } - - /* collect pointers to height nodes */ - Numhts = 0; - findheights(rootbr); /* one side */ - findheights(rootbr->kinp); /* other side */ - - /* assign preliminary approximate heights and - corresponding branch lengths */ - for (h = 0; h < Numhts; h++) { - - cp = rp = heights[h]; - sum = 0; - count = 0; - minh = 0.0; - while (rp->isop != cp) { - count++; - rp = rp->isop; - sum += rp->lengthc + rp->kinp->height; - if (rp->kinp->height > minh) minh = rp->kinp->height; - } - aveh = sum / (double) count; - if (aveh < minh + MINARC) aveh = minh + MINARC; - cp->height = aveh; - rp = cp; - while (rp->isop != cp) { - rp = rp->isop; - len = aveh - rp->kinp->height; - rp->kinp->lengthc = len; - rp->lengthc = len; - } - - } - if (rootbr->height > rootbr->kinp->height) minh = rootbr->height; - else minh = rootbr->kinp->height; - aveh = 0.5*(rootbr->lengthc + rootbr->height + rootbr->kinp->height); - if (aveh < minh + MINARC) aveh = minh + MINARC; - hroot = aveh; - maxhroot = RMHROOT*hroot; /* maximal possible hroot */ - len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height); - rootbr->lengthc = len; - rootbr->kinp->lengthc = len; -} - -/* approximate likelihood under the constaining assumption of - clocklike branch lengths (with root on edge e) */ -double clock_alklhd(int e) -{ - initclock(e); - Ctree->lklhdc = treelkl(Ctree); - - return Ctree->lklhdc; -} - -/* log-likelihood given height ht at node pointed to by chep */ -double heightlkl(double ht) -{ - Node *rp; - double len; - - /* adjust branch lengths */ - chep->height = ht; - /* descendent branches */ - rp = chep; - while (rp->isop != chep) { - rp = rp->isop; - len = chep->height - rp->kinp->height; - rp->kinp->lengthc = len; - rp->lengthc = len; - } - /* upward branch */ - if (chep == rootbr || chep->kinp == rootbr) { - len = (hroot 
- chep->height) + (hroot - chep->kinp->height); - chep->lengthc = len; - chep->kinp->lengthc = len; - } else { - rp = chep->kinp; - while (rp->isop->height <= 0.0) - rp = rp->isop; - chep->lengthc = rp->isop->height - chep->height; - chep->kinp->lengthc = rp->isop->height - chep->height; - } - - /* compute likelihood */ - Ctree->lklhdc = treelkl(Ctree); - - return -(Ctree->lklhdc); /* we use a minimizing procedure */ -} - -/* optimize current height */ -void optheight(void) -{ - double he, fx, f2x, minh, maxh, len; - Node *rp; - - /* current height */ - he = chep->height; - - /* minimum */ - minh = 0.0; - rp = chep; - while (rp->isop != chep) { - rp = rp->isop; - if (rp->kinp->height > minh) - minh = rp->kinp->height; - } - minh += MINARC; - - /* maximum */ - if (chep == rootbr || chep->kinp == rootbr) { - maxh = hroot; - } else { - rp = chep->kinp; - while (rp->isop->height <= 0.0) - rp = rp->isop; - maxh = rp->isop->height; - } - maxh -= MINARC; - - /* check borders for height */ - if (he < minh) he = minh; - if (he > maxh) he = maxh; - - /* optimization */ - if (!(he == minh && he == maxh)) - he = onedimenmin(minh, he, maxh, heightlkl, HEPSILON, &fx, &f2x); - - /* variance of height */ - f2x = fabs(f2x); - if (1.0/(maxhroot*maxhroot) < f2x) - chep->varheight = 1.0/f2x; - else - chep->varheight = maxhroot*maxhroot; - - /* adjust branch lengths */ - chep->height = he; - /* descendent branches */ - rp = chep; - while (rp->isop != chep) { - rp = rp->isop; - len = chep->height - rp->kinp->height; - rp->kinp->lengthc = len; - rp->lengthc = len; - } - /* upward branch */ - if (chep == rootbr || chep->kinp == rootbr) { - len = (hroot - chep->height) + (hroot - chep->kinp->height); - chep->lengthc = len; - chep->kinp->lengthc = len; - } else { - rp = chep->kinp; - while (rp->isop->height <= 0.0) - rp = rp->isop; - chep->lengthc = rp->isop->height - chep->height; - chep->kinp->lengthc = rp->isop->height - chep->height; - } -} - -/* log-likelihood given height ht at root 
*/ -double rheightlkl(double ht) -{ - double len; - - /* adjust branch lengths */ - hroot = ht; - len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height); - rootbr->lengthc = len; - rootbr->kinp->lengthc = len; - - /* compute likelihood */ - Ctree->lklhdc = treelkl(Ctree); - - return -(Ctree->lklhdc); /* we use a minimizing procedure */ -} - -/* optimize height of root */ -void optrheight(void) -{ - double he, fx, f2x, minh, len; - - /* current height */ - he = hroot; - - /* minimum */ - if (rootbr->height > rootbr->kinp->height) - minh = rootbr->height; - else - minh = rootbr->kinp->height; - minh += MINARC; - - /* check borders for height */ - if (he < minh) he = minh; - if (he > maxhroot) he = maxhroot; - - /* optimization */ - he = onedimenmin(minh, he, maxhroot, rheightlkl, HEPSILON, &fx, &f2x); - - /* variance of height of root */ - f2x = fabs(f2x); - if (1.0/(maxhroot*maxhroot) < f2x) - varhroot = 1.0/f2x; - else - varhroot = maxhroot*maxhroot; - - /* adjust branch lengths */ - hroot = he; - len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height); - rootbr->lengthc = len; - rootbr->kinp->lengthc = len; -} - -/* exact likelihood under the constaining assumption of - clocklike branch lengths (with root on edge e) */ -double clock_lklhd(int e) -{ - int h, nconv; - double old; - - Numitc = 0; - Convergc = FALSE; - - initclock(e); - - do { - - Numitc++; - nconv = 0; - - /* optimize height of root */ - old = hroot; - optrheight(); - if (fabs(old - hroot) < HEPSILON) nconv++; - - /* optimize height of nodes */ - for (h = Numhts-1; h >= 0; h--) { - - /* pointer chep to current height node */ - chep = heights[h]; - - /* store old value */ - old = chep->height; - - /* find better height */ - optheight(); - - /* converged ? 
*/ - if (fabs(old - chep->height) < HEPSILON) nconv++; - } - - if (nconv == Numhts+1) Convergc = TRUE; - - } while (Numitc < MAXIT && !Convergc); - - /* compute final likelihood */ - Ctree->lklhdc = treelkl(Ctree); - - return Ctree->lklhdc; -} - -/* find out the edge containing the root */ -int findrootedge() -{ - int e, ebest; - double logbest, logtest; - - /* compute the likelihood for all edges and take the edge with - best likelihood (using approximate ML) */ - - ebest = 0; - logbest = clock_alklhd(0); - numbestroot = 1; - for (e = 1; e < Numspc+Numibrnch; e++) { - logtest = clock_alklhd(e); - if (logtest > logbest) { - ebest = e; - logbest = logtest; - numbestroot = 1; - } else if (logtest == logbest) { - numbestroot++; - } - } - - return ebest; -} - -/* show heights and corresponding standard errors */ -void resultheights(FILE *fp) -{ - int h, num; - Node *cp; - - fprintf(fp, " height S.E. of node common to branches\n"); - for (h = 0; h < Numhts; h++) { - fprintf(fp, "%.5f %.5f ", (heights[h]->height)*0.01, - sqrt(heights[h]->varheight)*0.01); - cp = heights[h]; - do { - num = (cp->number) + 1; - if (cp->kinp->isop != NULL) num += Numspc; /* internal branch */ - fprintf(fp, "%d ", num); - cp = cp->isop; - } while (cp != heights[h]); - fprintf(fp, "\n"); - - } - fprintf(fp, "%.5f %.5f of root at branch %d\n", - hroot*0.01, sqrt(varhroot)*0.01, locroot+1); -} - diff --git a/forester/archive/RIO/others/puzzle_mod/src/ml3.c b/forester/archive/RIO/others/puzzle_mod/src/ml3.c deleted file mode 100644 index a68a054..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/ml3.c +++ /dev/null @@ -1,350 +0,0 @@ -/* - * ml3.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. 
See http://www.opensource.org for details. - */ - - -#define EXTERN extern - - -/* prototypes */ -#include -#include -#include -#include "util.h" -#include "ml.h" -#include "gamma.h" - - - -/******************************************************************************/ -/* discrete Gamma-distribution and related stuff */ -/******************************************************************************/ - -/* compare general base frequencies with frequencies of taxon i with chi square */ -double homogentest(int taxon) -{ - return chi2test(Freqtpm, Basecomp[taxon], gettpmradix(), &chi2fail); -} - - -/* discrete Gamma according to Yang 1994 (JME 39:306-314) */ -void YangDiscreteGamma (double shape, int c, dvector x) -{ - double twoc, mu; - int i; - - twoc = 2.0*c; - mu = 0.0; - for (i = 0; i < c; i++) - { - /* corresponding rates */ - x[i] = icdfGamma ( (2.0*i+1.0)/twoc, shape); - mu += x[i]; - } - mu = mu/c; - - /* rescale for avarage rate of 1.0 */ - for (i = 0; i < c; i++) - { - x[i] /= mu; - } -} - -/* compute rates of each category when rates are Gamma-distributed */ -void updaterates() -{ - int i; - double alpha; - - if (numcats == 1) - { - Rates[0] = 1.0; - return; - } - if (Geta == 0.0) - { - for (i = 0; i < numcats; i++) - Rates[i] = 1.0; - return; - } - alpha = (1.0 - Geta)/Geta; - - YangDiscreteGamma (alpha, numcats, Rates); - - /* if invariable sites are present */ - for (i = 0; i < numcats; i++) - Rates[i] = Rates[i]/(1.0-fracinv); - - /* check for very small rates */ - for (i = 0; i < numcats; i++) - if (Rates[i] < 0.000001) Rates[i] = 0.000001; -} - - - -/******************************************************************************/ -/* parameter estimation */ -/******************************************************************************/ - -/* compute sample mean and standard deviation of sample mean */ -void computestat(double *data, int n, double *mean, double *err) -{ - int i; - double sum; - - sum = 0; - for (i = 0; i < n; i++) sum += data[i]; - 
(*mean) = sum/(double) n; - - sum = 0; - for (i = 0; i < n; i++) sum += (data[i] - (*mean))*(data[i] - (*mean)); - if (n != 1) - (*err) = sqrt(sum)/sqrt((double)(n-1)*n); /* unbiased estimator */ - else - (*err) = 0.0; /* if n == 1 */ -} - -/* compute ML value of quartet (a,b,c,d) */ -double quartetml(int a, int b, int c, int d) -{ - double d1, d2, d3; - - /* compute ML for all topologies */ - if (approxp_optn) { /* approximate parameter mode */ - d1 = quartet_alklhd(a,b,c,d); /* (a,b)-(c,d) */ - d2 = quartet_alklhd(a,c,b,d); /* (a,c)-(b,d) */ - d3 = quartet_alklhd(a,d,b,c); /* (a,d)-(b,c) */ - } else { - d1 = quartet_lklhd(a,b,c,d); /* (a,b)-(c,d) */ - d2 = quartet_lklhd(a,c,b,d); /* (a,c)-(b,d) */ - d3 = quartet_lklhd(a,d,b,c); /* (a,d)-(b,c) */ - } - - /* looking for max(d1, d2, d3) */ - if (d1 < d2) { /* d2 > d1 */ - if (d2 < d3) { /* d3 > d2 > d1 */ - /* d3 maximum */ - return d3; - } else { /* d2 >= d3 > d1 */ - /* d2 maximum */ - return d2; - } - } else { /* d1 >= d2 */ - if (d1 < d3) { /* d3 > d1 >= d2 */ - /* d3 maximum */ - return d3; - } else { /* d1 >= d2 && d1 >= d3 */ - /* d1 maximum */ - return d1; - } - } -} - -/* optimization function TSparam - quartets */ -double opttsq(double x) -{ - if (x < MINTS) TSparam = MINTS; - else if (x > MAXTS) TSparam = MAXTS; - else TSparam = x; - tranprobmat(); - distupdate(qca, qcb, qcc, qcd); - return (-quartetml(qca, qcb, qcc, qcd)); -} - -/* optimization function YRparam - quartets */ -double optyrq(double x) -{ - if (x < MINYR) YRparam = MINYR; - else if (x > MAXYR) YRparam = MAXYR; - else YRparam = x; - tranprobmat(); - distupdate(qca, qcb, qcc, qcd); - return (-quartetml(qca, qcb, qcc, qcd)); -} - -/* estimate substitution process parameters - random quartets */ -void optimseqevolparamsq() -{ - double tsmeanold, yrmeanold; - dvector tslist, yrlist; - int fin; - ivector taxon; - uli minqts, maxqts, n; - - - taxon = new_ivector(4); - - /* number of quartets to be investigated */ - minqts = (uli) floor(0.25 * 
MINPERTAXUM * Maxspc) + 1; - maxqts = (uli) floor(0.25 * MAXPERTAXUM * Maxspc) + 1; - if (Maxspc == 4) { - minqts = (uli) 1; - maxqts = (uli) 1; - } - - tslist = new_dvector(maxqts); - yrlist = new_dvector(maxqts); - - /* initialize averages */ - tsmean = TSparam; - yrmean = YRparam; - - fin = FALSE; - - /* investigate maxqts random quartets */ - for (n = 0; n < maxqts; n++) { - - /* choose random quartet */ - chooser(Maxspc, 4, taxon); - - /* - * optimize parameters on this quartet - */ - - qca = taxon[0]; - qcb = taxon[1]; - qcc = taxon[2]; - qcd = taxon[3]; - - /* initialize start values with average value */ - if ((SH_optn || nuc_optn) && optim_optn && (data_optn == 0)) TSparam = tsmean; - if ((nuc_optn && TN_optn) && optim_optn && (data_optn == 0)) YRparam = yrmean; - - /* estimation */ - twodimenmin(PEPS1, - (SH_optn || nuc_optn) && optim_optn && (data_optn == 0), - MINTS, &TSparam, MAXTS, opttsq, &tserr, - (nuc_optn && TN_optn) && optim_optn && (data_optn == 0), - MINYR, &YRparam, MAXYR, optyrq, &yrerr); - - - tsmeanold = tsmean; - yrmeanold = yrmean; - tslist[n] = TSparam; - yrlist[n] = YRparam; - computestat(tslist, n+1 , &tsmean, &tserr); - computestat(yrlist, n+1 , &yrmean, &yrerr); - - /* check whether the means are converging */ - if (n > minqts-2) { - if ((fabs(tsmean-tsmeanold) < TSDIFF) && - (fabs(yrmean-yrmeanold) < YRDIFF)) - fin = TRUE; - } - - /* investigate at least minqts quartets */ - if (n > minqts-2 && (fin || n > maxqts-2)) break; - } - - /* round estimated numbers to 2 digits after the decimal point */ - if (tserr != 0.0) tsmean = floor(100.0*tsmean+0.5)/100.0; - if (yrerr != 0.0) yrmean = floor(100.0*yrmean+0.5)/100.0; - - /* update ML engine */ - TSparam = tsmean; - YRparam = yrmean; - tranprobmat(); - - free_ivector(taxon); -} - -/* optimization function TSparam - tree */ -double opttst(double x) -{ - double result; - - if (x < MINTS) TSparam = MINTS; - else if (x > MAXTS) TSparam = MAXTS; - else TSparam = x; - tranprobmat(); - 
computedistan(); - if (approxp_optn) result = usertree_alklhd(); - else result = usertree_lklhd(); - - return (-result); -} - -/* optimization function YRparam - tree */ -double optyrt(double x) -{ - double result; - - if (x < MINYR) YRparam = MINYR; - else if (x > MAXYR) YRparam = MAXYR; - else YRparam = x; - tranprobmat(); - computedistan(); - if (approxp_optn) result = usertree_alklhd(); - else result = usertree_lklhd(); - - return (-result); -} - - -/* optimize substitution process parameters - tree */ -void optimseqevolparamst() -{ - twodimenmin(PEPS1, - (SH_optn || nuc_optn) && optim_optn && (data_optn == 0), - MINTS, &TSparam, MAXTS, opttst, &tserr, - (nuc_optn && TN_optn) && optim_optn && (data_optn == 0), - MINYR, &YRparam, MAXYR, optyrt, &yrerr); -} - - -/* optimization function fracinv */ -double optfi(double x) -{ - double result; - - if (x < MINFI) fracinv = MINFI; - else if (x > MAXFI) fracinv = MAXFI; - else fracinv = x; - - computedistan(); - if (approxp_optn) result = usertree_alklhd(); - else result = usertree_lklhd(); - - return (-result); -} - - -/* optimization function Geta */ -double optge(double x) -{ - double result; - - if (x < MINGE) Geta = MINGE; - else if (x > MAXGE) Geta = MAXGE; - else Geta = x; - - updaterates(); - - computedistan(); - if (approxp_optn) result = usertree_alklhd(); - else result = usertree_lklhd(); - - return (-result); -} - - -/* optimize rate heterogeneity parameters */ -void optimrateparams() -{ - twodimenmin(PEPS2, - fracinv_optim, - MINFI, &fracinv, fracconst, optfi, &fierr, - grate_optim, - MINGE, &Geta, MAXGE, optge, &geerr); - -} diff --git a/forester/archive/RIO/others/puzzle_mod/src/model1.c b/forester/archive/RIO/others/puzzle_mod/src/model1.c deleted file mode 100644 index 54fb889..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/model1.c +++ /dev/null @@ -1,326 +0,0 @@ -/* - * model1.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. 
Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -/* definitions */ -#define EXTERN extern - -/* prototypes */ -#include -#include "util.h" -#include "ml.h" - -/* number of states of the selected model */ -int gettpmradix() -{ - if (data_optn == 0) { /* nucleotides */ - if (nuc_optn) return 4; - if (SH_optn) return 16; - } else if (data_optn == 1) { /* amino acids */ - return 20; - } else { /* two-state model */ - return 2; - } - return 1; -} - -/* relative transition frequencies */ -void rtfdata(dmatrix q, double *f) -{ - double alp, alpy, alpr; - int i, j; - - if (data_optn == 0) - { /* nucleotides */ - - if (nuc_optn) - { /* 4x4 nucleotides */ - alp = 2.0*TSparam; - alpr = (alp * 2.0) / (YRparam + 1.0); - alpy = YRparam * alpr; - - q[0][1] = 1; q[0][2] = alpr; q[0][3] = 1; - q[1][2] = 1; q[1][3] = alpy; - q[2][3] = 1; - - f[0] = 0.25; f[1] = 0.25; f[2] = 0.25; f[3] = 0.25; - } - - if (SH_optn) - { /* 16x16 nucleotides */ - - alp = 2.0*TSparam; - - q[0][1] = 1; q[0][2] = alp; q[0][3] = 1; q[0][4] = 1; - q[0][5] = 0; q[0][6] = 0; q[0][7] = 0; q[0][8] = alp; - q[0][9] = 0; q[0][10] = 0; q[0][11] = 0; q[0][12] = 1; - q[0][13] = 0; q[0][14] = 0; q[0][15] = 0; - - q[1][2] = 1; q[1][3] = alp; q[1][4] = 0; q[1][5] = 1; - q[1][6] = 0; q[1][7] = 0; q[1][8] = 0; q[1][9] = alp; - q[1][10] = 0; q[1][11] = 0; q[1][12] = 0; q[1][13] = 1; - q[1][14] = 0; q[1][15] = 0; - - q[2][3] = 1; q[2][4] = 0; q[2][5] = 0; q[2][6] = 1; - q[2][7] = 0; q[2][8] = 0; q[2][9] = 0; q[2][10] = alp; - q[2][11] = 0; q[2][12] = 0; q[2][13] = 0; q[2][14] = 1; - q[2][15] = 0; - - q[3][4] = 0; q[3][5] = 0; q[3][6] = 0; q[3][7] = 1; - q[3][8] = 0; q[3][9] = 0; q[3][10] = 0; q[3][11] = alp; - q[3][12] = 0; q[3][13] = 0; q[3][14] = 0; q[3][15] = 1; - - q[4][5] = 1; 
q[4][6] = alp; q[4][7] = 1; q[4][8] = 1; - q[4][9] = 0; q[4][10] = 0; q[4][11] = 0; q[4][12] = alp; - q[4][13] = 0; q[4][14] = 0; q[4][15] = 0; - - q[5][6] = 1; q[5][7] = alp; q[5][8] = 0; q[5][9] = 1; - q[5][10] = 0; q[5][11] = 0; q[5][12] = 0; q[5][13] = alp; - q[5][14] = 0; q[5][15] = 0; - - q[6][7] = 1; q[6][8] = 0; q[6][9] = 0; q[6][10] = 1; - q[6][11] = 0; q[6][12] = 0; q[6][13] = 0; q[6][14] = alp; - q[6][15] = 0; - - q[7][8] = 0; q[7][9] = 0; q[7][10] = 0; q[7][11] = 1; - q[7][12] = 0; q[7][13] = 0; q[7][14] = 0; q[7][15] = alp; - - q[8][9] = 1; q[8][10] = alp; q[8][11] = 1; q[8][12] = 1; - q[8][13] = 0; q[8][14] = 0; q[8][15] = 0; - - q[9][10] = 1; q[9][11] = alp; q[9][12] = 0; q[9][13] = 1; - q[9][14] = 0; q[9][15] = 0; - - q[10][11] = 1; q[10][12] = 0; q[10][13] = 0; q[10][14] = 1; - q[10][15] = 0; - - q[11][12] = 0; q[11][13] = 0; q[11][14] = 0; q[11][15] = 1; - - q[12][13] = 1; q[12][14] = alp; q[12][15] = 1; - - q[13][14] = 1; q[13][15] = alp; - - q[14][15] = 1; - - - for (i = 0; i < 16; i++) f[i] = 0.0625; - } - } - else if (data_optn == 1) - { /* amino acids */ - if (Dayhf_optn) /* Dayhoff model */ - { - dyhfdata(q, f); - } - else if (Jtt_optn) /* JTT model */ - { - jttdata(q, f); - } - else if (blosum62_optn) /* BLOSUM 62 model */ - { - blosum62data(q, f); - } - else if (mtrev_optn) /* mtREV model */ - { - mtrevdata(q, f); - } - else if (cprev_optn) /* cpREV model */ - { - cprev45data(q, f); - } - else if (vtmv_optn) /* VT model */ - { - vtmvdata(q, f); - } - else /* if (wag_optn) */ /* WAG model */ - { - wagdata(q, f); - } - - } - else /* two-state model */ - { - q[0][1] = 1.0; - - f[0] = 0.5; f[1] = 0.5; - } - - /* fill matrix from upper triangle */ - for (i = 0; i < tpmradix; i++) - { - q[i][i] = 0.0; - for (j = i+1; j < tpmradix; j++) - { - q[j][i] = q[i][j]; - } - } -} - -/* transform letter codes to state numbers */ -int code2int(cvector c) -{ if (data_optn == 0) { /* nucleotides */ - if (nuc_optn) { /* 4x4 */ - switch (c[0]) { - case 'A': 
return 0; - case 'C': return 1; - case 'G': return 2; - case 'T': return 3; - case 'U': return 3; - default : return 4; - } - } - if (SH_optn) { /* 16x16 */ - if (c[0] == 'A') { - switch (c[1]) { - case 'A': return 0; /* AA */ - case 'C': return 1; /* AC */ - case 'G': return 2; /* AG */ - case 'T': return 3; /* AT */ - case 'U': return 3; /* AT */ - default: return 16; - } - } - if (c[0] == 'C') { - switch (c[1]) { - case 'A': return 4; /* CA */ - case 'C': return 5; /* CC */ - case 'G': return 6; /* CG */ - case 'T': return 7; /* CT */ - case 'U': return 7; /* CT */ - default: return 16; - } - } - if (c[0] == 'G') { - switch (c[1]) { - case 'A': return 8; /* GA */ - case 'C': return 9; /* GC */ - case 'G': return 10; /* GG */ - case 'T': return 11; /* GT */ - case 'U': return 11; /* GT */ - default: return 16; - } - } - if (c[0] == 'T' || c[0] == 'U') { - switch (c[1]) { - case 'A': return 12; /* TA */ - case 'C': return 13; /* TC */ - case 'G': return 14; /* TG */ - case 'T': return 15; /* TT */ - case 'U': return 15; /* TT */ - default: return 16; - } - } - return 16; - } - } else if (data_optn == 1) { /* amino acids */ - switch (c[0]) { - case 'A': return 0; - case 'C': return 4; - case 'D': return 3; - case 'E': return 6; - case 'F': return 13; - case 'G': return 7; - case 'H': return 8; - case 'I': return 9; - case 'K': return 11; - case 'L': return 10; - case 'M': return 12; - case 'N': return 2; - case 'P': return 14; - case 'Q': return 5; - case 'R': return 1; - case 'S': return 15; - case 'T': return 16; - case 'V': return 19; - case 'W': return 17; - case 'Y': return 18; - default : return 20; - } - } else { /* two-state model */ - switch (c[0]) { - case '0': return 0; - case '1': return 1; - default : return 2; - } - } - return 0; -} - -/* return letter code belonging to state number */ -char *int2code(int s) -{ - if (data_optn == 0) { /* nucleotides */ - if (nuc_optn) { /* 4x4 */ - switch (s) { - case 0: return "A"; - case 1: return "C"; - case 2: 
return "G"; - case 3: return "T"; - default : return "?"; - } - } - if (SH_optn) { /* 16x16 */ - switch (s) { - case 0: return "AA"; - case 1: return "AC"; - case 2: return "AG"; - case 3: return "AT"; - case 4: return "CA"; - case 5: return "CC"; - case 6: return "CG"; - case 7: return "CT"; - case 8: return "GA"; - case 9: return "GC"; - case 10: return "GG"; - case 11: return "GT"; - case 12: return "TA"; - case 13: return "TC"; - case 14: return "TG"; - case 15: return "TT"; - default : return "??"; - } - } - } else if (data_optn == 1) { /* amino acids */ - switch (s) { - case 0: return "A"; - case 1: return "R"; - case 2: return "N"; - case 3: return "D"; - case 4: return "C"; - case 5: return "Q"; - case 6: return "E"; - case 7: return "G"; - case 8: return "H"; - case 9: return "I"; - case 10: return "L"; - case 11: return "K"; - case 12: return "M"; - case 13: return "F"; - case 14: return "P"; - case 15: return "S"; - case 16: return "T"; - case 17: return "W"; - case 18: return "Y"; - case 19: return "V"; - default : return "?"; - } - } else { /* two-state model */ - switch (s) { - case 0: return "0"; - case 1: return "1"; - default : return "?"; - } - } - return "?"; -} diff --git a/forester/archive/RIO/others/puzzle_mod/src/model2.c b/forester/archive/RIO/others/puzzle_mod/src/model2.c deleted file mode 100644 index 9e2197f..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/model2.c +++ /dev/null @@ -1,1125 +0,0 @@ -/* - * model2.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - - -/* definitions */ -#define EXTERN extern - -/* prototypes */ -#include -#include "util.h" -#include "ml.h" - - -void jttdata(dmatrix q, double *f) -{ - /* - * JTT model for amino acid evolution - * D.T. Jones, W.R. Taylor, and J.M. Thornton - * "The rapid generation of mutation data matrices from protein sequences" - * CABIOS vol. 8 no. 3 1992 pp. 275-282 - */ - - q[0][1]=3.1628651460584e+00; q[0][2]=3.2804935927860e+00; - q[0][3]=4.8477237048666e+00; q[0][4]=3.4612244897959e+00; - q[0][5]=3.3130910900946e+00; q[0][6]=6.3199473337722e+00; - q[0][7]=1.0440154440154e+01; q[0][8]=1.3061224489796e+00; - q[0][9]=2.1726844583987e+00; q[0][10]=1.8443597219107e+00; - q[0][11]=2.2137668626773e+00; q[0][12]=2.7210884353741e+00; - q[0][13]=8.3265306122449e-01; q[0][14]=1.1537414965986e+01; - q[0][15]=2.2838213546288e+01; q[0][16]=2.7007955724663e+01; - q[0][17]=5.1311953352770e-01; q[0][18]=8.3673469387755e-01; - q[0][19]=1.7474335188621e+01; - - q[1][2]=2.6598918637222e+00; q[1][3]=9.1014867485456e-01; - q[1][4]=6.1624649859944e+00; q[1][5]=1.8036482885837e+01; - q[1][6]=1.8924731182796e+00; q[1][7]=8.1810886516769e+00; - q[1][8]=1.9119717452198e+01; q[1][9]=1.4410687351864e+00; - q[1][10]=2.2211961707760e+00; q[1][11]=3.9239234676922e+01; - q[1][12]=2.5060690943044e+00; q[1][13]=3.9439775910364e-01; - q[1][14]=4.1953094963476e+00; q[1][15]=5.9016766126741e+00; - q[1][16]=3.8437069743152e+00; q[1][17]=7.6766706682673e+00; - q[1][18]=1.4173669467787e+00; q[1][19]=1.0308123249300e+00; - - q[2][3]=3.2226935854843e+01; q[2][4]=1.8710963455150e+00; - q[2][5]=4.5351268130622e+00; q[2][6]=3.3951344979102e+00; - q[2][7]=4.5987249708180e+00; q[2][8]=2.3693774375271e+01; - q[2][9]=2.9235880398671e+00; q[2][10]=8.0960899565551e-01; - q[2][11]=1.5024269384537e+01; q[2][12]=1.9003322259136e+00; - q[2][13]=4.3853820598007e-01; q[2][14]=7.1083317047749e-01; - q[2][15]=2.9456208772690e+01; q[2][16]=1.3735908553410e+01; - q[2][17]=1.6706217370669e-01; q[2][18]=4.1661129568106e+00; 
- q[2][19]=9.7452934662237e-01; - - q[3][4]=6.2857142857143e-01; q[3][5]=3.0662020905923e+00; - q[3][6]=4.5450549450549e+01; q[3][7]=7.5402435402435e+00; - q[3][8]=6.0544672718586e+00; q[3][9]=6.8808114961961e-01; - q[3][10]=3.6130902064968e-01; q[3][11]=1.6718197057180e+00; - q[3][12]=1.0879120879121e+00; q[3][13]=1.9340659340659e-01; - q[3][14]=7.3949579831933e-01; q[3][15]=3.4196528109572e+00; - q[3][16]=2.4749487800335e+00; q[3][17]=3.4536891679749e-01; - q[3][18]=2.6895604395604e+00; q[3][19]=1.8608058608059e+00; - - q[4][5]=5.5191637630662e-01; q[4][6]=3.2442396313364e-01; - q[4][7]=3.3297297297297e+00; q[4][8]=4.3726708074534e+00; - q[4][9]=9.1868131868132e-01; q[4][10]=9.9466248037677e-01; - q[4][11]=2.9830508474576e-01; q[4][12]=2.4095238095238e+00; - q[4][13]=4.1485714285714e+00; q[4][14]=7.3949579831933e-01; - q[4][15]=1.2862939958592e+01; q[4][16]=2.8125907990315e+00; - q[4][17]=6.8244897959184e+00; q[4][18]=1.2885714285714e+01; - q[4][19]=3.7714285714286e+00; - - q[5][6]=2.0316061593796e+01; q[5][7]=1.3922214897825e+00; - q[5][8]=3.3861536130889e+01; q[5][9]=4.7172339855267e-01; - q[5][10]=4.2320327755868e+00; q[5][11]=1.7835941652395e+01; - q[5][12]=2.6573751451800e+00; q[5][13]=2.7595818815331e-01; - q[5][14]=9.4992143198743e+00; q[5][15]=3.2350653941322e+00; - q[5][16]=3.0973838067678e+00; q[5][17]=1.0512692882031e+00; - q[5][18]=1.5331010452962e+00; q[5][19]=1.0778164924506e+00; - - q[6][7]=6.6857641051189e+00; q[6][8]=1.4458024443999e+00; - q[6][9]=6.7068415455512e-01; q[6][10]=5.7932850559579e-01; - q[6][11]=1.0365070686558e+01; q[6][12]=1.0138248847926e+00; - q[6][13]=2.6359447004608e-01; q[6][14]=1.1291226167887e+00; - q[6][15]=1.8337006611901e+00; q[6][16]=1.9520424900414e+00; - q[6][17]=6.9519420671494e-01; q[6][18]=3.8018433179723e-01; - q[6][19]=2.7772657450077e+00; - - q[7][8]=1.2113479939567e+00; q[7][9]=3.2670032670033e-01; - q[7][10]=4.1817641817642e-01; q[7][11]=1.6354950592239e+00; - q[7][12]=7.6447876447876e-01; 
q[7][13]=3.0579150579151e-01; - q[7][14]=1.2391551215081e+00; q[7][15]=1.1138492529797e+01; - q[7][16]=1.8888816176952e+00; q[7][17]=3.3491450634308e+00; - q[7][18]=3.1853281853282e-01; q[7][19]=2.8416988416988e+00; - - q[8][9]=1.0931677018634e+00; q[8][10]=3.2194389461470e+00; - q[8][11]=3.1498052426571e+00; q[8][12]=1.9130434782609e+00; - q[8][13]=2.7329192546584e+00; q[8][14]=6.7304834977469e+00; - q[8][15]=4.3726708074534e+00; q[8][16]=2.8162964522581e+00; - q[8][17]=7.8083407275954e-01; q[8][18]=3.5118012422360e+01; - q[8][19]=7.2877846790890e-01; - - q[9][10]=1.4069798333535e+01; q[9][11]=1.2292791953809e+00; - q[9][12]=2.8366300366300e+01; q[9][13]=4.7384615384615e+00; - q[9][14]=5.8780435251023e-01; q[9][15]=2.4105749323141e+00; - q[9][16]=1.5243062022723e+01; q[9][17]=8.2888540031397e-01; - q[9][18]=1.8434065934066e+00; q[9][19]=5.7699633699634e+01; - - q[10][11]=8.8039805231089e-01; q[10][12]=2.2425954997384e+01; - q[10][13]=1.5099529042386e+01; q[10][14]=6.2626896912611e+00; - q[10][15]=3.4917298022888e+00; q[10][16]=1.6109411169944e+00; - q[10][17]=3.2366001345593e+00; q[10][18]=1.4505494505495e+00; - q[10][19]=1.0557823129252e+01; - - q[11][12]=3.6577885391445e+00; q[11][13]=1.4915254237288e-01; - q[11][14]=1.2868062479229e+00; q[11][15]=2.8162964522581e+00; - q[11][16]=5.7494151926786e+00; q[11][17]=5.4790729851263e-01; - q[11][18]=5.3268765133172e-01; q[11][19]=7.4899112187248e-01; - - q[12][13]=2.5666666666667e+00; q[12][14]=9.4491129785247e-01; - q[12][15]=1.6397515527950e+00; q[12][16]=1.2180790960452e+01; - q[12][17]=1.1972789115646e+00; q[12][18]=1.1130952380952e+00; - q[12][19]=1.7746031746032e+01; - - q[13][14]=8.8739495798319e-01; q[13][15]=5.6298136645963e+00; - q[13][16]=8.3099273607748e-01; q[13][17]=3.3224489795918e+00; - q[13][18]=3.3392857142857e+01; q[13][19]=3.6000000000000e+00; - - q[14][15]=1.6261762676085e+01; q[14][16]=6.8852490148602e+00; - q[14][17]=4.2256902761104e-01; q[14][18]=6.7787114845938e-01; - 
q[14][19]=1.2549019607843e+00; - - q[15][16]=2.7891216619293e+01; q[15][17]=1.8740017746229e+00; - q[15][18]=3.7349896480331e+00; q[15][19]=2.4182194616977e+00; - - q[16][17]=4.8702870978900e-01; q[16][18]=1.1985472154964e+00; - q[16][19]=6.7925746569814e+00; - - q[17][18]=4.6020408163265e+00; q[17][19]=1.4693877551020e+00; - - q[18][19]=1.0000000000000e+00; - - - f[0] = 0.077; f[1] = 0.051; f[2] = 0.043; f[3] = 0.052; - f[4] = 0.02; f[5] = 0.041; f[6] = 0.062; f[7] = 0.074; - f[8] = 0.023; f[9] = 0.052; f[10] = 0.091; f[11] = 0.059; - f[12] = 0.024; f[13] = 0.04; f[14] = 0.051; f[15] = 0.069; - f[16] = 0.059; f[17] = 0.014; f[18] = 0.032; f[19] = 0.066; -} - -void dyhfdata(dmatrix q, double *f) -{ - /* - * Dayhoff model for amino acid evolution - * Dayhoff, M.O., Schwartz, R.M., Orcutt, B.C. (1978) - * "A model of evolutionary change in proteins." - * Dayhoff, M.O. (ed.) Atlas of Protein Sequence Structur., Vol5, Suppl. 3, - * National Biomedical Research Foundation, Washington DC, pp. 345-352. 
- */ - - q[0][1]=9.6472567159749e-01; q[0][2]=3.5927991886410e+00; - q[0][3]=4.3200552414656e+00; q[0][4]=1.3184584178499e+00; - q[0][5]=3.2267534963169e+00; q[0][6]=7.0141987829615e+00; - q[0][7]=8.5773867857875e+00; q[0][8]=8.1434196396611e-01; - q[0][9]=2.3518447453539e+00; q[0][10]=1.4735711728911e+00; - q[0][11]=9.3940162271805e-01; q[0][12]=2.5490196078431e+00; - q[0][13]=6.5922920892495e-01; q[0][14]=8.9189834148670e+00; - q[0][15]=1.4540712836859e+01; q[0][16]=1.3411904595370e+01; - q[0][17]=3.8517964118027e-02; q[0][18]=8.7897227856660e-01; - q[0][19]=7.4036511156187e+00; - - q[1][2]=1.1890243902439e+00; q[1][3]=5.9525626545377e-02; - q[1][4]=8.4778922655537e-01; q[1][5]=8.8348561504191e+00; - q[1][6]=5.5954088952654e-02; q[1][7]=3.1434881434075e-01; - q[1][8]=8.4753987678285e+00; q[1][9]=2.2684090115941e+00; - q[1][10]=5.5954088952654e-01; q[1][11]=1.6681312769010e+01; - q[1][12]=3.1707317073171e+00; q[1][13]=4.8959827833572e-01; - q[1][14]=3.6754156468900e+00; q[1][15]=5.4755072760812e+00; - q[1][16]=9.6472567159749e-01; q[1][17]=7.5538020086083e+00; - q[1][18]=2.7977044476327e-01; q[1][19]=8.6083213773314e-01; - - q[2][3]=3.2459324155194e+01; q[2][4]=7.3852625416383e-02; - q[2][5]=3.7732198142415e+00; q[2][6]=5.3911764705882e+00; - q[2][7]=5.0264375413087e+00; q[2][8]=1.9061418685121e+01; - q[2][9]=2.7901430842607e+00; q[2][10]=1.2482698961938e+00; - q[2][11]=1.1542279411765e+01; q[2][12]=1.9117647058824e-01; - q[2][13]=5.0183823529412e-01; q[2][14]=1.5181660899654e+00; - q[2][15]=1.7697478991597e+01; q[2][16]=8.3557302231237e+00; - q[2][17]=8.6029411764706e-01; q[2][18]=3.4411764705882e+00; - q[2][19]=5.7352941176471e-01; - - q[3][4]=2.5534152404601e-02; q[3][5]=4.8811013767209e+00; - q[3][6]=4.0561952440551e+01; q[3][7]=4.4423506911730e+00; - q[3][8]=3.0865788117500e+00; q[3][9]=8.5749078239692e-01; - q[3][10]=2.5926985518518e-02; q[3][11]=2.5930851063830e+00; - q[3][12]=1.1667143483333e-01; q[3][13]=1.2963492759259e-02; - 
q[3][14]=4.7853935065891e-01; q[3][15]=3.4167709637046e+00; - q[3][16]=2.3984722282163e+00; q[3][17]=3.2408731898147e-02; - q[3][18]=8.1351689612015e-02; q[3][19]=6.3829787234043e-01; - - q[4][5]=2.1864264103535e-02; q[4][6]=1.4770525083277e-02; - q[4][7]=3.9055458751427e-01; q[4][8]=1.0223340673168e+00; - q[4][9]=1.5970515970516e+00; q[4][10]=3.9098448749850e-02; - q[4][11]=8.0776309049169e-03; q[4][12]=1.4155086538140e-01; - q[4][13]=8.6898395721925e-02; q[4][14]=6.8155604487784e-01; - q[4][15]=5.8097784568373e+00; q[4][16]=5.9929928084086e-01; - q[4][17]=3.4759358288770e-01; q[4][18]=3.4759358288770e+00; - q[4][19]=1.7647058823529e+00; - - q[5][6]=2.5476780185759e+01; q[5][7]=1.0174974779977e+00; - q[5][8]=2.1573939173192e+01; q[5][9]=6.5266504894988e-01; - q[5][10]=2.6634492806410e+00; q[5][11]=5.5466331269350e+00; - q[5][12]=4.0247678018576e+00; q[5][13]=1.8038017885416e-02; - q[5][14]=5.5044618466582e+00; q[5][15]=2.0267580716497e+00; - q[5][16]=1.9256432155439e+00; q[5][17]=9.6202762055552e-02; - q[5][18]=1.0061919504644e-01; q[5][19]=1.2538699690402e+00; - - q[6][7]=2.8869795109055e+00; q[6][8]=1.5519031141869e+00; - q[6][9]=2.1701112877583e+00; q[6][10]=4.0484429065744e-01; - q[6][11]=2.9823529411765e+00; q[6][12]=1.0705882352941e+00; - q[6][13]=1.9801735189768e-02; q[6][14]=1.7993079584775e+00; - q[6][15]=2.8184873949580e+00; q[6][16]=1.2261663286004e+00; - q[6][17]=7.3114099162219e-02; q[6][18]=7.6470588235294e-01; - q[6][19]=1.3058823529412e+00; - - q[7][8]=3.7906768788150e-01; q[7][9]=2.3128004846840e-02; - q[7][10]=2.5776602775942e-01; q[7][11]=9.6662260409782e-01; - q[7][12]=6.0145406477198e-01; q[7][13]=5.4775280898876e-01; - q[7][14]=1.2382877804129e+00; q[7][15]=8.2853366065527e+00; - q[7][16]=1.1110604644803e+00; q[7][17]=1.2888301387971e-01; - q[7][18]=1.7114723586662e-02; q[7][19]=1.9233311302049e+00; - - q[8][9]=2.7354343963341e-01; q[8][10]=1.5876246692449e+00; - q[8][11]=9.6993944636678e-01; q[8][12]=1.2544085640577e-01; - 
q[8][13]=1.6868512110727e+00; q[8][14]=3.3075513942601e+00; - q[8][15]=1.2530894710826e+00; q[8][16]=8.1434196396611e-01; - q[8][17]=1.0121107266436e+00; q[8][18]=4.4982698961938e+00; - q[8][19]=1.5570934256055e+00; - - q[9][10]=9.2275320303002e+00; q[9][11]=1.6663354531002e+00; - q[9][12]=1.1780604133545e+01; q[9][13]=6.9753577106518e+00; - q[9][14]=4.2551201720752e-01; q[9][15]=8.8575970928912e-01; - q[9][16]=6.8951811852420e+00; q[9][17]=9.8802836705702e-02; - q[9][18]=1.3434022257552e+00; q[9][19]=3.1526232114467e+01; - - q[10][11]=6.5787197231834e-01; q[10][12]=1.8622837370242e+01; - q[10][13]=5.6340830449827e+00; q[10][14]=1.1377976796255e+00; - q[10][15]=6.1690558576372e-01; q[10][16]=1.2098794893211e+00; - q[10][17]=1.7543252595156e+00; q[10][18]=1.0346020761246e+00; - q[10][19]=6.2906574394464e+00; - - q[11][12]=8.6029411764706e+00; q[11][13]=6.6640454965565e-03; - q[11][14]=1.2089100346021e+00; q[11][15]=3.4411764705882e+00; - q[11][16]=4.9442190669371e+00; q[11][17]=3.4272233982290e-02; - q[11][18]=4.7794117647059e-01; q[11][19]=3.7500000000000e-01; - - q[12][13]=3.2500000000000e+00; q[12][14]=5.9976931949250e-01; - q[12][15]=2.1848739495798e+00; q[12][16]=3.6916835699797e+00; - q[12][17]=1.6247577591604e-01; q[12][18]=1.1508700794053e-01; - q[12][19]=9.0588235294118e+00; - - q[13][14]=3.9359861591695e-01; q[13][15]=1.6386554621849e+00; - q[13][16]=4.9442190669371e-01; q[13][17]=2.8676470588235e+00; - q[13][18]=2.4852941176471e+01; q[13][19]=4.4117647058824e-01; - - q[14][15]=8.6431043005437e+00; q[14][16]=2.8308077795013e+00; - q[14][17]=3.5840244687362e-02; q[14][18]=4.3804743506776e-02; - q[14][19]=1.7301038062284e+00; - - q[15][16]=1.9663865546218e+01; q[15][17]=2.7857142857143e+00; - q[15][18]=1.2016806722689e+00; q[15][19]=1.0840336134454e+00; - - q[16][17]=4.2019597219666e-02; q[16][18]=1.5162271805274e+00; - q[16][19]=5.6592292089249e+00; - - q[17][18]=2.2941176470588e+00; q[17][19]=1.2654363316538e-01; - - q[18][19]=1.0000000000000e+00; - - - 
f[0] = 0.087; f[1] = 0.041; f[2] = 0.040; f[3] = 0.047; - f[4] = 0.033; f[5] = 0.038; f[6] = 0.05; f[7] = 0.089; - f[8] = 0.034; f[9] = 0.037; f[10] = 0.085; f[11] = 0.08; - f[12] = 0.015; f[13] = 0.04; f[14] = 0.051; f[15] = 0.07; - f[16] = 0.058; f[17] = 0.01; f[18] = 0.03; f[19] = 0.065; -} - -void mtrevdata(dmatrix q, double *f) -{ - /* - * mtREV24 model of amino acid evolution - * (complete sequence data of mtDNA from 24 vertebrate species) - * Adachi, J. and Hasegawa, M. (1996) - */ - - q[0][1]=1.2199217606346e+01; q[0][2]=1.4182139942122e+01; - q[0][3]=9.2985091873208e+00; q[0][4]=3.1542792981957e+01; - q[0][5]=1.0025852846688e+00; q[0][6]=5.1418866803338e+00; - q[0][7]=6.3531246495131e+01; q[0][8]=7.3137132861715e+00; - q[0][9]=5.0782382656186e+01; q[0][10]=1.3399741808481e+01; - q[0][11]=4.4021672780560e+00; q[0][12]=7.4673480520104e+01; - q[0][13]=3.3513021631978e+00; q[0][14]=2.8582502221773e+01; - q[0][15]=2.0413623195312e+02; q[0][16]=2.5301305153906e+02; - q[0][17]=1.0000000000000e+00; q[0][18]=3.4084158197615e+00; - q[0][19]=1.0266468401249e+02; - - q[1][2]=6.9661274444534e+00; q[1][3]=1.0000000000000e+00; - q[1][4]=5.4384584796568e+01; q[1][5]=1.1631134513343e+02; - q[1][6]=1.0000000000000e+00; q[1][7]=1.2122831341194e+01; - q[1][8]=8.6961067087353e+01; q[1][9]=1.0000000000000e+00; - q[1][10]=8.1976829394538e+00; q[1][11]=7.4423215395318e+01; - q[1][12]=1.0000000000000e+00; q[1][13]=2.4659158338099e+00; - q[1][14]=1.2439947713615e+01; q[1][15]=3.1791814866372e+00; - q[1][16]=1.0935327216119e+00; q[1][17]=1.1550775790126e+01; - q[1][18]=1.0000000000000e+00; q[1][19]=4.0211417480338e+00; - - q[2][3]=4.1809325468160e+02; q[2][4]=3.1020979842967e+01; - q[2][5]=9.1349622725361e+01; q[2][6]=3.3185663516310e+01; - q[2][7]=2.8052324651124e+01; q[2][8]=2.6112087577885e+02; - q[2][9]=1.4261453863336e+01; q[2][10]=7.9775653461977e+00; - q[2][11]=3.2036829276162e+02; q[2][12]=3.4424354918739e+01; - q[2][13]=7.9996445145608e+00; q[2][14]=3.8586541461044e+01; - 
q[2][15]=2.6020426225852e+02; q[2][16]=1.2550758780474e+02; - q[2][17]=5.6207759736659e+00; q[2][18]=1.0071406219571e+02; - q[2][19]=1.0000000000000e+00; - - q[3][4]=1.0000000000000e+00; q[3][5]=2.9097352675564e+01; - q[3][6]=3.0713149855302e+02; q[3][7]=2.9877072751897e+01; - q[3][8]=5.9995408885817e+01; q[3][9]=2.2827096245105e+00; - q[3][10]=1.0000000000000e+00; q[3][11]=1.2183938185384e+00; - q[3][12]=1.0000000000000e+00; q[3][13]=2.6221929413096e+00; - q[3][14]=7.0708004204733e+00; q[3][15]=3.6327934317139e+01; - q[3][16]=1.4743408713748e+01; q[3][17]=1.0453246057102e+01; - q[3][18]=1.1165627147496e+01; q[3][19]=1.0000000000000e+00; - - q[4][5]=3.9599394038972e+01; q[4][6]=1.0000000000000e+00; - q[4][7]=1.6163581056674e+01; q[4][8]=7.4467985406234e+01; - q[4][9]=3.3018175376623e+01; q[4][10]=1.3500725995091e+01; - q[4][11]=1.0000000000000e+00; q[4][12]=3.2504095376923e+00; - q[4][13]=3.7264767083096e+01; q[4][14]=1.6454136037822e+01; - q[4][15]=1.4581783243113e+02; q[4][16]=9.4720031458442e+01; - q[4][17]=1.7684087896962e+01; q[4][18]=1.3409157685926e+02; - q[4][19]=1.0000000000000e+00; - - q[5][6]=1.6503249008836e+02; q[5][7]=3.5530760735494e+00; - q[5][8]=3.0652523140859e+02; q[5][9]=4.3905393139325e+00; - q[5][10]=2.0895470525345e+01; q[5][11]=2.4504076430724e+02; - q[5][12]=2.4931300477797e+01; q[5][13]=1.0059428264289e+01; - q[5][14]=7.2256314165467e+01; q[5][15]=2.8480937892158e+01; - q[5][16]=4.9962974409828e+01; q[5][17]=1.0000000000000e+00; - q[5][18]=2.0430790980529e+01; q[5][19]=9.9986289000676e+00; - - q[6][7]=1.4884496769963e+01; q[6][8]=2.5853576435567e+01; - q[6][9]=1.7418201388328e+00; q[6][10]=1.0000000000000e+00; - q[6][11]=1.6519126809071e+02; q[6][12]=1.0000000000000e+00; - q[6][13]=1.4067850525292e+00; q[6][14]=6.7547121641947e+00; - q[6][15]=2.8794794140840e+01; q[6][16]=7.8001372062558e+00; - q[6][17]=1.0000000000000e+00; q[6][18]=6.9067239183061e+00; - q[6][19]=1.1127702362585e+01; - - q[7][8]=1.0000000000000e+00; 
q[7][9]=3.1466649021550e+00; - q[7][10]=1.2699794194865e+00; q[7][11]=1.1962111069278e+01; - q[7][12]=1.0000000000000e+00; q[7][13]=1.0000000000000e+00; - q[7][14]=1.0000000000000e+00; q[7][15]=6.6277950574411e+01; - q[7][16]=5.8800079133028e+00; q[7][17]=5.7494182626674e+00; - q[7][18]=1.6887657206208e+00; q[7][19]=1.3320553471351e+00; - - q[8][9]=6.4536986087271e+00; q[8][10]=6.0472584534958e+00; - q[8][11]=6.7197196398961e+01; q[8][12]=6.2977633277779e+00; - q[8][13]=2.5347805183364e+01; q[8][14]=3.2089868698728e+01; - q[8][15]=4.0766987134407e+01; q[8][16]=2.3570850628539e+01; - q[8][17]=3.7286635325194e+00; q[8][18]=3.5270764890474e+02; - q[8][19]=1.0000000000000e+00; - - q[9][10]=1.7320653206333e+02; q[9][11]=1.0298655619743e+01; - q[9][12]=2.7262244199514e+02; q[9][13]=4.4561065036310e+01; - q[9][14]=1.0856482766156e+01; q[9][15]=2.5107659603898e+01; - q[9][16]=1.9391167162525e+02; q[9][17]=1.0000000000000e+00; - q[9][18]=1.3161329199391e+01; q[9][19]=6.4365086389428e+02; - - q[10][11]=7.8314019154706e+00; q[10][12]=2.8290920517725e+02; - q[10][13]=1.1371735519833e+02; q[10][14]=2.1105885757279e+01; - q[10][15]=3.8741359395934e+01; q[10][16]=6.6524559321657e+01; - q[10][17]=1.7071378554833e+01; q[10][18]=2.3234516108847e+01; - q[10][19]=4.8247261078055e+01; - - q[11][12]=4.8092094826036e+01; q[11][13]=3.3887559483420e+00; - q[11][14]=2.6368577564199e+01; q[11][15]=5.5679895711418e+01; - q[11][16]=7.1750284708933e+01; q[11][17]=1.2631893872825e+01; - q[11][18]=2.6932728996777e+01; q[11][19]=1.0000000000000e+00; - - q[12][13]=4.7798798034572e+01; q[12][14]=9.9165053447429e+00; - q[12][15]=5.8505442466161e+01; q[12][16]=2.7798190504760e+02; - q[12][17]=1.1427000119701e+01; q[12][18]=2.1029990530586e+01; - q[12][19]=2.0397078683768e+02; - - q[13][14]=9.1089574817139e+00; q[13][15]=3.3835737720574e+01; - q[13][16]=1.7815549567056e+01; q[13][17]=4.1272404968214e+00; - q[13][18]=2.4504156395152e+02; q[13][19]=3.3435675442163e+00; - - q[14][15]=8.9421193040709e+01; 
q[14][16]=6.7485067008375e+01; - q[14][17]=2.2161693733113e+00; q[14][18]=8.5338209390745e+00; - q[14][19]=4.3342126659660e+00; - - q[15][16]=3.1432036618746e+02; q[15][17]=2.0305343047059e+01; - q[15][18]=3.4167877957799e+01; q[15][19]=1.0000000000000e+00; - - q[16][17]=5.2559565123081e+00; q[16][18]=2.0382362288681e+01; - q[16][19]=1.0765527137500e+02; - - q[17][18]=1.3814733274637e+01; q[17][19]=2.8259139240676e+00; - - q[18][19]=1.0000000000000e+00; - - - /* amino acid frequencies */ - f[0]=0.072; f[1]=0.019; f[2]=0.039; f[3]=0.019; f[4]=0.006; - f[5]=0.025; f[6]=0.024; f[7]=0.056; f[8]=0.028; f[9]=0.088; - f[10]=0.168; f[11]=0.023; f[12]=0.054; f[13]=0.061; f[14]=0.054; - f[15]=0.072; f[16]=0.086; f[17]=0.029; f[18]=0.033; f[19]=0.043; -} - -void blosum62data(dmatrix q, double *f) -{ - /* - * BLOSUM62 model of amino acid evolution - * - * S. Henikoff and J. G. Henikoff. 1992. PNAS USA 89:10915-10919. - * - */ - - q[0][1]=7.3579038969751e-01; q[0][2]=4.8539105546575e-01; - q[0][3]=5.4316182089867e-01; q[0][4]=1.4599953104700e+00; - q[0][5]=1.1997057046020e+00; q[0][6]=1.1709490427999e+00; - q[0][7]=1.9558835749595e+00; q[0][8]=7.1624144499779e-01; - q[0][9]=6.0589900368677e-01; q[0][10]=8.0001653051838e-01; - q[0][11]=1.2952012667833e+00; q[0][12]=1.2537582666635e+00; - q[0][13]=4.9296467974759e-01; q[0][14]=1.1732759009239e+00; - q[0][15]=4.3250926870566e+00; q[0][16]=1.7291780194850e+00; - q[0][17]=4.6583936772479e-01; q[0][18]=7.1820669758623e-01; - q[0][19]=2.1877745220045e+00; - - q[1][2]=1.2974467051337e+00; q[1][3]=5.0096440855513e-01; - q[1][4]=2.2782657420895e-01; q[1][5]=3.0208336100636e+00; - q[1][6]=1.3605741904203e+00; q[1][7]=4.1876330851753e-01; - q[1][8]=1.4561411663360e+00; q[1][9]=2.3203644514174e-01; - q[1][10]=6.2271166969249e-01; q[1][11]=5.4111151414889e+00; - q[1][12]=9.8369298745695e-01; q[1][13]=3.7164469320875e-01; - q[1][14]=4.4813366171831e-01; q[1][15]=1.1227831042096e+00; - q[1][16]=9.1466595456337e-01; 
q[1][17]=4.2638231012175e-01; - q[1][18]=7.2051744121611e-01; q[1][19]=4.3838834377202e-01; - - q[2][3]=3.1801000482161e+00; q[2][4]=3.9735894989702e-01; - q[2][5]=1.8392161469920e+00; q[2][6]=1.2404885086396e+00; - q[2][7]=1.3558723444845e+00; q[2][8]=2.4145014342081e+00; - q[2][9]=2.8301732627800e-01; q[2][10]=2.1188815961519e-01; - q[2][11]=1.5931370434574e+00; q[2][12]=6.4844127878707e-01; - q[2][13]=3.5486124922252e-01; q[2][14]=4.9488704370192e-01; - q[2][15]=2.9041016564560e+00; q[2][16]=1.8981736345332e+00; - q[2][17]=1.9148204624678e-01; q[2][18]=5.3822251903674e-01; - q[2][19]=3.1285879799342e-01; - - q[3][4]=2.4083661480204e-01; q[3][5]=1.1909457033960e+00; - q[3][6]=3.7616252083685e+00; q[3][7]=7.9847324896839e-01; - q[3][8]=7.7814266402188e-01; q[3][9]=4.1855573246161e-01; - q[3][10]=2.1813157759360e-01; q[3][11]=1.0324479249521e+00; - q[3][12]=2.2262189795786e-01; q[3][13]=2.8173069420651e-01; - q[3][14]=7.3062827299842e-01; q[3][15]=1.5827541420653e+00; - q[3][16]=9.3418750943056e-01; q[3][17]=1.4534504627853e-01; - q[3][18]=2.6142220896504e-01; q[3][19]=2.5812928941763e-01; - - q[4][5]=3.2980150463028e-01; q[4][6]=1.4074889181440e-01; - q[4][7]=4.1820319228376e-01; q[4][8]=3.5405810983129e-01; - q[4][9]=7.7489402279418e-01; q[4][10]=8.3184264014158e-01; - q[4][11]=2.8507880090648e-01; q[4][12]=7.6768882347954e-01; - q[4][13]=4.4133747118660e-01; q[4][14]=3.5600849876863e-01; - q[4][15]=1.1971884150942e+00; q[4][16]=1.1198313585160e+00; - q[4][17]=5.2766441887169e-01; q[4][18]=4.7023773369610e-01; - q[4][19]=1.1163524786062e+00; - - q[5][6]=5.5289191779282e+00; q[5][7]=6.0984630538281e-01; - q[5][8]=2.4353411311401e+00; q[5][9]=2.3620245120365e-01; - q[5][10]=5.8073709318144e-01; q[5][11]=3.9452776745146e+00; - q[5][12]=2.4948960771127e+00; q[5][13]=1.4435695975031e-01; - q[5][14]=8.5857057567418e-01; q[5][15]=1.9348709245965e+00; - q[5][16]=1.2774802945956e+00; q[5][17]=7.5865380864172e-01; - q[5][18]=9.5898974285014e-01; 
q[5][19]=5.3078579012486e-01; - - q[6][7]=4.2357999217628e-01; q[6][8]=1.6268910569817e+00; - q[6][9]=1.8684804693170e-01; q[6][10]=3.7262517508685e-01; - q[6][11]=2.8024271516787e+00; q[6][12]=5.5541539747043e-01; - q[6][13]=2.9140908416530e-01; q[6][14]=9.2656393484598e-01; - q[6][15]=1.7698932389373e+00; q[6][16]=1.0710972360073e+00; - q[6][17]=4.0763564893830e-01; q[6][18]=5.9671930034577e-01; - q[6][19]=5.2425384633796e-01; - - q[7][8]=5.3985912495418e-01; q[7][9]=1.8929629237636e-01; - q[7][10]=2.1772115923623e-01; q[7][11]=7.5204244030271e-01; - q[7][12]=4.5943617357855e-01; q[7][13]=3.6816646445253e-01; - q[7][14]=5.0408659952683e-01; q[7][15]=1.5093262532236e+00; - q[7][16]=6.4143601140497e-01; q[7][17]=5.0835892463812e-01; - q[7][18]=3.0805573703500e-01; q[7][19]=2.5334079019018e-01; - - q[8][9]=2.5271844788492e-01; q[8][10]=3.4807220979697e-01; - q[8][11]=1.0225070358890e+00; q[8][12]=9.8431152535870e-01; - q[8][13]=7.1453370392764e-01; q[8][14]=5.2700733915060e-01; - q[8][15]=1.1170297629105e+00; q[8][16]=5.8540709022472e-01; - q[8][17]=3.0124860078016e-01; q[8][18]=4.2189539693890e+00; - q[8][19]=2.0155597175031e-01; - - q[9][10]=3.8909637733035e+00; q[9][11]=4.0619358664202e-01; - q[9][12]=3.3647977631042e+00; q[9][13]=1.5173593259539e+00; - q[9][14]=3.8835540920564e-01; q[9][15]=3.5754441245967e-01; - q[9][16]=1.1790911972601e+00; q[9][17]=3.4198578754023e-01; - q[9][18]=6.7461709322842e-01; q[9][19]=8.3118394054582e+00; - - q[10][11]=4.4557027426059e-01; q[10][12]=6.0305593795716e+00; - q[10][13]=2.0648397032375e+00; q[10][14]=3.7455568747097e-01; - q[10][15]=3.5296918452729e-01; q[10][16]=9.1525985769421e-01; - q[10][17]=6.9147463459998e-01; q[10][18]=8.1124585632307e-01; - q[10][19]=2.2314056889131e+00; - - q[11][12]=1.0730611843319e+00; q[11][13]=2.6692475051102e-01; - q[11][14]=1.0473834507215e+00; q[11][15]=1.7521659178195e+00; - q[11][16]=1.3038752007987e+00; q[11][17]=3.3224304063396e-01; - q[11][18]=7.1799348690032e-01; 
q[11][19]=4.9813847530407e-01; - - q[12][13]=1.7738551688305e+00; q[12][14]=4.5412362510273e-01; - q[12][15]=9.1872341574605e-01; q[12][16]=1.4885480537218e+00; - q[12][17]=8.8810109815193e-01; q[12][18]=9.5168216224591e-01; - q[12][19]=2.5758507553153e+00; - - q[13][14]=2.3359790962888e-01; q[13][15]=5.4002764482413e-01; - q[13][16]=4.8820611879305e-01; q[13][17]=2.0743248934965e+00; - q[13][18]=6.7472604308008e+00; q[13][19]=8.3811961017754e-01; - - q[14][15]=1.1691295777157e+00; q[14][16]=1.0054516831488e+00; - q[14][17]=2.5221483002727e-01; q[14][18]=3.6940531935451e-01; - q[14][19]=4.9690841067567e-01; - - q[15][16]=5.1515562922704e+00; q[15][17]=3.8792562209837e-01; - q[15][18]=7.9675152076106e-01; q[15][19]=5.6192545744165e-01; - - q[16][17]=5.1312812689059e-01; q[16][18]=8.0101024319939e-01; - q[16][19]=2.2530740511763e+00; - - q[17][18]=4.0544190065580e+00; q[17][19]=2.6650873142646e-01; - - q[18][19]=1.0000000000000e+00; - - - f[0]=0.074; f[1]=0.052; f[2]=0.045; f[3]=0.054; - f[4]=0.025; f[5]=0.034; f[6]=0.054; f[7]=0.074; - f[8]=0.026; f[9]=0.068; f[10]=0.099; f[11]=0.058; - f[12]=0.025; f[13]=0.047; f[14]=0.039; f[15]=0.057; - f[16]=0.051; f[17]=0.013; f[18]=0.032; f[19]=0.073; -} - - - -void vtmvdata(dmatrix q, double *f) -{ - /* - * variable time (VT) model for amino acid evolution - * Mueller, T. and Vingron, M. (1999) - * "Modeling Amino Acid Replacement" - * Journal of Comp. 
Biology - */ - -/* amino acid frequencies */ - -f[0]=0.078837 ; -f[1]=0.051238 ; -f[2]=0.042313 ; -f[3]=0.053066 ; -f[4]=0.015175 ; -f[5]=0.036713 ; -f[6]=0.061924 ; -f[7]=0.070852 ; -f[8]=0.023082 ; -f[9]=0.062056 ; -f[10]=0.096371 ; -f[11]=0.057324 ; -f[12]=0.023771 ; -f[13]=0.043296 ; -f[14]=0.043911 ; -f[15]=0.063403 ; -f[16]=0.055897 ; -f[17]=0.013272 ; -f[18]=0.034399 ; -f[19]=0.073101 ; - - -q[0][1] = 0.233108 ; -q[0][2] = 0.199097 ; -q[0][3] = 0.265145 ; -q[0][4] = 0.227333 ; -q[0][5] = 0.310084 ; -q[0][6] = 0.567957 ; -q[0][7] = 0.876213 ; -q[0][8] = 0.078692 ; -q[0][9] = 0.222972 ; -q[0][10] = 0.424630 ; -q[0][11] = 0.393245 ; -q[0][12] = 0.211550 ; -q[0][13] = 0.116646 ; -q[0][14] = 0.399143 ; -q[0][15] = 1.817198 ; -q[0][16] = 0.877877 ; -q[0][17] = 0.030309 ; -q[0][18] = 0.087061 ; -q[0][19] = 1.230985 ; - -q[1][2] = 0.210797 ; -q[1][3] = 0.105191 ; -q[1][4] = 0.031726 ; -q[1][5] = 0.493763 ; -q[1][6] = 0.255240 ; -q[1][7] = 0.156945 ; -q[1][8] = 0.213164 ; -q[1][9] = 0.081510 ; -q[1][10] = 0.192364 ; -q[1][11] = 1.755838 ; -q[1][12] = 0.087930 ; -q[1][13] = 0.042569 ; -q[1][14] = 0.128480 ; -q[1][15] = 0.292327 ; -q[1][16] = 0.204109 ; -q[1][17] = 0.046417 ; -q[1][18] = 0.097010 ; -q[1][19] = 0.113146 ; - -q[2][3] = 0.883422 ; -q[2][4] = 0.027495 ; -q[2][5] = 0.275700 ; -q[2][6] = 0.270417 ; -q[2][7] = 0.362028 ; -q[2][8] = 0.290006 ; -q[2][9] = 0.087225 ; -q[2][10] = 0.069245 ; -q[2][11] = 0.503060 ; -q[2][12] = 0.057420 ; -q[2][13] = 0.039769 ; -q[2][14] = 0.083956 ; -q[2][15] = 0.847049 ; -q[2][16] = 0.471268 ; -q[2][17] = 0.010459 ; -q[2][18] = 0.093268 ; -q[2][19] = 0.049824 ; - -q[3][4] = 0.010313 ; -q[3][5] = 0.205842 ; -q[3][6] = 1.599461 ; -q[3][7] = 0.311718 ; -q[3][8] = 0.134252 ; -q[3][9] = 0.011720 ; -q[3][10] = 0.060863 ; -q[3][11] = 0.261101 ; -q[3][12] = 0.012182 ; -q[3][13] = 0.016577 ; -q[3][14] = 0.160063 ; -q[3][15] = 0.461519 ; -q[3][16] = 0.178197 ; -q[3][17] = 0.011393 ; -q[3][18] = 0.051664 ; -q[3][19] = 0.048769 ; - -q[4][5] = 
0.004315 ; -q[4][6] = 0.005321 ; -q[4][7] = 0.050876 ; -q[4][8] = 0.016695 ; -q[4][9] = 0.046398 ; -q[4][10] = 0.091709 ; -q[4][11] = 0.004067 ; -q[4][12] = 0.023690 ; -q[4][13] = 0.051127 ; -q[4][14] = 0.011137 ; -q[4][15] = 0.175270 ; -q[4][16] = 0.079511 ; -q[4][17] = 0.007732 ; -q[4][18] = 0.042823 ; -q[4][19] = 0.163831 ; - -q[5][6] = 0.960976 ; -q[5][7] = 0.128660 ; -q[5][8] = 0.315521 ; -q[5][9] = 0.054602 ; -q[5][10] = 0.243530 ; -q[5][11] = 0.738208 ; -q[5][12] = 0.120801 ; -q[5][13] = 0.026235 ; -q[5][14] = 0.156570 ; -q[5][15] = 0.358017 ; -q[5][16] = 0.248992 ; -q[5][17] = 0.021248 ; -q[5][18] = 0.062544 ; -q[5][19] = 0.112027 ; - -q[6][7] = 0.250447 ; -q[6][8] = 0.104458 ; -q[6][9] = 0.046589 ; -q[6][10] = 0.151924 ; -q[6][11] = 0.888630 ; -q[6][12] = 0.058643 ; -q[6][13] = 0.028168 ; -q[6][14] = 0.205134 ; -q[6][15] = 0.406035 ; -q[6][16] = 0.321028 ; -q[6][17] = 0.018844 ; -q[6][18] = 0.055200 ; -q[6][19] = 0.205868 ; - -q[7][8] = 0.058131 ; -q[7][9] = 0.051089 ; -q[7][10] = 0.087056 ; -q[7][11] = 0.193243 ; -q[7][12] = 0.046560 ; -q[7][13] = 0.050143 ; -q[7][14] = 0.124492 ; -q[7][15] = 0.612843 ; -q[7][16] = 0.136266 ; -q[7][17] = 0.023990 ; -q[7][18] = 0.037568 ; -q[7][19] = 0.082579 ; - -q[8][9] = 0.020039 ; -q[8][10] = 0.103552 ; -q[8][11] = 0.153323 ; -q[8][12] = 0.021157 ; -q[8][13] = 0.079807 ; -q[8][14] = 0.078892 ; -q[8][15] = 0.167406 ; -q[8][16] = 0.101117 ; -q[8][17] = 0.020009 ; -q[8][18] = 0.286027 ; -q[8][19] = 0.068575 ; - -q[9][10] = 2.089890 ; -q[9][11] = 0.093181 ; -q[9][12] = 0.493845 ; -q[9][13] = 0.321020 ; -q[9][14] = 0.054797 ; -q[9][15] = 0.081567 ; -q[9][16] = 0.376588 ; -q[9][17] = 0.034954 ; -q[9][18] = 0.086237 ; -q[9][19] = 3.654430 ; - -q[10][11] = 0.201204 ; -q[10][12] = 1.105667 ; -q[10][13] = 0.946499 ; -q[10][14] = 0.169784 ; -q[10][15] = 0.214977 ; -q[10][16] = 0.243227 ; -q[10][17] = 0.083439 ; -q[10][18] = 0.189842 ; -q[10][19] = 1.337571 ; - -q[11][12] = 0.096474 ; -q[11][13] = 0.038261 ; -q[11][14] = 0.212302 
; -q[11][15] = 0.400072 ; -q[11][16] = 0.446646 ; -q[11][17] = 0.023321 ; -q[11][18] = 0.068689 ; -q[11][19] = 0.144587 ; - -q[12][13] = 0.173052 ; -q[12][14] = 0.010363 ; -q[12][15] = 0.090515 ; -q[12][16] = 0.184609 ; -q[12][17] = 0.022019 ; -q[12][18] = 0.073223 ; -q[12][19] = 0.307309 ; - -q[13][14] = 0.042564 ; -q[13][15] = 0.138119 ; -q[13][16] = 0.085870 ; -q[13][17] = 0.128050 ; -q[13][18] = 0.898663 ; -q[13][19] = 0.247329 ; - -q[14][15] = 0.430431 ; -q[14][16] = 0.207143 ; -q[14][17] = 0.014584 ; -q[14][18] = 0.032043 ; -q[14][19] = 0.129315 ; - -q[15][16] = 1.767766 ; -q[15][17] = 0.035933 ; -q[15][18] = 0.121979 ; -q[15][19] = 0.127700 ; - -q[16][17] = 0.020437 ; -q[16][18] = 0.094617 ; -q[16][19] = 0.740372 ; - -q[17][18] = 0.124746 ; -q[17][19] = 0.022134 ; - -q[18][19] = 0.125733 ; - -} - - -/* - * WAG matrix: Simon Whelan and Nick Goldman - * - */ - -void wagdata(dmatrix q, double *f) -{ - /* - * WAG model of amino acid evolution - * - * S. Whelan and N. Goldman. 2000. In prep. - * - * presented at the MASAMB-X workshop in Cambridge - * - * Whelan, S., and N. Goldman. 2000. - * The WAG amino acid rate matrix. - * Manuscript in prep. 
- */ - - /* Q matrix */ - q[0][1] = 0.610810; q[0][2] = 0.569079; - q[0][3] = 0.821500; q[0][4] = 1.141050; - q[0][5] = 1.011980; q[0][6] = 1.756410; - q[0][7] = 1.572160; q[0][8] = 0.354813; - q[0][9] = 0.219023; q[0][10] = 0.443935; - q[0][11] = 1.005440; q[0][12] = 0.989475; - q[0][13] = 0.233492; q[0][14] = 1.594890; - q[0][15] = 3.733380; q[0][16] = 2.349220; - q[0][17] = 0.125227; q[0][18] = 0.268987; - q[0][19] = 2.221870; - - q[1][2] = 0.711690; q[1][3] = 0.165074; - q[1][4] = 0.585809; q[1][5] = 3.360330; - q[1][6] = 0.488649; q[1][7] = 0.650469; - q[1][8] = 2.362040; q[1][9] = 0.206722; - q[1][10] = 0.551450; q[1][11] = 5.925170; - q[1][12] = 0.758446; q[1][13] = 0.116821; - q[1][14] = 0.753467; q[1][15] = 1.357640; - q[1][16] = 0.613776; q[1][17] = 1.294610; - q[1][18] = 0.423612; q[1][19] = 0.280336; - - q[2][3] = 6.013660; q[2][4] = 0.296524; - q[2][5] = 1.716740; q[2][6] = 1.056790; - q[2][7] = 1.253910; q[2][8] = 4.378930; - q[2][9] = 0.615636; q[2][10] = 0.147156; - q[2][11] = 3.334390; q[2][12] = 0.224747; - q[2][13] = 0.110793; q[2][14] = 0.217538; - q[2][15] = 4.394450; q[2][16] = 2.257930; - q[2][17] = 0.078463; q[2][18] = 1.208560; - q[2][19] = 0.221176; - - q[3][4] = 0.033379; q[3][5] = 0.691268; - q[3][6] = 6.833400; q[3][7] = 0.961142; - q[3][8] = 1.032910; q[3][9] = 0.043523; - q[3][10] = 0.093930; q[3][11] = 0.533362; - q[3][12] = 0.116813; q[3][13] = 0.052004; - q[3][14] = 0.472601; q[3][15] = 1.192810; - q[3][16] = 0.417372; q[3][17] = 0.146348; - q[3][18] = 0.363243; q[3][19] = 0.169417; - - q[4][5] = 0.109261; q[4][6] = 0.023920; - q[4][7] = 0.341086; q[4][8] = 0.275403; - q[4][9] = 0.189890; q[4][10] = 0.428414; - q[4][11] = 0.083649; q[4][12] = 0.437393; - q[4][13] = 0.441300; q[4][14] = 0.122303; - q[4][15] = 1.560590; q[4][16] = 0.570186; - q[4][17] = 0.795736; q[4][18] = 0.604634; - q[4][19] = 1.114570; - - q[5][6] = 6.048790; q[5][7] = 0.366510; - q[5][8] = 4.749460; q[5][9] = 0.131046; - q[5][10] = 0.964886; q[5][11] = 4.308310; 
- q[5][12] = 1.705070; q[5][13] = 0.110744; - q[5][14] = 1.036370; q[5][15] = 1.141210; - q[5][16] = 0.954144; q[5][17] = 0.243615; - q[5][18] = 0.252457; q[5][19] = 0.333890; - - q[6][7] = 0.630832; q[6][8] = 0.635025; - q[6][9] = 0.141320; q[6][10] = 0.172579; - q[6][11] = 2.867580; q[6][12] = 0.353912; - q[6][13] = 0.092310; q[6][14] = 0.755791; - q[6][15] = 0.782467; q[6][16] = 0.914814; - q[6][17] = 0.172682; q[6][18] = 0.217549; - q[6][19] = 0.655045; - - q[7][8] = 0.276379; q[7][9] = 0.034151; - q[7][10] = 0.068651; q[7][11] = 0.415992; - q[7][12] = 0.194220; q[7][13] = 0.055288; - q[7][14] = 0.273149; q[7][15] = 1.486700; - q[7][16] = 0.251477; q[7][17] = 0.374321; - q[7][18] = 0.114187; q[7][19] = 0.209108; - - q[8][9] = 0.152215; q[8][10] = 0.555096; - q[8][11] = 0.992083; q[8][12] = 0.450867; - q[8][13] = 0.756080; q[8][14] = 0.771387; - q[8][15] = 0.822459; q[8][16] = 0.525511; - q[8][17] = 0.289998; q[8][18] = 4.290350; - q[8][19] = 0.131869; - - q[9][10] = 3.517820; q[9][11] = 0.360574; - q[9][12] = 4.714220; q[9][13] = 1.177640; - q[9][14] = 0.111502; q[9][15] = 0.353443; - q[9][16] = 1.615050; q[9][17] = 0.234326; - q[9][18] = 0.468951; q[9][19] = 8.659740; - - q[10][11] = 0.287583; q[10][12] = 5.375250; - q[10][13] = 2.348200; q[10][14] = 0.462018; - q[10][15] = 0.382421; q[10][16] = 0.364222; - q[10][17] = 0.740259; q[10][18] = 0.443205; - q[10][19] = 1.997370; - - q[11][12] = 1.032220; q[11][13] = 0.098843; - q[11][14] = 0.619503; q[11][15] = 1.073780; - q[11][16] = 1.537920; q[11][17] = 0.152232; - q[11][18] = 0.147411; q[11][19] = 0.342012; - - q[12][13] = 1.320870; q[12][14] = 0.194864; - q[12][15] = 0.556353; q[12][16] = 1.681970; - q[12][17] = 0.570369; q[12][18] = 0.473810; - q[12][19] = 2.282020; - - q[13][14] = 0.179896; q[13][15] = 0.606814; - q[13][16] = 0.191467; q[13][17] = 1.699780; - q[13][18] = 7.154480; q[13][19] = 0.725096; - - q[14][15] = 1.786490; q[14][16] = 0.885349; - q[14][17] = 0.156619; q[14][18] = 0.239607; - q[14][19] = 
0.351250; - - q[15][16] = 4.847130; q[15][17] = 0.578784; - q[15][18] = 0.872519; q[15][19] = 0.258861; - - q[16][17] = 0.126678; q[16][18] = 0.325490; - q[16][19] = 1.547670; - - q[17][18] = 2.763540; q[17][19] = 0.409817; - - q[18][19] = 0.347826; - - /* original frequencies */ - f[ 0] = 0.0866; - f[ 1] = 0.0440; - f[ 2] = 0.0391; - f[ 3] = 0.0570; - f[ 4] = 0.0193; - f[ 5] = 0.0367; - f[ 6] = 0.0581; - f[ 7] = 0.0833; - f[ 8] = 0.0244; - f[ 9] = 0.0485; - f[10] = 0.0862; - f[11] = 0.0620; - f[12] = 0.0195; - f[13] = 0.0384; - f[14] = 0.0458; - f[15] = 0.0695; - f[16] = 0.0610; - f[17] = 0.0144; - f[18] = 0.0353; - f[19] = 0.0709; -} - -void cprev45data(dmatrix q, double *f) -{ - /* cpREV45 model of amino acid evolution - * Adachi, J., P.J. Waddell, W. Martin, and M. Hasegawa. 2000. - * J. Mol. Evol. 50:348-358 - * (reconstructed from 45 chloroplast genomes) - */ - q[0][1] = 105; q[0][2] = 227; - q[0][3] = 175; q[0][4] = 669; - q[0][5] = 157; q[0][6] = 499; - q[0][7] = 665; q[0][8] = 66; - q[0][9] = 145; q[0][10] = 197; - q[0][11] = 236; q[0][12] = 185; - q[0][13] = 68; q[0][14] = 490; - q[0][15] = 2440; q[0][16] = 1340; - q[0][17] = 14; q[0][18] = 56; - q[0][19] = 968; - - q[1][2] = 357; q[1][3] = 43; - q[1][4] = 823; q[1][5] = 1745; - q[1][6] = 152; q[1][7] = 243; - q[1][8] = 715; q[1][9] = 136; - q[1][10] = 203; q[1][11] = 4482; - q[1][12] = 125; q[1][13] = 53; - q[1][14] = 87; q[1][15] = 385; - q[1][16] = 314; q[1][17] = 230; - q[1][18] = 323; q[1][19] = 92; - - q[2][3] = 4435; q[2][4] = 538; - q[2][5] = 768; q[2][6] = 1055; - q[2][7] = 653; q[2][8] = 1405; - q[2][9] = 168; q[2][10] = 113; - q[2][11] = 2430; q[2][12] = 61; - q[2][13] = 97; q[2][14] = 173; - q[2][15] = 2085; q[2][16] = 1393; - q[2][17] = 40; q[2][18] = 754; - q[2][19] = 83; - - q[3][4] = 10; q[3][5] = 400; - q[3][6] = 3691; q[3][7] = 431; - q[3][8] = 331; q[3][9] = 10; - q[3][10] = 10; q[3][11] = 412; - q[3][12] = 47; q[3][13] = 22; - q[3][14] = 170; q[3][15] = 590; - q[3][16] = 266; q[3][17] 
= 18; - q[3][18] = 281; q[3][19] = 75; - - q[4][5] = 10; q[4][6] = 10; - q[4][7] = 303; q[4][8] = 441; - q[4][9] = 280; q[4][10] = 396; - q[4][11] = 48; q[4][12] = 159; - q[4][13] = 726; q[4][14] = 285; - q[4][15] = 2331; q[4][16] = 576; - q[4][17] = 435; q[4][18] = 1466; - q[4][19] = 592; - - q[5][6] = 3122; q[5][7] = 133; - q[5][8] = 1269; q[5][9] = 92; - q[5][10] = 286; q[5][11] = 3313; - q[5][12] = 202; q[5][13] = 10; - q[5][14] = 323; q[5][15] = 396; - q[5][16] = 241; q[5][17] = 53; - q[5][18] = 391; q[5][19] = 54; - - q[6][7] = 379; q[6][8] = 162; - q[6][9] = 148; q[6][10] = 82; - q[6][11] = 2629; q[6][12] = 113; - q[6][13] = 145; q[6][14] = 185; - q[6][15] = 568; q[6][16] = 369; - q[6][17] = 63; q[6][18] = 142; - q[6][19] = 200; - - q[7][8] = 19; q[7][9] = 40; - q[7][10] = 20; q[7][11] = 263; - q[7][12] = 21; q[7][13] = 25; - q[7][14] = 28; q[7][15] = 691; - q[7][16] = 92; q[7][17] = 82; - q[7][18] = 10; q[7][19] = 91; - - q[8][9] = 29; q[8][10] = 66; - q[8][11] = 305; q[8][12] = 10; - q[8][13] = 127; q[8][14] = 152; - q[8][15] = 303; q[8][16] = 32; - q[8][17] = 69; q[8][18] = 1971; - q[8][19] = 25; - - q[9][10] = 1745; q[9][11] = 345; - q[9][12] = 1772; q[9][13] = 454; - q[9][14] = 117; q[9][15] = 216; - q[9][16] = 1040; q[9][17] = 42; - q[9][18] = 89; q[9][19] = 4797; - - q[10][11] = 218; q[10][12] = 1351; - q[10][13] = 1268; q[10][14] = 219; - q[10][15] = 516; q[10][16] = 156; - q[10][17] = 159; q[10][18] = 189; - q[10][19] = 865; - - q[11][12] = 193; q[11][13] = 72; - q[11][14] = 302; q[11][15] = 868; - q[11][16] = 918; q[11][17] = 10; - q[11][18] = 247; q[11][19] = 249; - - q[12][13] = 327; q[12][14] = 100; - q[12][15] = 93; q[12][16] = 645; - q[12][17] = 86; q[12][18] = 215; - q[12][19] = 475; - - q[13][14] = 43; q[13][15] = 487; - q[13][16] = 148; q[13][17] = 468; - q[13][18] = 2370; q[13][19] = 317; - - q[14][15] = 1202; q[14][16] = 260; - q[14][17] = 49; q[14][18] = 97; - q[14][19] = 122; - - q[15][16] = 2151; q[15][17] = 73; - q[15][18] = 522; 
q[15][19] = 167; - - q[16][17] = 29; q[16][18] = 71; - q[16][19] = 760; - - q[17][18] = 346; q[17][19] = 10; - - q[18][19] = 119; - - f[0] = 0.076; - f[1] = 0.062; - f[2] = 0.041; - f[3] = 0.037; - f[4] = 0.009; - f[5] = 0.038; - f[6] = 0.049; - f[7] = 0.084; - f[8] = 0.025; - f[9] = 0.081; - f[10] = 0.101; - f[11] = 0.050; - f[12] = 0.022; - f[13] = 0.051; - f[14] = 0.043; - f[15] = 0.062; - f[16] = 0.054; - f[17] = 0.018; - f[18] = 0.031; - f[19] = 0.066; -} - diff --git a/forester/archive/RIO/others/puzzle_mod/src/ppuzzle.c b/forester/archive/RIO/others/puzzle_mod/src/ppuzzle.c deleted file mode 100644 index 04a1cc2..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/ppuzzle.c +++ /dev/null @@ -1,2418 +0,0 @@ -/* - * ppuzzle.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - -#define EXTERN extern - -#include -#include -#include "ppuzzle.h" - - -int PP_IamMaster; -int PP_IamSlave; -int PP_Myid; -int PP_MyMaster; -int PP_NumProcs; -MPI_Comm PP_Comm; - -int *freeslaves; /* Queue of free slaves */ -int firstslave, /* headpointer of queue */ - lastslave; /* tailpointer of queue */ - -int *permutsent, - *permutrecved, - *quartsent, - *quartrecved, - *doquartsent, - *doquartrecved, - *splitsent, - *splitrecved, - *permutsentn, - *permutrecvedn, - *quartsentn, - *quartrecvedn, - *doquartsentn, - *doquartrecvedn, - *splitsentn, - *splitrecvedn; - -double *walltimes, - *cputimes; -double *fullwalltimes, - *fullcputimes; -double *altwalltimes, - *altcputimes; - -int PP_permutsent = 0; /* # of */ -int PP_permutrecved = 0; /* # of */ -int PP_quartsent = 0; /* # of */ -int PP_quartrecved = 0; /* # of */ -int PP_doquartsent = 0; /* # of */ -int PP_doquartrecved = 0; /* # of */ -int PP_splitsent = 0; /* # of */ -int PP_splitrecved = 0; /* # of */ -int PP_permutsentn = 0; /* # of */ -int PP_permutrecvedn = 0; /* # of */ -int PP_quartsentn = 0; /* # of */ -int PP_quartrecvedn = 0; /* # of */ -int PP_doquartsentn = 0; /* # of */ -int PP_doquartrecvedn = 0; /* # of */ -int PP_splitsentn = 0; /* # of */ -int PP_splitrecvedn = 0; /* # of */ - -double PP_starttime = 0, - PP_stoptime = 0, - PP_inittime = 0, - PP_paramcomptime = 0, - PP_paramsendtime = 0, - PP_quartcomptime = 0, - PP_quartsendtime = 0, - PP_puzzletime = 0, - PP_treetime = 0, - PP_lasttime = 0; - -int PP_MaxSlave = 0; - - -/********************************************************************* -* miscellaneous utilities * -*********************************************************************/ - -int dcmp(const void *a, const void *b) -{ - if (*(double *)a > *(double *)b) return (-1); - else if (*(double *)a < *(double *)b) return 1; - else return 0; -} - -/******************/ - -void PP_cmpd(int rank, double a, double b) -{ - if (a != b) - FPRINTF(STDOUTFILE "(%2d) *** %.3f != %.3f\n", 
rank, a, b); -} - -/******************/ - -void PP_cmpi(int rank, int a, int b) -{ - if (a != b) - FPRINTF(STDOUTFILE "(%2d) *** %d != %d\n", rank, a, b); -} - -/******************/ - -double PP_timer() -{ - double tmptime; - if (PP_lasttime == 0) { - PP_lasttime = MPI_Wtime(); - return(0); - } - else { - tmptime = PP_lasttime; - PP_lasttime = MPI_Wtime(); - return(PP_lasttime - tmptime); - } -} - -/******************/ - -void PP_Printerror(FILE *of, int id, int err) -{ - char errstr[MPI_MAX_ERROR_STRING]; - int errstrlen; - - if ((err > MPI_SUCCESS) && (err <= MPI_ERR_LASTCODE)) { - MPI_Error_string(err, errstr, &errstrlen); - fprintf(of, "(%2d) MPI ERROR %d : %s\n", id, err, errstr); - } - else { - if (err == MPI_SUCCESS) - fprintf(of, "(%2d) MPI ERROR %d : No error\n", id, err); - else - fprintf(of, "(%2d) MPI ERROR %d : unknown error number\n", id, err); - } -} /* PP_Printerror */ - -/******************/ - -void PP_Printbiparts(cmatrix biparts) -{ int n1, n2; - for (n1=0; n1<(Maxspc-3); n1++) { - if (n1==0) FPRINTF(STDOUTFILE "(%2d) bipartition : ", PP_Myid); - else FPRINTF(STDOUTFILE "(%2d) : ", PP_Myid); - for (n2=0; n2= qnum) - while ((lowval > qnum)) { - dd -= 1; lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24; - } - else { - while (highval <= qnum) { - dd += 1; highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24; - } - lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24; - } - qnum -= lowval; - if (qnum > 0) { - temp = (double)(6 * qnum); - temp = pow(temp, (double)(1/3)); - cc = (uli) floor(temp); - if (cc < 2) cc= 2; - lowval = (uli) cc*(cc-1)*(cc-2)/6; - highval = (uli) (cc+1)*cc*(cc-1)/6; - if (lowval >= qnum) - while ((lowval > qnum)) { - cc -= 1; lowval = (uli) cc*(cc-1)*(cc-2)/6; - } - else { - while (highval <= qnum) { - cc += 1; highval = (uli) (cc+1)*cc*(cc-1)/6; - } - lowval = (uli) cc*(cc-1)*(cc-2)/6; - } - qnum -= lowval; - if (qnum > 0) { - temp = (double)(2 * qnum); - temp = sqrt(temp); - bb = (uli) floor(temp); - if (bb < 1) bb= 1; - lowval = (uli) bb*(bb-1)/2; - 
highval = (uli) (bb+1)*bb/2; - if (lowval >= qnum) - while ((lowval > qnum)) { - bb -= 1; lowval = (uli) bb*(bb-1)/2; - } - else { - while (highval <= qnum) { - bb += 1; highval = (uli) (bb+1)*bb/2; - } - lowval = (uli) bb*(bb-1)/2; - } - qnum -= lowval; - if (qnum > 0) { - aa = (uli) qnum; - if (aa < 0) aa= 0; - } - } - } - *d = (int)dd; - *c = (int)cc; - *b = (int)bb; - *a = (int)aa; -} /* num2quart */ - -/******************/ - -uli numquarts(int maxspc) -{ - uli tmp; - int a, b, c, d; - - if (maxspc < 4) - return (uli)0; - else { - maxspc--; - a = maxspc-3; - b = maxspc-2; - c = maxspc-1; - d = maxspc; - - tmp = (uli) 1 + a + - (uli) b * (b-1) / 2 + - (uli) c * (c-1) * (c-2) / 6 + - (uli) d * (d-1) * (d-2) * (d-3) / 24; - return (tmp); - } -} /* numquarts */ - -/******************/ - -uli quart2num (int a, int b, int c, int d) -{ - uli tmp; - if ((a>b) || (b>c) || (c>d)) { - fprintf(stderr, "Error PP5 not (%d <= %d <= %d <= %d) !!!\n", a, b, c, d); - exit (1); - } - tmp = (uli) a + - (uli) b * (b-1) / 2 + - (uli) c * (c-1) * (c-2) / 6 + - (uli) d * (d-1) * (d-2) * (d-3) / 24; - return (tmp); -} /* quart2num */ -#endif -/******************/ - - -/********************************************************************* -* queue for storing the ranks of slaves waiting for work * -*********************************************************************/ - -void PP_initslavequeue() -{ - int n; - freeslaves = new_ivector(PP_NumProcs); - firstslave = 0; - PP_MaxSlave = PP_NumProcs-1; - lastslave = PP_MaxSlave-1; - freeslaves[PP_MaxSlave] = PP_MaxSlave; - for (n=0; n 900) { - /* every 900 seconds */ - /* percentage of completed trees */ - if (mflag == 0) { - FPRINTF(STDOUTFILE "\n"); - mflag = 1; - } - tc2 = 100.0*Currtrial/Numtrial + - 100.0*nq/Numquartets/Numtrial; - mintogo = (100.0-tc2) * - (double) (time2-time0)/60.0/tc2; - hours = floor(mintogo/60.0); - minutes = mintogo - 60.0*hours; - FPRINTF(STDOUTFILE "%2.2f%%", tc2); - FPRINTF(STDOUTFILE " completed (remaining"); - 
FPRINTF(STDOUTFILE " time: %.0f", hours); - FPRINTF(STDOUTFILE " hours %.0f", minutes); - FPRINTF(STDOUTFILE " minutes)\n"); - time1 = time2; - } -# endif /* SEQUENTIAL */ - } - - /* find out which edge has the lowest edgeinfo */ - minimumedgeinfo(); - - /* add the next leaf on minedge */ - addnextleaf(minedge); - } - - /* compute bipartitions of current tree */ - computebiparts(); - -#if PARALLEL - if (PP_IamMaster) makenewsplitentries(); -# else - makenewsplitentries(); -# endif - - { - int *ctree, startnode; - char *trstr; - ctree = initctree(); - copytree(ctree); - startnode = sortctree(ctree); - trstr=sprintfctree(ctree, psteptreestrlen); - (void) addtree2list(&trstr, 1, &psteptreelist, &psteptreenum, &psteptreesum); -# ifdef PVERBOSE2 - /* fprintf(STDOUT, "%s\n", trstr); */ - printfpstrees(psteptreelist); -# endif - freectree(&ctree); - } - - - /* free tree before building the next tree */ - freetree(); - -} /* PP_slave_do_puzzling */ - -/******************/ - -void PP_do_puzzling(ivector trueID) -{ -int dest; - -# if PARALLEL - dest = PP_getslave(); - PP_SendPermut(dest, Maxspc, trueID); -# endif - - /* initialize tree */ - inittree(); - - PP_RecvSplits(Maxspc, biparts); - -# ifdef PVERBOSE3 - PP_Printbiparts(biparts); -# endif /* PVERBOSE3 */ - - makenewsplitentries(); - - /* free tree before building the next tree */ - freetree(); - -} /* PP_do_puzzling */ - -/******************/ - - -void PP_do_write_quart(int e, - int f, - int g, - int h, - double d1, - double d2, - double d3, - uli *numbq, - uli *bqarr) -{ - double lhs[3], - temp, - wlist[6], - plist[6]; - unsigned char qpbranching; - int badquartet; - - lhs[0] = d1; - lhs[1] = d2; - lhs[2] = d3; - - badquartet = FALSE; - - /* compute Bayesian weights */ - temp = (lhs[0] + lhs[1] + lhs[2])/3.0; - lhs[0] = exp(lhs[0] - temp); - lhs[1] = exp(lhs[1] - temp); - lhs[2] = exp(lhs[2] - temp); - temp = lhs[0] + lhs[1] + lhs[2]; - wlist[0] = lhs[0] / temp; - wlist[1] = 1.0; - wlist[2] = lhs[1] / temp; - wlist[3] 
= 2.0; - wlist[4] = lhs[2] / temp; - wlist[5] = 4.0; - - /* sort in descending order */ - qsort(wlist, 3, 2*sizeof(double), dcmp); - - /* check out the three possibilities */ - - /* 100 distribution */ - plist[0] = (1.0 - wlist[0])*(1.0 - wlist[0]) + - (0.0 - wlist[2])*(0.0 - wlist[2]) + - (0.0 - wlist[4])*(0.0 - wlist[4]); - plist[1] = wlist[1]; - - /* 110 distribution */ - plist[2] = (0.5 - wlist[0])*(0.5 - wlist[0]) + - (0.5 - wlist[2])*(0.5 - wlist[2]) + - (0.0 - wlist[4])*(0.0 - wlist[4]); - plist[3] = wlist[1] + wlist[3]; - - /* 111 distribution */ - temp = 1.0/3.0; - plist[4] = (temp - wlist[0])*(temp - wlist[0]) + - (temp - wlist[2])*(temp - wlist[2]) + - (temp - wlist[4])*(temp - wlist[4]); - plist[5] = wlist[1] + wlist[3] + wlist[5]; - - /* sort in descending order */ - qsort(plist, 3, 2*sizeof(double), dcmp); - - qpbranching = (unsigned char) plist[5]; - writequartet(e, f, g, h, qpbranching); - - /* a bad quartet is a quartet that shows - equal weights for all three possible topologies */ - if (qpbranching == 7) badquartet = TRUE; - - if (badquartet) { - bqarr[(*numbq)++] = quart2num(e, f, g, h); -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) bad quartet: %d %d %d %d -> %ld\n", - PP_Myid, e, f, g, h, quart2num(e, f, g, h)); -# endif /* PVERBOSE3 */ - badqs++; - badtaxon[e]++; - badtaxon[f]++; - badtaxon[g]++; - badtaxon[h]++; - } /* if badquartet */ -} /* PP_do_write_quart */ - -/********************************************************************* -* sending/receiving the important sizes and parameter (M->S) * -*********************************************************************/ - -void PP_SendSizes(int mspc, - int msite, - int ncats, - int nptrn, - int rad, - int outgr, - double frconst, - int rseed) -{ -# define NUMINT 7 -# define NUMDBL 1 - int ints[NUMINT]; - double doubles[NUMDBL]; - MPI_Datatype Dtypes[2] = {MPI_INT, MPI_DOUBLE}; - int Dtypelens[2] = {NUMINT , NUMDBL}; - MPI_Aint Dtypeaddr[2]; - MPI_Datatype PP_Sizes; - int dest; - int error; - 
-# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Sending: Maxspc=%d Maxsite=%d numcats=%d\n", PP_Myid, mspc, msite, ncats); - FPRINTF(STDOUTFILE "(%2d) Numprtn=%d tpmradix=%d fracconst=%.3f\n", PP_Myid, nptrn, rad, frconst); -# endif /* PVERBOSE2 */ - - ints[0] = mspc; - ints[1] = msite; - ints[2] = ncats; - ints[3] = nptrn; - ints[4] = rad; - ints[5] = outgr; - ints[6] = rseed; - doubles[0] = frconst; - - MPI_Address(ints, Dtypeaddr); - MPI_Address(doubles, (Dtypeaddr+1)); - - MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_Sizes); - MPI_Type_commit(&PP_Sizes); - - for (dest=1; dest (%2d) Sent Sizes\n", PP_Myid, dest); -# endif /* PVERBOSE3 */ - - } /* for each slave */ - - MPI_Type_free(&PP_Sizes); - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... Sent Sizes\n", PP_Myid); -# endif /* PVERBOSE3 */ - -# undef NUMINT -# undef NUMDBL -} /* PP_SendSizes */ - - -/******************/ - -void PP_RecvSizes(int *mspc, - int *msite, - int *ncats, - int *nptrn, - int *rad, - int *outgr, - double *frconst, - int *rseed) -{ -# define NUMINT 7 -# define NUMDBL 1 - int ints[NUMINT]; - double doubles[NUMDBL]; - MPI_Datatype Dtypes[2] = {MPI_INT, MPI_DOUBLE}; - int Dtypelens[2] = {NUMINT , NUMDBL}; - MPI_Aint Dtypeaddr[2]; - MPI_Datatype PP_Sizes; - MPI_Status stat; - int error; - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) Receiving Sizes ...\n", PP_Myid); -# endif /* PVERBOSE3 */ - - MPI_Address(ints, Dtypeaddr); - MPI_Address(doubles, (Dtypeaddr+1)); - - MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_Sizes); - MPI_Type_commit(&PP_Sizes); - - error = MPI_Probe(PP_MyMaster, MPI_ANY_TAG, PP_Comm, &stat); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 700+PP_Myid, error); - if (stat.MPI_TAG != PP_SIZES) { - if (stat.MPI_TAG == PP_DONE) { - PP_RecvDone(); -# ifdef PVERBOSE1 - FPRINTF(STDOUTFILE "(%2d) Finishing...\n", PP_Myid); -# endif /* PVERBOSE1 */ - MPI_Finalize(); - exit(1); - } else { - FPRINTF(STDOUTFILE "(%2d) Error: unexpected TAG received...\n", 
PP_Myid); - MPI_Finalize(); - exit(1); - } - } - - error = MPI_Recv(MPI_BOTTOM, 1, PP_Sizes, PP_MyMaster, MPI_ANY_TAG, PP_Comm, &stat); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 700+PP_Myid, error); - if (stat.MPI_TAG != PP_SIZES) { - FPRINTF(STDOUTFILE "(%2d) Error: unexpected TAG received...\n", PP_Myid); - MPI_Finalize(); - exit(1); - } - - *mspc = ints[0]; - *msite = ints[1]; - *ncats = ints[2]; - *nptrn = ints[3]; - *rad = ints[4]; - *outgr = ints[5]; - *rseed = ints[6]; - *frconst = doubles[0]; - - MPI_Type_free(&PP_Sizes); - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) <- (%2d) Received: Maxspec=%d Maxsite=%d numcats=%d\n", PP_Myid, PP_MyMaster, *mspc, *msite, *ncats); - FPRINTF(STDOUTFILE "(%2d) Numprtn=%d tpmradix=%d fracconst=%.3f\n", PP_Myid, *nptrn, *rad, *frconst); -# endif /* PVERBOSE2 */ - -# undef NUMINT -# undef NUMDBL -} /* PP_RecvSizes */ - - - -/********************************************************************* -* sending/receiving the data matrizes (M->S) * -*********************************************************************/ - -void PP_RecvData( - cmatrix Seqpat, /* cmatrix (Maxspc x Numptrn) */ - ivector Alias, /* ivector (Maxsite) */ - ivector Weight, /* ivector (Numptrn) */ - ivector constpat, - dvector Rates, /* dvector (numcats) */ - dvector Eval, /* dvector (tpmradix) */ - dvector Freqtpm, - dmatrix Evec, /* dmatrix (tpmradix x tpmradix) */ - dmatrix Ievc, - dmatrix iexp, - dmatrix Distanmat, /* dmatrix (Maxspc x Maxspc) */ - dcube ltprobr) /* dcube (numcats x tpmradix x tpmradix) */ -{ - MPI_Datatype Dtypes[12]; - int Dtypelens[12]; - MPI_Aint Dtypeaddr[12]; - MPI_Datatype PP_Data; - MPI_Status stat; - int error; - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) Receiving Sizes ...\n", PP_Myid); -# endif /* PVERBOSE2 */ - - Dtypes [0] = MPI_CHAR; Dtypelens [0] = Maxspc * Numptrn; - MPI_Address(&(Seqpat[0][0]), &(Dtypeaddr[0])); - Dtypes [1] = MPI_INT; Dtypelens [1] = Maxsite ; - MPI_Address(&(Alias[0]), &(Dtypeaddr[1])); 
- Dtypes [2] = MPI_INT; Dtypelens [2] = Numptrn ; - MPI_Address(&(Weight[0]), &(Dtypeaddr[2])); - Dtypes [3] = MPI_INT; Dtypelens [3] = Numptrn ; - MPI_Address(&(constpat[0]), &(Dtypeaddr[3])); - Dtypes [4] = MPI_DOUBLE; Dtypelens [4] = numcats ; - MPI_Address(&(Rates[0]), &(Dtypeaddr[4])); - Dtypes [5] = MPI_DOUBLE; Dtypelens [5] = tpmradix ; - MPI_Address(&(Eval[0]), &(Dtypeaddr[5])); - Dtypes [6] = MPI_DOUBLE; Dtypelens [6] = tpmradix ; - MPI_Address(&(Freqtpm[0]), &(Dtypeaddr[6])); - Dtypes [7] = MPI_DOUBLE; Dtypelens [7] = tpmradix * tpmradix ; - MPI_Address(&(Evec[0][0]), &(Dtypeaddr[7])); - Dtypes [8] = MPI_DOUBLE; Dtypelens [8] = tpmradix * tpmradix ; - MPI_Address(&(Ievc[0][0]), &(Dtypeaddr[8])); - Dtypes [9] = MPI_DOUBLE; Dtypelens [9] = tpmradix * tpmradix ; - MPI_Address(&(iexp[0][0]), &(Dtypeaddr[9])); - Dtypes [10] = MPI_DOUBLE; Dtypelens [10] = Maxspc * Maxspc ; - MPI_Address(&(Distanmat[0][0]), &(Dtypeaddr[10])); - Dtypes [11] = MPI_DOUBLE; Dtypelens [11] = numcats * tpmradix * tpmradix ; - MPI_Address(&(ltprobr[0][0][0]), &(Dtypeaddr[11])); - - MPI_Type_struct(12, Dtypelens, Dtypeaddr, Dtypes, &PP_Data); - MPI_Type_commit(&PP_Data); - - - error = MPI_Probe(PP_MyMaster, MPI_ANY_TAG, PP_Comm, &stat); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 700+PP_Myid, error); - if (stat.MPI_TAG != PP_DATA) { - if (stat.MPI_TAG == PP_DONE) { - PP_RecvDone(); -# ifdef PVERBOSE1 - FPRINTF(STDOUTFILE "(%2d) Finishing...\n", PP_Myid); -# endif /* PVERBOSE1 */ - MPI_Finalize(); - exit(1); - } else { - FPRINTF(STDOUTFILE "(%2d) Error: unexpected TAG received...\n", PP_Myid); - MPI_Finalize(); - exit(1); - } - } - - - error = MPI_Recv(MPI_BOTTOM, 1, PP_Data, PP_MyMaster, PP_DATA, PP_Comm, &stat); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 900+PP_Myid, error); - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) <- (%2d) Received : Alias(0)=%d - Weight(0)=%d - constpat(0)=%d\n", PP_Myid, PP_MyMaster, Alias[0], Weight[0], constpat[0]); - FPRINTF(STDOUTFILE 
"(%2d) Rates(0)=%.3f - Eval(0)=%.3f - Freqtpm(0)=%.3f\n", PP_Myid, Rates[0], Eval[0], Freqtpm[0]); - FPRINTF(STDOUTFILE "(%2d) Evec(0,0)=%.3f - Ievc(0,0)=%.3f - iexp(0,0)=%.3f - Distanmat(0,1)=%.3f\n", PP_Myid, Evec[0][0], Ievc[0][0], iexp[0][0], Distanmat[0][1]); - FPRINTF(STDOUTFILE "(%2d) Distanmat(0,1)=%.3f\n", PP_Myid, Distanmat[0][1]); - FPRINTF(STDOUTFILE "(%2d) ltprobr(0,0,0)=%.3f\n", PP_Myid, ltprobr[0][0][0]); -# endif /* PVERBOSE2 */ - - MPI_Type_free(&PP_Data); - -} /* PP_RecvData */ - - -/******************/ - -void PP_SendData( - cmatrix Seqpat, /* cmatrix (Maxspc x Numptrn) */ - ivector Alias, /* ivector (Maxsite) */ - ivector Weight, /* ivector (Numptrn) */ - ivector constpat, - dvector Rates, /* dvector (numcats) */ - dvector Eval, /* dvector (tpmradix) */ - dvector Freqtpm, - dmatrix Evec, /* dmatrix (tpmradix x tpmradix) */ - dmatrix Ievc, - dmatrix iexp, - dmatrix Distanmat, /* dmatrix (Maxspc x Maxspc) */ - dcube ltprobr) /* dcube (numcats x tpmradix x tpmradix) */ -{ - MPI_Datatype Dtypes[12]; - int Dtypelens[12]; - MPI_Aint Dtypeaddr[12]; - MPI_Datatype PP_Data; - int dest; - int error; - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Sending: Alias(0)=%d - Weight(0)=%d - constpat(0)=%d\n", PP_Myid, Alias[0], Weight[0], constpat[0]); - FPRINTF(STDOUTFILE "(%2d) Rates(0)=%.3f - Eval(0)=%.3f - Freqtpm(0)=%.3f\n", PP_Myid, Rates[0], Eval[0], Freqtpm[0]); - FPRINTF(STDOUTFILE "(%2d) Evec(0,0)=%.3f - Ievc(0,0)=%.3f - iexp(0,0)=%.3f - Distanmat(0,1)=%.3f\n", PP_Myid, Evec[0][0], Ievc[0][0], iexp[0][0], Distanmat[0][1]); - FPRINTF(STDOUTFILE "(%2d) ltprobr(0,0,0)=%.3f\n", PP_Myid, ltprobr[0][0][0]); -# endif /* PVERBOSE2 */ - - Dtypes [0] = MPI_CHAR; Dtypelens [0] = Maxspc * Numptrn; - MPI_Address(&(Seqpat[0][0]), &(Dtypeaddr[0])); - Dtypes [1] = MPI_INT; Dtypelens [1] = Maxsite ; - MPI_Address(&(Alias[0]), &(Dtypeaddr[1])); - Dtypes [2] = MPI_INT; Dtypelens [2] = Numptrn ; - MPI_Address(&(Weight[0]), &(Dtypeaddr[2])); - Dtypes [3] = MPI_INT; 
Dtypelens [3] = Numptrn ; - MPI_Address(&(constpat[0]), &(Dtypeaddr[3])); - Dtypes [4] = MPI_DOUBLE; Dtypelens [4] = numcats ; - MPI_Address(&(Rates[0]), &(Dtypeaddr[4])); - Dtypes [5] = MPI_DOUBLE; Dtypelens [5] = tpmradix ; - MPI_Address(&(Eval[0]), &(Dtypeaddr[5])); - Dtypes [6] = MPI_DOUBLE; Dtypelens [6] = tpmradix ; - MPI_Address(&(Freqtpm[0]), &(Dtypeaddr[6])); - Dtypes [7] = MPI_DOUBLE; Dtypelens [7] = tpmradix * tpmradix ; - MPI_Address(&(Evec[0][0]), &(Dtypeaddr[7])); - Dtypes [8] = MPI_DOUBLE; Dtypelens [8] = tpmradix * tpmradix ; - MPI_Address(&(Ievc[0][0]), &(Dtypeaddr[8])); - Dtypes [9] = MPI_DOUBLE; Dtypelens [9] = tpmradix * tpmradix ; - MPI_Address(&(iexp[0][0]), &(Dtypeaddr [9])); - Dtypes [10] = MPI_DOUBLE; Dtypelens [10] = Maxspc * Maxspc ; - MPI_Address(&(Distanmat[0][0]), &(Dtypeaddr[10])); - Dtypes [11] = MPI_DOUBLE; Dtypelens [11] = numcats * tpmradix * tpmradix ; - MPI_Address(&(ltprobr[0][0][0]), &(Dtypeaddr[11])); - - MPI_Type_struct(12, Dtypelens, Dtypeaddr, Dtypes, &PP_Data); - MPI_Type_commit(&PP_Data); - - for (dest=1; dest (%2d) Sent Data\n", PP_Myid, dest); -# endif /* PVERBOSE2 */ - - } /* for each slave */ - - MPI_Type_free(&PP_Data); - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... 
Sent Data\n", PP_Myid); -# endif /* PVERBOSE2 */ - -} /* PP_SendData */ - - -/************************************************************************** -* procedures to send the request to compute a single quartet (M->S) * -**************************************************************************/ - -void PP_SendDoQuart(int dest, - int a, - int b, - int c, - int d, - int approx) -{ -# define NUMINT 5 - int ints[NUMINT]; - int error; - - ints[0] = a; - ints[1] = b; - ints[2] = c; - ints[3] = d; - ints[4] = approx; - - PP_doquartsent++; - PP_doquartsentn++; - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Sending -> (%2d): Quart(%d,%d,%d,%d)\n", PP_Myid, dest, a, b, c, d); -# endif /* PVERBOSE2 */ - - error = MPI_Ssend(ints, NUMINT, MPI_INT, dest, PP_DOQUART, PP_Comm); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, PP_Myid, error); - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... Sent \n", PP_Myid); -# endif /* PVERBOSE3 */ -# undef NUMINT - -} /* PP_SendDoQuart */ - - - -/******************/ - -void PP_RecvDoQuart(int *a, - int *b, - int *c, - int *d, - int *approx) -{ -# define NUMINT 5 - int ints[NUMINT]; - int error; - MPI_Status stat; - PP_doquartrecved++; - PP_doquartrecvedn++; - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) Receiving: Quart\n", PP_Myid); -# endif /* PVERBOSE3 */ - - error = MPI_Recv(ints, NUMINT, MPI_INT, PP_MyMaster, PP_DOQUART, PP_Comm, &stat); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 200+PP_Myid, error); - - *a = ints[0]; - *b = ints[1]; - *c = ints[2]; - *d = ints[3]; - *approx = ints[4]; - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Received: Quart(%d,%d,%d,%d,%c)\n", PP_Myid, *a, *b, *c, *d, (approx ? 
'A' : 'E')); -# endif /* PVERBOSE2 */ -# undef NUMINT - -} /* PP_RecvDoQuart */ - - -/************************************************************************** -* procedures to send the result of a single quartet (S->M) * -**************************************************************************/ - -void PP_SendQuart(int a, - int b, - int c, - int d, - double d1, - double d2, - double d3, - int approx) -{ -# define NUMINT 5 -# define NUMDBL 3 - int ints[NUMINT]; - double doubles[NUMDBL]; - MPI_Datatype Dtypes[2] = {MPI_INT, MPI_DOUBLE}; - int Dtypelens[2] = {NUMINT , NUMDBL}; - MPI_Aint Dtypeaddr[2]; - MPI_Datatype PP_Quart; - int error; - - PP_quartsent++; - PP_quartsentn++; - ints[0] = a; - ints[1] = b; - ints[2] = c; - ints[3] = d; - ints[4] = approx; - doubles[0] = d1; - doubles[1] = d2; - doubles[2] = d3; - - MPI_Address(ints, Dtypeaddr); - MPI_Address(doubles, (Dtypeaddr+1)); - - MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_Quart); - MPI_Type_commit(&PP_Quart); - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Sending: Quart(%d,%d,%d,%d) = (%.3f, %.3f, %.3f)\n", PP_Myid, a, b, c, d, d1, d2, d3); -# endif /* PVERBOSE2 */ - - error = MPI_Ssend(MPI_BOTTOM, 1, PP_Quart, PP_MyMaster, PP_QUART, PP_Comm); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 400+PP_Myid, error); - - MPI_Type_free(&PP_Quart); - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... 
Sent \n", PP_Myid); -# endif /* PVERBOSE3 */ -# undef NUMINT -# undef NUMDBL - -} /* PP_SendQuart */ - - - -/******************/ - -void PP_RecvQuart(int *a, - int *b, - int *c, - int *d, - double *d1, - double *d2, - double *d3, - int *approx) -{ -# define NUMINT 5 -# define NUMDBL 3 - int ints[NUMINT]; - double doubles[NUMDBL]; - MPI_Datatype Dtypes[2] = {MPI_INT, MPI_DOUBLE}; - int Dtypelens[2] = {NUMINT , NUMDBL}; - MPI_Aint Dtypeaddr[2]; - MPI_Datatype PP_Quart; - int error; - MPI_Status stat; - - PP_quartrecved++; - PP_quartrecvedn++; - MPI_Address(ints, Dtypeaddr); - MPI_Address(doubles, (Dtypeaddr+1)); - - MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_Quart); - MPI_Type_commit(&PP_Quart); - - error = MPI_Recv(MPI_BOTTOM, 1, PP_Quart, MPI_ANY_SOURCE, PP_QUART, PP_Comm, &stat); - - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 500+PP_Myid, error); - - PP_putslave(stat.MPI_SOURCE); - - *a = ints[0]; - *b = ints[1]; - *c = ints[2]; - *d = ints[3]; - *d1 = doubles[0]; - *d2 = doubles[1]; - *d3 = doubles[2]; - *approx = ints[4]; - - MPI_Type_free(&PP_Quart); - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Received <- (%2d): Quart(%d,%d,%d,%d)=(%.3f, %.3f, %.3f)\n", PP_Myid, stat.MPI_SOURCE, *a, *b, *c, *d, *d1, *d2, *d3); -# endif /* PVERBOSE2 */ -# undef NUMINT -# undef NUMDBL - -} /* PP_RecvQuart */ - - - -/************************************************************************** -* procedures to send the request to compute a block of quartets (M->S) * -**************************************************************************/ - -void PP_SendDoQuartBlock(int dest, uli firstq, uli amount, int approx) -{ -# define NUMULI 3 - uli ulongs[NUMULI]; - int error; - - PP_doquartsent += amount; - PP_doquartsentn++; -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Sending: DOQuartBlock Signal\n", PP_Myid); -# endif /* PVERBOSE2 */ - - ulongs[0] = firstq; - ulongs[1] = amount; - ulongs[2] = (uli)approx; - - error = MPI_Ssend(ulongs, NUMULI, MPI_UNSIGNED_LONG, 
dest, PP_DOQUARTBLOCK, PP_Comm); - if (error != MPI_SUCCESS) PP_Printerror(STDOUT, 2100+PP_Myid, error); - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... Sent DOQuartBlock Signal (addr:%ld, num:%ld)\n", PP_Myid, firstq, amount); -# endif /* PVERBOSE3 */ -# undef NUMULI - -} /* PP_SendDoQuartBlock */ - -/******************/ - -void PP_RecvDoQuartBlock(uli *firstq, uli *amount, uli **bq, int *approx) -{ -# define NUMULI 3 - uli ulongs[NUMULI]; - MPI_Status stat; - int error; - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Receiving: DOQuartBlock Signal\n", PP_Myid); -# endif /* PVERBOSE2 */ - - error = MPI_Recv(&ulongs, NUMULI, MPI_UNSIGNED_LONG, PP_MyMaster, PP_DOQUARTBLOCK, PP_Comm, &stat); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 2100+PP_Myid, error); - - *firstq=ulongs[0]; - *amount=ulongs[1]; - *approx= (int)ulongs[2]; - - *bq = malloc((unsigned)*amount * sizeof(uli)); - - PP_doquartrecved += *amount; - PP_doquartrecvedn++; - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... 
DOQuartBlock (addr:%ld, num:%ld)\n", - PP_Myid, *firstq, *amount); -# endif /* PVERBOSE3 */ - -# undef NUMULI -} /* PP_RecvDoQuartBlock */ - -/********************************************************************* -* procedures to send the results of a block of quartets (S->M) * -*********************************************************************/ - -void PP_SendQuartBlock(uli startq, - uli numofq, - unsigned char *quartetinfo, - uli numofbq, - uli *bq, - int approx) -{ -# define NUMULI 3 -# define NUMINT 1 - unsigned char *trueaddr; - uli truenum; - int error; - int ints[NUMINT]; - uli ulis[NUMULI]; - MPI_Datatype Dtypes[2] = {MPI_UNSIGNED_LONG, MPI_INT}; - int Dtypelens[2] = {NUMULI, NUMINT}; - MPI_Aint Dtypeaddr[2]; - MPI_Datatype PP_QBlockSpecs; - MPI_Datatype DtypesRes[2] = {MPI_UNSIGNED_CHAR, MPI_UNSIGNED_LONG}; - int DtypelensRes[2]; - MPI_Aint DtypeaddrRes[2]; - MPI_Datatype PP_QBlockRes; - -/* - uli *bq; - uli numofbq; -*/ - - PP_quartsent += numofq; - PP_quartsentn++; - - truenum = (uli)((numofq+1)/2); - trueaddr = (unsigned char *)(quartetinfo + (uli)(startq/2)); - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Sending: startq=%lud numofq=%lud\n", PP_Myid, startq, numofq); - FPRINTF(STDOUTFILE "(%2d) approx=%c\n", PP_Myid, (approx ? 'A' : 'E')); -# endif /* PVERBOSE2 */ - - ints[0] = approx; - ulis[0] = startq; - ulis[1] = numofq; - ulis[2] = numofbq; - - MPI_Address(ulis, Dtypeaddr); - MPI_Address(ints, (Dtypeaddr+1)); - - MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_QBlockSpecs); - MPI_Type_commit(&PP_QBlockSpecs); - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Sending: xxPP_QuartBlockSpecs(0,%lu)=%d,%d\n", PP_Myid, truenum-1, trueaddr[0], trueaddr[truenum-1]); -# endif /* PVERBOSE2 */ - - - error = MPI_Ssend(MPI_BOTTOM, 1, PP_QBlockSpecs, PP_MyMaster, PP_QUARTBLOCKSPECS, PP_Comm); -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... 
Sent QuartBlockSpecs (%ld, %ld, %ld, %d)\n", PP_Myid, ulis[0], ulis[1], ulis[2], ints[0]); -# endif /* PVERBOSE3 */ - - MPI_Address(trueaddr, DtypeaddrRes); - DtypelensRes[0] = truenum; - - MPI_Address(bq, (DtypeaddrRes + 1)); - DtypelensRes[1] = numofbq; - MPI_Type_struct(2, DtypelensRes, DtypeaddrRes, DtypesRes, &PP_QBlockRes); - MPI_Type_commit(&PP_QBlockRes); - - error = MPI_Ssend(MPI_BOTTOM, 1, PP_QBlockRes, PP_MyMaster, PP_QUARTBLOCK, PP_Comm); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, PP_Myid, error); - - MPI_Type_free(&PP_QBlockSpecs); - MPI_Type_free(&PP_QBlockRes); -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... Sent xxPP_QuartBlock(0,%lu)=%d,%d\n", PP_Myid, truenum-1, trueaddr[0], trueaddr[truenum-1]); -# endif /* PVERBOSE3 */ - -# undef NUMULI -# undef NUMINT -} /* PP_SendQuartBlock */ - - - -/******************/ - -void PP_RecvQuartBlock(int slave, - uli *startq, - uli *numofq, - unsigned char *quartetinfo, - int *approx) -{ -# define NUMULI 3 -# define NUMINT 1 - unsigned char *trueaddr; - uli truenum; - int error; - int dest; - int ints[NUMINT]; - uli ulis[NUMULI]; - MPI_Datatype Dtypes[2] = {MPI_UNSIGNED_LONG, MPI_INT}; - int Dtypelens[2] = {NUMULI, NUMINT}; - MPI_Aint Dtypeaddr[2]; - MPI_Datatype PP_QBlockSpecs; - MPI_Datatype DtypesRes[2] = {MPI_UNSIGNED_CHAR, MPI_UNSIGNED_LONG}; - int DtypelensRes[2]; - MPI_Aint DtypeaddrRes[2]; - MPI_Datatype PP_QBlockRes; - MPI_Status stat; - uli count; -uli num; -uli *numofbq; -uli *bq; -numofbq=# -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) Receiving QuartBlock ...\n", PP_Myid); -# endif /* PVERBOSE3 */ - MPI_Address(ulis, Dtypeaddr); - MPI_Address(ints, (Dtypeaddr+1)); - - MPI_Type_struct(2, Dtypelens, Dtypeaddr, Dtypes, &PP_QBlockSpecs); - MPI_Type_commit(&PP_QBlockSpecs); - - MPI_Probe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &stat); - dest = stat.MPI_SOURCE; - error = MPI_Recv(MPI_BOTTOM, 1, PP_QBlockSpecs, dest, PP_QUARTBLOCKSPECS, PP_Comm, &stat); - if (error != MPI_SUCCESS) - 
PP_Printerror(STDOUT, PP_Myid, error); - - *approx = ints[0]; - *startq = ulis[0]; - *numofq = ulis[1]; - *numofbq = ulis[2]; - - PP_quartrecved += *numofq; - PP_quartrecvedn++; - truenum = (uli)((*numofq+1)/2); - trueaddr = (unsigned char *)(quartetinfo + (uli)(*startq/2)); -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... Recv QuartBlockSpecs (%ld, %ld, %ld, %d)\n", PP_Myid, ulis[0], ulis[1], ulis[2], ints[0]); -# endif /* PVERBOSE3 */ - - DtypelensRes[0] = truenum; - MPI_Address(trueaddr, DtypeaddrRes); - - bq = malloc((unsigned) *numofbq * sizeof(uli)); - - DtypelensRes[1] = *numofbq; - MPI_Address(bq, (DtypeaddrRes+1)); - MPI_Type_struct(2, DtypelensRes, DtypeaddrRes, DtypesRes, &PP_QBlockRes); - MPI_Type_commit(&PP_QBlockRes); - - error = MPI_Recv(MPI_BOTTOM, 1, PP_QBlockRes, dest, PP_QUARTBLOCK, PP_Comm, &stat); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, PP_Myid, error); -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... Recv QuartBlock \n", PP_Myid); -# endif /* PVERBOSE3 */ - - PP_putslave(dest); - - for(count = 0; count < *numofbq; count++){ - int a, b, c, d; - num2quart(bq[count], &a, &b, &c, &d); -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) %ld. bad quarted (%d, %d, %d, %d) = %ld\n", PP_Myid, count, a, b, c, d, bq[count]); -# endif /* PVERBOSE2 */ - - badqs++; - badtaxon[a]++; - badtaxon[b]++; - badtaxon[c]++; - badtaxon[d]++; - if (show_optn) { - fputid10(unresfp, a); - fprintf(unresfp, " "); - fputid10(unresfp, b); - fprintf(unresfp, " "); - fputid10(unresfp, c); - fprintf(unresfp, " "); - fputid(unresfp, d); - fprintf(unresfp, "\n"); - } - } - free(bq); - MPI_Type_free(&PP_QBlockSpecs); - MPI_Type_free(&PP_QBlockRes); -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) <- (%2d) ... 
Recv xxPP_QuartBlock(0,%lu)=%d,%d\n", PP_Myid, dest, truenum-1, trueaddr[0], trueaddr[truenum-1]); -# endif /* PVERBOSE2 */ - -# undef NUMULI -# undef NUMINT -} /* PP_RecvQuartBlock */ - - -/********************************************************************* -* send/receive array with all quartets (M->S) * -*********************************************************************/ - -void PP_SendAllQuarts(unsigned long Numquartets, - unsigned char *quartetinfo) -{ - MPI_Datatype Dtypes[1] = {MPI_UNSIGNED_CHAR}; - int Dtypelens[1]; - MPI_Aint Dtypeaddr[1]; - MPI_Datatype PP_AllQuarts; - int dest; - int error; - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Sending: PP_AllQuart(0)=%d\n", PP_Myid, quartetinfo[0]); -# endif /* PVERBOSE2 */ - - /* compute number of quartets */ - if (Numquartets % 2 == 0) { /* even number */ - Dtypelens[0] = (Numquartets)/2; - } else { /* odd number */ - Dtypelens[0] = (Numquartets + 1)/2; - } - - MPI_Address(&(quartetinfo[0]), Dtypeaddr); - MPI_Type_struct(1, Dtypelens, Dtypeaddr, Dtypes, &PP_AllQuarts); - MPI_Type_commit(&PP_AllQuarts); - - for (dest=1; dest (%2d) ... 
Sent xxAllQuart(0,%d)=%d,%d (%luq -> %db)\n", - PP_Myid, dest, Dtypelens[0]-1, quartetinfo[0], quartetinfo[Dtypelens[0]-1], - Numquartets, Dtypelens[0]-1); -# endif /* PVERBOSE3 */ - } /* for each slave */ - - MPI_Type_free(&PP_AllQuarts); - - -} /* PP_SendAllQuarts */ - - - -/******************/ - -void PP_RecvAllQuarts(int taxa, - unsigned long *Numquartets, - unsigned char *quartetinfo) -{ - MPI_Datatype Dtypes[1] = {MPI_UNSIGNED_CHAR}; - int Dtypelens[1]; - MPI_Aint Dtypeaddr[1]; - MPI_Datatype PP_AllQuarts; - MPI_Status stat; - int error; - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) Receiving AllQuarts ...\n", PP_Myid); -# endif /* PVERBOSE3 */ - - /* compute number of quartets */ - *Numquartets = (uli) taxa*(taxa-1)*(taxa-2)*(taxa-3)/24; - if (*Numquartets % 2 == 0) { /* even number */ - Dtypelens[0] = (*Numquartets)/2; - } else { /* odd number */ - Dtypelens[0] = (*Numquartets + 1)/2; - } - - MPI_Address(&(quartetinfo[0]), Dtypeaddr); - MPI_Type_struct(1, Dtypelens, Dtypeaddr, Dtypes, &PP_AllQuarts); - MPI_Type_commit(&PP_AllQuarts); - - error = MPI_Recv(MPI_BOTTOM, 1, PP_AllQuarts, PP_MyMaster, PP_ALLQUARTS, PP_Comm, &stat); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 1300+PP_Myid, error); - - MPI_Type_free(&PP_AllQuarts); - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) <- (%2d) ... 
Recv xxAllQuart(0,%d)=%d,%d (%luq -> %db)\n", - PP_Myid, PP_MyMaster, Dtypelens[0]-1, quartetinfo[0], quartetinfo[Dtypelens[0]-1], - *Numquartets, Dtypelens[0]-1); -# endif /* PVERBOSE2 */ - -} /* PP_RecvAllQuarts */ - - - -/********************************************************************* -* procedures to send request for a single puzzle tree * -*********************************************************************/ - -void PP_SendPermut(int dest, - int taxa, - ivector permut) -{ - MPI_Datatype Dtypes[1] = {MPI_INT}; - int Dtypelens[1]; - MPI_Aint Dtypeaddr[1]; - MPI_Datatype PP_Permut; - int error; - - PP_permutsent++; - PP_permutsentn++; - Dtypelens[0] = taxa; - - MPI_Address(&(permut[0]), Dtypeaddr); - MPI_Type_struct(1, Dtypelens, Dtypeaddr, Dtypes, &PP_Permut); - MPI_Type_commit(&PP_Permut); - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Sending -> (%2d): PP_Permut(0)=%d\n", PP_Myid, dest, permut[0]); -# endif /* PVERBOSE2 */ - - error = MPI_Ssend(MPI_BOTTOM, 1, PP_Permut, dest, PP_DOPUZZLE, PP_Comm); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 1500+PP_Myid, error); - - MPI_Type_free(&PP_Permut); - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... 
Sent PP_Permut\n", PP_Myid); -# endif /* PVERBOSE3 */ - -} /* PP_SendPermut */ - -/******************/ - -void PP_RecvPermut(int taxa, - ivector permut) -{ - MPI_Datatype Dtypes[1] = {MPI_INT}; - int Dtypelens[1]; - MPI_Aint Dtypeaddr[1]; - MPI_Datatype PP_Permut; - MPI_Status stat; - int error; - - PP_permutrecved++; - PP_permutrecvedn++; -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) Receiving: PP_Permut\n", PP_Myid); -# endif /* PVERBOSE3 */ - - Dtypelens[0] = taxa; - - MPI_Address(&(permut[0]), Dtypeaddr); - MPI_Type_struct(1, Dtypelens, Dtypeaddr, Dtypes, &PP_Permut); - MPI_Type_commit(&PP_Permut); - - error = MPI_Recv(MPI_BOTTOM, 1, PP_Permut, PP_MyMaster, PP_DOPUZZLE, PP_Comm, &stat); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 1700+PP_Myid, error); - - MPI_Type_free(&PP_Permut); - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Received: PP_Permut(0)=%d\n", PP_Myid, permut[0]); -# endif /* PVERBOSE2 */ - -} /* PP_RecvPermut */ - -/********************************************************************* -* procedures to send the splits of a puzzle tree to the master * -*********************************************************************/ - -void PP_SendSplitsBlock(int taxa, - uli blocksize, - cmatrix *biparts, - int pstnum, - treelistitemtype *pstlist) -{ - MPI_Datatype *Dtypes; - int *Dtypelens; - MPI_Aint *Dtypeaddr; - MPI_Datatype PP_Biparts; - int error; - int n; - int ints[3]; - int *pstnumarr; - treelistitemtype *pstptr; - - PP_splitsent+=blocksize; - PP_splitsentn++; - - ints[0] = taxa; - ints[1] = (int) blocksize; - ints[2] = pstnum; - error = MPI_Ssend(ints, 3, MPI_INT, PP_MyMaster, PP_PUZZLEBLOCKSPECS, PP_Comm); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 1800+PP_Myid, error); - - Dtypes = malloc((blocksize + pstnum + 1) * sizeof(MPI_Datatype)); - Dtypelens = malloc((blocksize + pstnum + 1) * sizeof(int)); - Dtypeaddr = malloc((blocksize + pstnum + 1) * sizeof(MPI_Aint)); - pstnumarr = malloc(pstnum * sizeof(int)); - -# ifdef PVERBOSE2 
- FPRINTF(STDOUTFILE "(%2d) Sending: PP_bipartsblock(0..%lu,0,0)8=\"%c\"\n", PP_Myid, blocksize, biparts[0][0][0]); -# endif /* PVERBOSE2 */ - - for (n=0; n<(int)blocksize; n++) { - Dtypes[n] = MPI_CHAR; - Dtypelens[n] = (taxa - 3) * taxa; - MPI_Address(&(biparts[n][0][0]), &(Dtypeaddr[n])); - } - pstptr = pstlist; - for (n=0; n%d: [%d/%d] #=%d \"%s\"\n", - PP_Myid, PP_MyMaster, n, pstnum, pstnumarr[n], (*pstptr).tree); -# endif /* PVERBOSE3 */ - pstptr = (*pstptr).succ; - } - Dtypes[((int)blocksize + pstnum)] = MPI_INT; - Dtypelens[((int)blocksize + pstnum)] = pstnum; - MPI_Address(&(pstnumarr[0]), &(Dtypeaddr[((int)blocksize + pstnum)])); - - MPI_Type_struct(((int)blocksize + pstnum + 1), Dtypelens, Dtypeaddr, Dtypes, &PP_Biparts); - MPI_Type_commit(&PP_Biparts); - - error = MPI_Ssend(MPI_BOTTOM, 1, PP_Biparts, PP_MyMaster, PP_PUZZLEBLOCK, PP_Comm); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 1800+PP_Myid, error); - - MPI_Type_free(&PP_Biparts); - free(Dtypes); - free(Dtypelens); - free(Dtypeaddr); - free(pstnumarr); - -# ifdef PVERBOSE3 - FPRINTF(STDOUTFILE "(%2d) ... 
Sent PP_bipartsblock\n", PP_Myid); -# endif /* PVERBOSE3 */ - -} /* PP_SendSplitsBlock */ - -/******************/ - -void PP_RecvSplitsBlock(int *taxa, - uli *blocksize, - cmatrix **bip, - treelistitemtype **pstlist, - int *pstnum, - int *pstsum) -/* bp -> (*bip) */ -{ - MPI_Datatype *Dtypes; - int *Dtypelens; - MPI_Aint *Dtypeaddr; - MPI_Datatype PP_Biparts; - MPI_Status stat; - int error; - int n; - int dest; - int ints[3]; - int pstlistnum; - int tmpnum; - int tmpsum; - int *pstnumarr; - char **pstarr; - treelistitemtype *treeitem; - - error = MPI_Recv(ints, 3, MPI_INT, MPI_ANY_SOURCE, PP_PUZZLEBLOCKSPECS, PP_Comm, &stat); - if (error != MPI_SUCCESS) - PP_Printerror(STDOUT, 1900+PP_Myid, error); - - dest = stat.MPI_SOURCE; - *taxa = ints[0]; - *blocksize = (uli) ints[1]; - pstlistnum = ints[2]; - -# ifdef PVERBOSE2 - FPRINTF(STDOUTFILE "(%2d) Received<-%d: PP_bipartsblockspec(t=%d,b=%ld,p=%d)\n", PP_Myid, dest, *taxa, *blocksize, pstlistnum); -# endif /* PVERBOSE2 */ - - PP_splitrecved += *blocksize; - PP_splitrecvedn++; - - Dtypes = malloc((*blocksize + pstlistnum + 1) * sizeof(MPI_Datatype)); - Dtypelens = malloc((*blocksize + pstlistnum + 1) * sizeof(int)); - Dtypeaddr = malloc((*blocksize + pstlistnum + 1) * sizeof(MPI_Aint)); - (*bip) = (cmatrix *) malloc(*blocksize * sizeof(void *)); - pstnumarr = (int *) malloc(pstlistnum * sizeof(int)); - pstarr = (char **) malloc(pstlistnum * sizeof(char *)); - -/* pstarr[0] = (char *) malloc(psteptreestrlen * pstlistnum * sizeof(char)); - for(n=1; n 0) { - if (PP_emptyslave()) { - PP_RecvSplitsBlock(&tx, &bs, &bp, &psteptreelist, &psteptreenum, &psteptreesum); - for (bipnum=0; bipnum 0) { - PP_RecvSplitsBlock(&tx, &bs, &bp, &psteptreelist, &psteptreenum, &psteptreesum); - for (bipnum=0; bipnum%4ld (%dx%ld)\n", PP_Myid, qstart, qend, PP_NumProcs-1, qtodo); -# endif - - addtimes(GENERAL, &tarr); - for (i = 3; i < Maxspc; i++) - for (c = 2; c < i; c++) - for (b = 1; b < c; b++) - for (a = 0; a < b; a++) { - - idx = (uli) 
a + - (uli) b*(b-1)/2 + - (uli) c*(c-1)*(c-2)/6 + - (uli) i*(i-1)*(i-2)*(i-3)/24; - if ((idx >= qstart) && (idx <= qend)) { -# ifdef PVERBOSE4 - FPRINTF(STDOUTFILE "(%2d) %4ld <---> (%d,%d,%d,%d)\n",PP_Myid, idx, a,b,c,i); -# endif - compute_quartlklhds(a,b,c,i,&d1,&d2,&d3,approx); - PP_do_write_quart(a,b,c,i,d1,d2,d3,&nofbq,bqarr); - addtimes(QUARTETS, &tarr); - } /* if idx */ - } /* for for for for */ - PP_SendQuartBlock(qstart, qtodo, quartetinfo, nofbq, bqarr, approx); - - free(bqarr); bqarr=NULL; - - break; - } - - case PP_DOPUZZLEBLOCK: { - if (PP_AllQuartsReceived){ - uli Numtrial, ptodo; - cmatrix *bp; - int n; - - PP_RecvDoPermutBlock(&Numtrial); - ptodo = Numtrial; - - bp = (cmatrix *) malloc(Numtrial * sizeof(void *)); - for(n=0; nS */ -# define PP_SIZES 1 /* Array sizes needed M->S */ -# define PP_DATA 2 /* Data Arrays M->S */ - -# define PP_ALLQUARTS 3 /* All Quartets M->S */ - -# define PP_DOQUART 4 /* do 4Specs M->S */ -# define PP_DOQUARTX2 5 /* do 4Specs + X^2 M->S */ -# define PP_QUART 6 /* quartet back S->M */ -# define PP_QUARTX2 7 /* quartet + X^2 back S->M */ - -# define PP_DOQUARTBLOCKSPECS 8 /* do block Specs M->S */ -# define PP_DOQUARTBLOCK 9 /* do block of Quarts M->S */ -# define PP_QUARTBLOCKSPECS 10 /* block Specs S->M */ -# define PP_QUARTBLOCK 11 /* block of Quarts S->M */ - -# define PP_DOPUZZLE 12 /* do Puzzling step M->S */ -# define PP_PUZZLE 13 /* Puzzling tree back S->M */ -# define PP_DOPUZZLEBLOCK 14 /* do Puzzling block M->S */ -# define PP_DOPUZZLEBLOCKSPECS 15 /* do Puzzling block M->S */ -# define PP_PUZZLEBLOCK 16 /* Puzzling block S->M */ -# define PP_PUZZLEBLOCKSPECS 17 /* Puzzling block S->M */ - -# define PP_STATS 18 /* Slave Statistics S->M */ - -# define PP_WAIT 18 /* waiting for work S->M */ -# define PP_TEST 100 /* testing */ - -# define PERMUTQUEUESIZE 100 -# define QUARTQUEUESIZE 100 - - extern int PP_IamMaster; - extern int PP_IamSlave; - extern int PP_Myid; - extern int PP_MyMaster; - extern int PP_NumProcs; 
- extern MPI_Comm PP_Comm; -#endif /* PARALLEL */ - -extern int *permutsent, - *permutrecved, - *quartsent, - *quartrecved, - *doquartsent, - *doquartrecved, - *splitsent, - *splitrecved, - *permutsentn, - *permutrecvedn, - *quartsentn, - *quartrecvedn, - *doquartsentn, - *doquartrecvedn, - *splitsentn, - *splitrecvedn; -extern double *walltimes, - *cputimes; -extern double *fullwalltimes, - *fullcputimes; -extern double *altwalltimes, - *altcputimes; - -extern int PP_permutsent, - PP_permutrecved, - PP_quartsent, - PP_quartrecved, - PP_doquartsent, - PP_doquartrecved, - PP_splitsent, - PP_splitrecved, - PP_permutsentn, - PP_permutrecvedn, - PP_quartsentn, - PP_quartrecvedn, - PP_doquartsentn, - PP_doquartrecvedn, - PP_splitsentn, - PP_splitrecvedn; - -extern double PP_starttime, - PP_stoptime, - PP_inittime, - PP_paramcomptime, - PP_paramsendtime, - PP_quartcomptime, - PP_quartsendtime, - PP_puzzletime, - PP_treetime; - -void num2quart(uli qnum, int *a, int *b, int *c, int *d); -uli numquarts(int maxspc); -uli quart2num (int a, int b, int c, int d); - -int slave_main(int argc, char *argv[]); -void PP_Init(int *argc, char **argv[]); -void PP_Finalize(); -void PP_Printerror(FILE *of, int id, int err); -void PP_do_puzzling(ivector trueID); - -void PP_RecvDoQuart(int *a, - int *b, - int *c, - int *d, - int *approx); -void PP_SendDoQuart(int dest, - int a, - int b, - int c, - int d, - int approx); -void PP_RecvQuart(int *a, - int *b, - int *c, - int *d, - double *d1, - double *d2, - double *d3, - int *approx); -void PP_SendQuart(int a, - int b, - int c, - int d, - double d1, - double d2, - double d3, - int approx); -void PP_SendSizes(int mspc, - int msite, - int ncats, - int nptrn, - int rad, - int outgr, - double frconst, - int rseed); -void PP_RecvSizes(int *mspc, - int *msite, - int *ncats, - int *nptrn, - int *rad, - int *outgr, - double *frconst, - int *rseed); -void PP_RecvData( - cmatrix Seqpat, /* cmatrix (Maxspc x Numptrn) */ - ivector Alias, /* ivector 
(Maxsite) */ - ivector Weight, /* ivector (Numptrn) */ - ivector constpat, - dvector Rates, /* dvector (numcats) */ - dvector Eval, /* dvector (tpmradix) */ - dvector Freqtpm, - dmatrix Evec, /* dmatrix (tpmradix x tpmradix) */ - dmatrix Ievc, - dmatrix iexp, - dmatrix Distanmat, /* dmatrix (Maxspc x Maxspc) */ - dcube ltprobr); /* dcube (numcats x tpmradix x tpmradix) */ -void PP_SendData( - cmatrix Seqpat, /* cmatrix (Maxspc x Numptrn) */ - ivector Alias, /* ivector (Maxsite) */ - ivector Weight, /* ivector (Numptrn) */ - ivector constpat, - dvector Rates, /* dvector (numcats) */ - dvector Eval, /* dvector (tpmradix) */ - dvector Freqtpm, - dmatrix Evec, /* dmatrix (tpmradix x tpmradix) */ - dmatrix Ievc, - dmatrix iexp, - dmatrix Distanmat, /* dmatrix (Maxspc x Maxspc) */ - dcube ltprobr); /* dcube (numcats x tpmradix x tpmradix) */ -void PP_SendAllQuarts(unsigned long Numquartets, - unsigned char *quartetinfo); -void PP_RecvAllQuarts(int taxa, - unsigned long *Numquartets, - unsigned char *quartetinfo); - -void PP_SendDoQuartBlock(int dest, uli firstq, uli amount, int approx); -void PP_RecvDoQuartBlock(uli *firstq, uli *amount, uli **bq, int *approx); -void PP_SendQuartBlock(uli startq, - uli numofq, - unsigned char *quartetinfo, - uli numofbq, - uli *bq, - int approx); -void PP_RecvQuartBlock(int slave, - uli *startq, - uli *numofq, - unsigned char *quartetinfo, - int *approx); - -void PP_SendPermut(int dest, - int taxa, - ivector permut); -void PP_RecvPermut(int taxa, - ivector permut); -void PP_SendDoPermutBlock(uli puzzlings); -void PP_RecvDoPermutBlock(uli *taxa); - -void PP_SendSplits(int taxa, - cmatrix biparts); -void PP_RecvSplits(int taxa, - cmatrix biparts); -void PP_SendDone(); -void PP_RecvDone(); - -int PP_emptyslave(); -void PP_putslave(int sl); -int PP_getslave(); - -void PP_cmpd(int rank, double a, double b); -void PP_cmpi(int rank, int a, int b); - -#endif /* _PPUZZLE_ */ diff --git a/forester/archive/RIO/others/puzzle_mod/src/puzzle.h 
b/forester/archive/RIO/others/puzzle_mod/src/puzzle.h deleted file mode 100644 index 8165b1a..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/puzzle.h +++ /dev/null @@ -1,493 +0,0 @@ -/* - * puzzle.h - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -#ifndef _PUZZLE_ -#define _PUZZLE_ - -#ifndef PACKAGE -# define PACKAGE "tree-puzzle" -#endif -#ifndef VERSION -# define VERSION "5.0" -#endif -#define DATE "October 2000" - -/* prototypes */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "util.h" -#include "ml.h" -#ifdef PARALLEL -# include "ppuzzle.h" -#endif - -#define STDOUT stdout -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUTFILE STDOUT, -#endif - -/* filenames */ -# define FILENAMELENTH 2048 - - -# define INFILEDEFAULT "infile" -# define OUTFILEDEFAULT "outfile" -# define TREEFILEDEFAULT "outtree" -# define INTREEDEFAULT "intree" -# define DISTANCESDEFAULT "outdist" -# define TRIANGLEDEFAULT "outlm.eps" -# define UNRESOLVEDDEFAULT "outqlist" -# define ALLQUARTDEFAULT "outallquart" -# define ALLQUARTLHDEFAULT "outallquartlh" -# define OUTPTLISTDEFAULT "outpstep" -# define OUTPTORDERDEFAULT "outptorder" - -# define INFILE infilename -# define OUTFILE outfilename -# define TREEFILE outtreename -# define INTREE intreename -# define DISTANCES outdistname -# define TRIANGLE outlmname -# define UNRESOLVED outqlistname -# define ALLQUART outallquartname -# define ALLQUARTLH outallquartlhname -# define OUTPTLIST outpstepname -# 
define OUTPTORDER outptordername - -EXTERN char infilename [FILENAMELENTH]; -EXTERN char outfilename [FILENAMELENTH]; -EXTERN char outtreename [FILENAMELENTH]; -EXTERN char intreename [FILENAMELENTH]; -EXTERN char outdistname [FILENAMELENTH]; -EXTERN char outlmname [FILENAMELENTH]; -EXTERN char outqlistname [FILENAMELENTH]; -EXTERN char outallquartname [FILENAMELENTH]; -EXTERN char outallquartlhname [FILENAMELENTH]; -EXTERN char outpstepname [FILENAMELENTH]; -EXTERN char outptordername [FILENAMELENTH]; - -#define OUTFILEEXT "puzzle" -#define TREEFILEEXT "tree" -#define DISTANCESEXT "dist" -#define TRIANGLEEXT "eps" -#define UNRESOLVEDEXT "qlist" -#define ALLQUARTEXT "allquart" -#define ALLQUARTLHEXT "allquartlh" -#define OUTPTLISTEXT "pstep" -#define OUTPTORDEREXT "ptorder" - -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUT stdout -# define STDOUTFILE STDOUT, -#endif - - -/* auto_aamodel/auto_datatype values (xxx) */ -#define AUTO_OFF 0 -#define AUTO_GUESS 1 -#define AUTO_DEFAULT 2 - - -/* qptlist values (xxx) */ -#define PSTOUT_NONE 0 -#define PSTOUT_ORDER 1 -#define PSTOUT_LISTORDER 2 -#define PSTOUT_LIST 3 - -/* dtat_optn values (xxx) */ -#define NUCLEOTIDE 0 -#define AMINOACID 1 -#define BINARY 2 - -/* typ_optn values (xxx) */ -#define LIKMAPING_OPTN 1 -#define TREERECON_OPTN 0 - -/* puzzlemodes (xxx) */ -#define QUARTPUZ 0 -#define USERTREE 1 -#define PAIRDIST 2 - -/* rhetmodes (xxx) Modes of rate heterogeneity */ -#define UNIFORMRATE 0 -#define GAMMARATE 1 -#define TWORATE 2 -#define MIXEDRATE 3 - -/* defines for types of quartet likelihood computation (xxx) */ -#define EXACT 0 -#define APPROX 1 - -/* tree structure */ -typedef struct oneedge { - /* pointer to other three edges */ - struct oneedge *up; - struct oneedge *downleft; - struct oneedge *downright; - int numedge; /* number of 
edge */ - uli edgeinfo; /* value of this edge */ - int *edgemap; /* pointer to the local edgemap */ -} ONEEDGE; - - -/* variables */ -EXTERN cmatrix biparts; /* bipartitions of tree of current puzzling step */ -EXTERN cmatrix consbiparts; /* bipartitions of majority rule consensus tree */ -EXTERN cmatrix seqchars; /* characters contained in data set */ -EXTERN cmatrix treepict; /* picture of consensus tree */ -EXTERN double minscore; /* value of edgescore on minedge */ -EXTERN double tstvf84; /* F84 transition/transversion ratio */ -EXTERN double tstvratio; /* expected transition/transversion ratio */ -EXTERN double yrtsratio; /* expected pyrimidine/purine transition ratio */ -EXTERN dvector ulkl; /* log L of user trees */ -EXTERN dmatrix allsites; /* log L per sites of user trees */ -EXTERN dvector ulklc; /* log L of user trees (clock) */ -EXTERN dmatrix allsitesc; /* log L per sites of user trees (clock) */ -EXTERN FILE *utfp; /* pointer to user tree file */ -EXTERN FILE *ofp; /* pointer to output file */ -EXTERN FILE *seqfp; /* pointer to sequence input file */ -EXTERN FILE *tfp; /* pointer to tree file */ -EXTERN FILE *dfp; /* pointer to distance file */ -EXTERN FILE *trifp; /* pointer to triangle file */ -EXTERN FILE *unresfp; /* pointer to file with unresolved quartets */ -EXTERN FILE *tmpfp; /* pointer to temporary file */ -EXTERN FILE *qptlist; /* pointer to file with puzzling step trees */ -EXTERN FILE *qptorder; /* pointer to file with unique puzzling step trees */ -EXTERN int SHcodon; /* whether SH should be applied to 1st, 2nd codon positions */ -EXTERN int utree_optn; /* use first user tree for estimation */ -EXTERN int listqptrees; /* list puzzling step trees */ -EXTERN int approxqp; /* approximate QP quartets */ -EXTERN int *edgeofleaf; /* vector with edge number of all leaves */ -EXTERN int codon_optn; /* declares what positions in a codon should be used */ -EXTERN int compclock; /* computation of clocklike branch lengths */ -EXTERN int chooseA; /* 
leaf variable */ -EXTERN int chooseB; /* leaf variable */ -EXTERN int clustA, clustB, clustC, clustD; /* number of members of LM clusters */ -EXTERN int column; /* used for breaking lines (writing tree to treefile) */ -EXTERN int Frequ_optn; /* use empirical base frequencies */ -EXTERN int Maxbrnch; /* 2*Maxspc - 3 */ -EXTERN int Maxseqc; /* number of sequence characters per taxum */ -EXTERN int mflag; /* flag used for correct printing of runtime messages */ -EXTERN int minedge; /* edge with minimum edgeinfo */ -EXTERN int nextedge; /* number of edges in the current tree */ -EXTERN int nextleaf; /* next leaf to add to tree */ -EXTERN int numclust; /* number of clusters in LM analysis */ -EXTERN int outgroup; /* outgroup */ -EXTERN int puzzlemode; /* computation of QP tree and/or ML distances */ -EXTERN int rootsearch; /* how location of root is found */ -EXTERN int rhetmode; /* model of rate heterogeneity */ -EXTERN int splitlength; /* length of one entry in splitpatterns */ -EXTERN int *splitsizes; /* size of all different splits of all trees */ -EXTERN int usebestq_optn; /* use only best quartet topology, no bayesian weights */ -EXTERN int show_optn; /* show unresolved quartets */ -EXTERN int savequart_optn; /* save memory block which quartets to file */ -EXTERN int savequartlh_optn; /* save quartet likelihoods to file */ -EXTERN int saveqlhbin_optn; /* save quartet likelihoods binary */ -EXTERN int readquart_optn; /* read memory block which quartets from file */ -EXTERN int sym_optn; /* symmetrize doublet frequencies */ -EXTERN int xsize; /* depth of consensus tree picture */ -EXTERN int ytaxcounter; /* counter for establishing y-coordinates of all taxa */ -EXTERN int numutrees; /* number of users trees in input tree file */ -EXTERN ivector clusterA, clusterB, clusterC, clusterD; /* clusters for LM analysis */ -EXTERN ivector consconfid; /* confidence values of majority rule consensus tree */ -EXTERN ivector conssizes; /* partition sizes of majority rule 
consensus tree */ -EXTERN ivector trueID; /* leaf -> taxon on this leaf */ -EXTERN ivector xcor; /* x-coordinates of consensus tree nodes */ -EXTERN ivector ycor; /* y-coordinates of consensus tree nodes */ -EXTERN ivector ycormax; /* maximal y-coordinates of consensus tree nodes */ -EXTERN ivector ycormin; /* minimal y-coordinates of consensus tree nodes */ -EXTERN ivector ycortax; /* y-coordinates of all taxa */ -EXTERN ONEEDGE *edge; /* vector with all the edges of the tree */ -EXTERN uli *splitcomp; /* bipartition storage */ -EXTERN uli *splitfreqs; /* frequencies of all different splits of all trees */ -EXTERN uli *splitpatterns; /* all different splits of all trees */ -EXTERN uli badqs; /* number of bad quartets */ -EXTERN uli consincluded; /* number of included biparts in the consensus tree */ -EXTERN uli Currtrial; /* counter for puzzling steps */ -EXTERN uli maxbiparts; /* space is reserved for that many bipartitions */ -EXTERN uli mininfo; /* value of edgeinfo on minedge */ -EXTERN uli numbiparts; /* number of different bipartitions */ -EXTERN uli Numquartets; /* number of quartets */ -EXTERN uli Numtrial; /* number of puzzling steps */ -EXTERN uli lmqts; /* quartets investigated in LM analysis (0 = ALL) */ - -EXTERN int auto_datatype; /* guess datatype ? */ -EXTERN int guessdata_optn; /* guessed datatype */ - -EXTERN int auto_aamodel; /* guess amino acid modell ? */ -EXTERN int guessauto_aamodel; /* guessed amino acid modell ? 
*/ -EXTERN int guessDayhf_optn; /* guessed Dayhoff model option */ -EXTERN int guessJtt_optn; /* guessed JTT model option */ -EXTERN int guessblosum62_optn; /* guessed BLOSUM 62 model option */ -EXTERN int guessmtrev_optn; /* guessed mtREV model option */ -EXTERN int guesscprev_optn; /* guessed cpREV model option */ -EXTERN int guessvtmv_optn; /* guessed VT model option */ -EXTERN int guesswag_optn; /* guessed WAG model option */ - -/* counter variables needed in likelihood mapping analysis */ -EXTERN uli ar1, ar2, ar3; -EXTERN uli reg1, reg2, reg3, reg4, reg5, reg6, reg7; -EXTERN uli reg1l, reg1r, reg2u, reg2d, reg3u, reg3d, - reg4u, reg4d, reg5l, reg5r, reg6u, reg6d; -EXTERN unsigned char *quartetinfo; /* place where quartets are stored */ -EXTERN dvector qweight; /* for use in QP and LM analysis */ -EXTERN dvector sqdiff; -EXTERN ivector qworder; -EXTERN ivector sqorder; - -EXTERN int randseed; -EXTERN int psteptreestrlen; - -typedef struct treelistitemtypedummy { - struct treelistitemtypedummy *pred; - struct treelistitemtypedummy *succ; - struct treelistitemtypedummy *sortnext; - struct treelistitemtypedummy *sortlast; - char *tree; - int count; - int id; - int idx; -} treelistitemtype; - -EXTERN treelistitemtype *psteptreelist; -EXTERN treelistitemtype *psteptreesortlist; -EXTERN int psteptreenum; -EXTERN int psteptreesum; - - -/* prototypes */ -void makeF84model(void); -void compnumqts(void); -void setoptions(void); -void openfiletoread(FILE **, char[], char[]); -void openfiletowrite(FILE **, char[], char[]); -void openfiletoappend(FILE **, char[], char[]); -void closefile(FILE *); -void symdoublets(void); -void computeexpectations(void); -void putdistance(FILE *); -void findidenticals(FILE *); -double averagedist(void); -void initps(FILE *); -void plotlmpoint(FILE *, double, double); -void finishps(FILE *); -void makelmpoint(FILE *, double, double, double); -void printtreestats(FILE *); -void timestamp(FILE *); -void writeoutputfile(FILE *, int); - -/* 
definitions for writing output */ -#define WRITEALL 0 -#define WRITEPARAMS 1 -#define WRITEREST 2 - -void writetimesstat(FILE *ofp); -void writecutree(FILE *, int); -void starttimer(void); -void checktimer(uli); -void estimateparametersnotree(void); -void estimateparameterstree(void); -int main(int, char *[]); -int ulicmp(const void *, const void *); -int intcmp(const void *, const void *); - -void readid(FILE *, int); -char readnextcharacter(FILE *, int, int); -void skiprestofline(FILE *, int, int); -void skipcntrl(FILE *, int, int); -void getseqs(FILE *); -void initid(int); -void fputid10(FILE *, int); -int fputid(FILE *, int); -void getsizesites(FILE *); -void getdataset(FILE *); -int guessdatatype(void); -void translatedataset(void); -void estimatebasefreqs(void); -void guessmodel(void); -void inittree(void); -void addnextleaf(int); -void freetree(void); -void writeOTU(FILE *, int); -void writetree(FILE *); -int *initctree(); -void copytree(int *ctree); -void freectree(int **snodes); -void printctree(int *ctree); -char *sprintfctree(int *ctree, int strlen); -void fprintffullpstree(FILE *outf, char *treestr); -int printfsortctree(int *ctree); -int sortctree(int *ctree); -int ct_1stedge(int node); -int ct_2ndedge(int node); -int ct_3rdedge(int node); - -void printfpstrees(treelistitemtype *list); -void printfsortedpstrees(treelistitemtype *list); -void fprintfsortedpstrees(FILE *output, treelistitemtype *list, int itemnum, int itemsum, int comment, float cutoff); - -void sortbynum(treelistitemtype *list, treelistitemtype **sortlist); -treelistitemtype *addtree2list(char **tree, - int numtrees, - treelistitemtype **list, - int *numitems, - int *numsum); -void freetreelist(treelistitemtype **list, - int *numitems, - int *numsum); -void resetedgeinfo(void); -void incrementedgeinfo(int, int); -void minimumedgeinfo(void); -void initconsensus(void); -void makepart(int, int); -void computebiparts(void); -void printsplit(FILE *, uli); -void makenewsplitentries(void); 
-void copysplit(uli, int); -void makeconsensus(void); -void writenode(FILE *, int); -void writeconsensustree(FILE *); -void nodecoordinates(int); -void drawnode(int, int); -void plotconsensustree(FILE *); -unsigned char *mallocquartets(int); -void freequartets(void); -unsigned char readquartet(int, int, int, int); -void writequartet(int, int, int, int, unsigned char); -void sort3doubles(dvector, ivector); -void computeallquartets(void); -void checkquartet(int, int, int, int); -void num2quart(uli qnum, int *a, int *b, int *c, int *d); -uli numquarts(int maxspc); -uli quart2num (int a, int b, int c, int d); - -void writetpqfheader(int nspec, FILE *ofp, int flag); - - -/* extracted from main (xxx) */ -void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx); - - -/* definitions for timing */ - -#define OVERALL 0 -#define GENERAL 1 -#define OPTIONS 2 -#define PARAMEST 3 -#define QUARTETS 4 -#define PUZZLING 5 -#define TREEEVAL 6 - -typedef struct { - int currentjob; - clock_t tempcpu; - clock_t tempfullcpu; - clock_t tempcpustart; - time_t temptime; - time_t tempfulltime; - time_t temptimestart; - - clock_t maxcpu; - clock_t mincpu; - time_t maxtime; - time_t mintime; - - double maxcpublock; - double mincpublock; - double mincputick; - double mincputicktime; - double maxtimeblock; - double mintimeblock; - - double generalcpu; - double optionscpu; - double paramestcpu; - double quartcpu; - double quartblockcpu; - double quartmaxcpu; - double quartmincpu; - double puzzcpu; - double puzzblockcpu; - double puzzmaxcpu; - double puzzmincpu; - double treecpu; - double treeblockcpu; - double treemaxcpu; - double treemincpu; - double cpu; - double fullcpu; - - double generaltime; - double optionstime; - double paramesttime; - double quarttime; - double quartblocktime; - double quartmaxtime; - double quartmintime; - double puzztime; - double puzzblocktime; - double puzzmaxtime; - double puzzmintime; - double treetime; - double 
treeblocktime; - double treemaxtime; - double treemintime; - double time; - double fulltime; -} timearray_t; - -EXTERN double cputime, walltime; -EXTERN double fullcpu, fulltime; -EXTERN double fullcputime, fullwalltime; -EXTERN double altcputime, altwalltime; -EXTERN clock_t cputimestart, cputimestop, cputimedummy; -EXTERN time_t walltimestart, walltimestop, walltimedummy; -EXTERN clock_t Startcpu; /* start cpu time */ -EXTERN clock_t Stopcpu; /* stop cpu time */ -EXTERN time_t Starttime; /* start time */ -EXTERN time_t Stoptime; /* stop time */ -EXTERN time_t time0; /* timer variable */ -EXTERN time_t time1; /* yet another timer */ -EXTERN time_t time2; /* yet another timer */ -EXTERN timearray_t tarr; - -void resetqblocktime(timearray_t *ta); -void resetpblocktime(timearray_t *ta); -void inittimearr(timearray_t *ta); -void addtimes(int jobtype, timearray_t *ta); -#ifdef TIMEDEBUG - void printtimearr(timearray_t *ta); -#endif /* TIMEDEBUG */ - -#endif /* _PUZZLE_ */ - diff --git a/forester/archive/RIO/others/puzzle_mod/src/puzzle1.c b/forester/archive/RIO/others/puzzle_mod/src/puzzle1.c deleted file mode 100644 index 9a4d790..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/puzzle1.c +++ /dev/null @@ -1,4527 +0,0 @@ -/* - * puzzle1.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -/* Modified by Christian Zmasek to: - - name and pairwise dist. output as one line per seq. - - removed some unnecessary -- for my puposes -- output. - - - !WARNING: Use ONLY together with FORESTER/RIO! - !For all other puposes download the excellent original! 
- - last modification: 05/19/01 - - - - void putdistance(FILE *fp): - - removed: "if ((j + 1) % 7 == 0 && j+1 != Maxspc) - fprintf(fp, "\n ");" - - - - - int main(int argc, char *argv[]): - - removed: - "FPRINTF(STDOUTFILE "Writing parameters to file %s\n", OUTFILE); - openfiletowrite(&ofp, OUTFILE, "general output"); - writeoutputfile(ofp,WRITEPARAMS); - fclose(ofp);" - - "openfiletoappend(&ofp, OUTFILE, "general output"); - writeoutputfile(ofp,WRITEREST);" - - "openfiletoappend(&ofp, OUTFILE, "general output"); - writeoutputfile(ofp,WRITEREST);" - - "openfiletoappend(&ofp, OUTFILE, "general output"); - writeoutputfile(ofp,WRITEREST);" - - "timestamp(ofp); - closefile(ofp);" - - -*/ - - - -#define EXTERN - -#include "puzzle.h" -#include "gamma.h" - -void num2quart(uli qnum, int *a, int *b, int *c, int *d) -{ - double temp; - uli aa, bb, cc, dd; - uli lowval=0, highval=0; - - aa=0; bb=1; cc=2; dd=3; - - temp = (double)(24 * qnum); - temp = sqrt(temp); - temp = sqrt(temp); - /* temp = pow(temp, (double)(1/4)); */ - dd = (uli) floor(temp) + 1; - if (dd < 3) dd = 3; - lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24; - highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24; - if (lowval >= qnum) - while ((lowval > qnum)) { - dd -= 1; lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24; - } - else { - while (highval <= qnum) { - dd += 1; highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24; - } - lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24; - } - qnum -= lowval; - if (qnum > 0) { - temp = (double)(6 * qnum); - temp = pow(temp, (double)(1/3)); - cc = (uli) floor(temp); - if (cc < 2) cc= 2; - lowval = (uli) cc*(cc-1)*(cc-2)/6; - highval = (uli) (cc+1)*cc*(cc-1)/6; - if (lowval >= qnum) - while ((lowval > qnum)) { - cc -= 1; lowval = (uli) cc*(cc-1)*(cc-2)/6; - } - else { - while (highval <= qnum) { - cc += 1; highval = (uli) (cc+1)*cc*(cc-1)/6; - } - lowval = (uli) cc*(cc-1)*(cc-2)/6; - } - qnum -= lowval; - if (qnum > 0) { - temp = (double)(2 * qnum); - temp = sqrt(temp); - bb = (uli) floor(temp); - if (bb < 
1) bb= 1; - lowval = (uli) bb*(bb-1)/2; - highval = (uli) (bb+1)*bb/2; - if (lowval >= qnum) - while ((lowval > qnum)) { - bb -= 1; lowval = (uli) bb*(bb-1)/2; - } - else { - while (highval <= qnum) { - bb += 1; highval = (uli) (bb+1)*bb/2; - } - lowval = (uli) bb*(bb-1)/2; - } - qnum -= lowval; - if (qnum > 0) { - aa = (uli) qnum; - if (aa < 0) aa= 0; - } - } - } - *d = (int)dd; - *c = (int)cc; - *b = (int)bb; - *a = (int)aa; -} /* num2quart */ - -/******************/ - -uli numquarts(int maxspc) -{ - uli tmp; - int a, b, c, d; - - if (maxspc < 4) - return (uli)0; - else { - maxspc--; - a = maxspc-3; - b = maxspc-2; - c = maxspc-1; - d = maxspc; - - tmp = (uli) 1 + a + - (uli) b * (b-1) / 2 + - (uli) c * (c-1) * (c-2) / 6 + - (uli) d * (d-1) * (d-2) * (d-3) / 24; - return (tmp); - } -} /* numquarts */ - -/******************/ - -uli quart2num (int a, int b, int c, int d) -{ - uli tmp; - if ((a>b) || (b>c) || (c>d)) { - fprintf(stderr, "Error PP5 not (%d <= %d <= %d <= %d) !!!\n", a, b, c, -d); - exit (1); - } - tmp = (uli) a + - (uli) b * (b-1) / 2 + - (uli) c * (c-1) * (c-2) / 6 + - (uli) d * (d-1) * (d-2) * (d-3) / 24; - return (tmp); -} /* quart2num */ - -/******************/ - - - -/* flag=0 old allquart binary */ -/* flag=1 allquart binary */ -/* flag=2 allquart ACSII */ -/* flag=3 quartlh binary */ -/* flag=4 quartlh ASCII */ - -void writetpqfheader(int nspec, - FILE *ofp, - int flag) -{ int currspec; - - if (flag == 0) { - unsigned long nquart; - unsigned long blocklen; - - nquart = numquarts(nspec); - /* compute number of bytes */ - if (nquart % 2 == 0) { /* even number */ - blocklen = (nquart)/2; - } else { /* odd number */ - blocklen = (nquart + 1)/2; - } - /* FPRINTF(STDOUTFILE "Writing quartet file: %s\n", filename); */ - fprintf(ofp, "TREE-PUZZLE\n%s\n\n", VERSION); - fprintf(ofp, "species: %d\n", nspec); - fprintf(ofp, "quartets: %lu\n", nquart); - fprintf(ofp, "bytes: %lu\n\n", blocklen); - - - /* fwrite(&(quartetinfo[0]), sizeof(char), blocklen, 
ofp); */ - } - - if (flag == 1) fprintf(ofp, "##TPQF-BB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec); - if (flag == 2) fprintf(ofp, "##TPQF-BA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec); - if (flag == 3) fprintf(ofp, "##TPQF-LB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec); - if (flag == 4) fprintf(ofp, "##TPQF-LA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec); - - for (currspec=0; currspec MAXTS) { - FPRINTF(STDOUTFILE "\n\n\nF84 model not possible "); - FPRINTF(STDOUTFILE "(bad Ts/Tv parameter)\n"); - tstvf84 = 0.0; - return; - } - if (yr < MINYR || yr > MAXYR) { - FPRINTF(STDOUTFILE "\n\n\nF84 model not possible "); - FPRINTF(STDOUTFILE "(bad Y/R transition parameter)\n"); - tstvf84 = 0.0; - return; - } - TSparam = ts; - YRparam = yr; - optim_optn = FALSE; -} - -/* compute number of quartets used in LM analysis */ -void compnumqts() -{ - if (lmqts == 0) { - if (numclust == 4) - Numquartets = (uli) clustA*clustB*clustC*clustD; - if (numclust == 3) - Numquartets = (uli) clustA*clustB*clustC*(clustC-1)/2; - if (numclust == 2) - Numquartets = (uli) clustA*(clustA-1)/2 * clustB*(clustB-1)/2; - if (numclust == 1) - Numquartets = (uli) Maxspc*(Maxspc-1)*(Maxspc-2)*(Maxspc-3)/24; - } else { - Numquartets = lmqts; - } -} - -/* set options interactively */ -void setoptions() -{ - int i, valid; - double sumfreq; - char ch; - - /* defaults */ - rhetmode = UNIFORMRATE; /* assume rate homogeneity */ - numcats = 1; - Geta = 0.05; - grate_optim = FALSE; - fracinv = 0.0; - fracinv_optim = FALSE; - - compclock = FALSE; /* compute clocklike branch lengths */ - locroot = -1; /* search for optimal place of root */ - qcalg_optn = FALSE; /* don't use sampling of quartets */ - approxp_optn = TRUE; /* approximate parameter estimates */ - listqptrees = PSTOUT_NONE; /* list puzzling step trees */ - - /* approximate QP quartets? 
*/ - if (Maxspc <= 6) approxqp = FALSE; - else approxqp = TRUE; - - codon_optn = 0; /* use all positions in a codon */ - - /* number of puzzling steps */ - if (Maxspc <= 25) Numtrial = 1000; - else if (Maxspc <= 50) Numtrial = 10000; - else if (Maxspc <= 75) Numtrial = 25000; - else Numtrial = 50000; - - utree_optn = TRUE; /* use first user tree for estimation */ - outgroup = 0; /* use first taxon as outgroup */ - sym_optn = FALSE; /* symmetrize doublet frequencies */ - tstvf84 = 0.0; /* disable F84 model */ - show_optn = FALSE; /* show unresolved quartets */ - typ_optn = TREERECON_OPTN; /* tree reconstruction */ - numclust = 1; /* one clusters in LM analysis */ - lmqts = 0; /* all quartets in LM analysis */ - compnumqts(); - if (Numquartets > 10000) { - lmqts = 10000; /* 10000 quartets in LM analysis */ - compnumqts(); - } - - do { - FPRINTF(STDOUTFILE "\n\n\nGENERAL OPTIONS\n"); - FPRINTF(STDOUTFILE " b Type of analysis? "); - if (typ_optn == TREERECON_OPTN) FPRINTF(STDOUTFILE "Tree reconstruction\n"); - if (typ_optn == LIKMAPING_OPTN) FPRINTF(STDOUTFILE "Likelihood mapping\n"); - if (typ_optn == TREERECON_OPTN) { - FPRINTF(STDOUTFILE " k Tree search procedure? "); - if (puzzlemode == QUARTPUZ) FPRINTF(STDOUTFILE "Quartet puzzling\n"); - if (puzzlemode == USERTREE) FPRINTF(STDOUTFILE "User defined trees\n"); - if (puzzlemode == PAIRDIST) FPRINTF(STDOUTFILE "Pairwise distances only (no tree)\n"); - if (puzzlemode == QUARTPUZ) { - FPRINTF(STDOUTFILE " v Approximate quartet likelihood? %s\n", - (approxqp ? "Yes" : "No")); - FPRINTF(STDOUTFILE " u List unresolved quartets? %s\n", - (show_optn ? "Yes" : "No")); - FPRINTF(STDOUTFILE " n Number of puzzling steps? %lu\n", - Numtrial); - FPRINTF(STDOUTFILE " j List puzzling step trees? 
"); - switch (listqptrees) { - case PSTOUT_NONE: FPRINTF(STDOUTFILE "No\n"); break; - case PSTOUT_ORDER: FPRINTF(STDOUTFILE "Unique topologies\n"); break; - case PSTOUT_LISTORDER: FPRINTF(STDOUTFILE "Unique topologies & Chronological list\n"); break; - case PSTOUT_LIST: FPRINTF(STDOUTFILE "Chronological list only\n"); break; - } - - FPRINTF(STDOUTFILE " o Display as outgroup? "); - fputid(STDOUT, outgroup); - FPRINTF(STDOUTFILE "\n"); - } - if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) { - FPRINTF(STDOUTFILE " z Compute clocklike branch lengths? "); - if (compclock) FPRINTF(STDOUTFILE "Yes\n"); - else FPRINTF(STDOUTFILE "No\n"); - } - if (compclock) - if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) { - FPRINTF(STDOUTFILE " l Location of root? "); - if (locroot < 0) FPRINTF(STDOUTFILE "Best place (automatic search)\n"); - else if (locroot < Maxspc) { - FPRINTF(STDOUTFILE "Branch %d (", locroot + 1); - fputid(STDOUT, locroot); - FPRINTF(STDOUTFILE ")\n"); - } else FPRINTF(STDOUTFILE "Branch %d (internal branch)\n", locroot + 1); - } - } - if (typ_optn == LIKMAPING_OPTN) { - FPRINTF(STDOUTFILE " g Group sequences in clusters? "); - if (numclust == 1) FPRINTF(STDOUTFILE "No\n"); - else FPRINTF(STDOUTFILE "Yes (%d clusters as specified)\n", numclust); - FPRINTF(STDOUTFILE " n Number of quartets? "); - if (lmqts == 0) FPRINTF(STDOUTFILE "%lu (all possible)\n", Numquartets); - else FPRINTF(STDOUTFILE "%lu (random choice)\n", lmqts); - } - FPRINTF(STDOUTFILE " e Parameter estimates? "); - if (approxp_optn) FPRINTF(STDOUTFILE "Approximate (faster)\n"); - else FPRINTF(STDOUTFILE "Exact (slow)\n"); - if (!(puzzlemode == USERTREE && typ_optn == TREERECON_OPTN)) { - FPRINTF(STDOUTFILE " x Parameter estimation uses? "); - if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n"); - else FPRINTF(STDOUTFILE "Neighbor-joining tree\n"); - - } else { - FPRINTF(STDOUTFILE " x Parameter estimation uses? 
"); - if (utree_optn) - FPRINTF(STDOUTFILE "1st input tree\n"); - else if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n"); - else FPRINTF(STDOUTFILE "Neighbor-joining tree\n"); - } - FPRINTF(STDOUTFILE "SUBSTITUTION PROCESS\n"); - FPRINTF(STDOUTFILE " d Type of sequence input data? "); - if (auto_datatype == AUTO_GUESS) FPRINTF(STDOUTFILE "Auto: "); - if (data_optn == NUCLEOTIDE) FPRINTF(STDOUTFILE "Nucleotides\n"); - if (data_optn == AMINOACID) FPRINTF(STDOUTFILE "Amino acids\n"); - if (data_optn == BINARY) FPRINTF(STDOUTFILE "Binary states\n"); - if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0 && !SH_optn) { - FPRINTF(STDOUTFILE " h Codon positions selected? "); - if (codon_optn == 0) FPRINTF(STDOUTFILE "Use all positions\n"); - if (codon_optn == 1) FPRINTF(STDOUTFILE "Use only 1st positions\n"); - if (codon_optn == 2) FPRINTF(STDOUTFILE "Use only 2nd positions\n"); - if (codon_optn == 3) FPRINTF(STDOUTFILE "Use only 3rd positions\n"); - if (codon_optn == 4) FPRINTF(STDOUTFILE "Use 1st and 2nd positions\n"); - } - FPRINTF(STDOUTFILE " m Model of substitution? "); - if (data_optn == NUCLEOTIDE) { /* nucleotides */ - if (nuc_optn) { - if(HKY_optn) - FPRINTF(STDOUTFILE "HKY (Hasegawa et al. 1985)\n"); - else { - FPRINTF(STDOUTFILE "TN (Tamura-Nei 1993)\n"); - FPRINTF(STDOUTFILE " p Constrain TN model to F84 model? "); - if (tstvf84 == 0.0) - FPRINTF(STDOUTFILE "No\n"); - else FPRINTF(STDOUTFILE "Yes (Ts/Tv ratio = %.2f)\n", tstvf84); - } - FPRINTF(STDOUTFILE " t Transition/transversion parameter? "); - if (optim_optn) - FPRINTF(STDOUTFILE "Estimate from data set\n"); - else - FPRINTF(STDOUTFILE "%.2f\n", TSparam); - if (TN_optn) { - FPRINTF(STDOUTFILE " r Y/R transition parameter? "); - if (optim_optn) - FPRINTF(STDOUTFILE "Estimate from data set\n"); - else - FPRINTF(STDOUTFILE "%.2f\n", YRparam); - } - } - if (SH_optn) { - FPRINTF(STDOUTFILE "SH (Schoeniger-von Haeseler 1994)\n"); - FPRINTF(STDOUTFILE " t Transition/transversion parameter? 
"); - if (optim_optn) - FPRINTF(STDOUTFILE "Estimate from data set\n"); - else - FPRINTF(STDOUTFILE "%.2f\n", TSparam); - } - } - if (data_optn == NUCLEOTIDE && SH_optn) { - FPRINTF(STDOUTFILE " h Doublets defined by? "); - if (SHcodon) - FPRINTF(STDOUTFILE "1st and 2nd codon positions\n"); - else - FPRINTF(STDOUTFILE "1st+2nd, 3rd+4th, etc. site\n"); - } - if (data_optn == AMINOACID) { /* amino acids */ - switch (auto_aamodel) { - case AUTO_GUESS: - FPRINTF(STDOUTFILE "Auto: "); - break; - case AUTO_DEFAULT: - FPRINTF(STDOUTFILE "Def.: "); - break; - } - if (Dayhf_optn) FPRINTF(STDOUTFILE "Dayhoff (Dayhoff et al. 1978)\n"); - if (Jtt_optn) FPRINTF(STDOUTFILE "JTT (Jones et al. 1992)\n"); - if (mtrev_optn) FPRINTF(STDOUTFILE "mtREV24 (Adachi-Hasegawa 1996)\n"); - if (cprev_optn) FPRINTF(STDOUTFILE "cpREV45 (Adachi et al. 2000)\n"); - if (blosum62_optn) FPRINTF(STDOUTFILE "BLOSUM62 (Henikoff-Henikoff 92)\n"); - if (vtmv_optn) FPRINTF(STDOUTFILE "VT (Mueller-Vingron 2000)\n"); - if (wag_optn) FPRINTF(STDOUTFILE "WAG (Whelan-Goldman 2000)\n"); - } - if (data_optn == BINARY) { /* binary states */ - FPRINTF(STDOUTFILE "Two-state model (Felsenstein 1981)\n"); - } - if (data_optn == AMINOACID) - FPRINTF(STDOUTFILE " f Amino acid frequencies? "); - else if (data_optn == NUCLEOTIDE && SH_optn) - FPRINTF(STDOUTFILE " f Doublet frequencies? "); - else if (data_optn == NUCLEOTIDE && nuc_optn) - FPRINTF(STDOUTFILE " f Nucleotide frequencies? "); - else if (data_optn == BINARY) - FPRINTF(STDOUTFILE " f Binary state frequencies? "); - FPRINTF(STDOUTFILE "%s\n", (Frequ_optn ? "Estimate from data set" : - "Use specified values")); - if (data_optn == NUCLEOTIDE && SH_optn) - FPRINTF(STDOUTFILE " s Symmetrize doublet frequencies? %s\n", - (sym_optn ? "Yes" : "No")); - - FPRINTF(STDOUTFILE "RATE HETEROGENEITY\n"); - FPRINTF(STDOUTFILE " w Model of rate heterogeneity? 
"); - if (rhetmode == UNIFORMRATE) FPRINTF(STDOUTFILE "Uniform rate\n"); - if (rhetmode == GAMMARATE ) FPRINTF(STDOUTFILE "Gamma distributed rates\n"); - if (rhetmode == TWORATE ) FPRINTF(STDOUTFILE "Two rates (1 invariable + 1 variable)\n"); - if (rhetmode == MIXEDRATE ) FPRINTF(STDOUTFILE "Mixed (1 invariable + %d Gamma rates)\n", numcats); - - if (rhetmode == TWORATE || rhetmode == MIXEDRATE) { - FPRINTF(STDOUTFILE " i Fraction of invariable sites? "); - if (fracinv_optim) FPRINTF(STDOUTFILE "Estimate from data set"); - else FPRINTF(STDOUTFILE "%.2f", fracinv); - if (fracinv == 0.0 && !fracinv_optim) FPRINTF(STDOUTFILE " (all sites variable)"); - FPRINTF(STDOUTFILE "\n"); - } - if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) { - FPRINTF(STDOUTFILE " a Gamma distribution parameter alpha? "); - if (grate_optim) - FPRINTF(STDOUTFILE "Estimate from data set\n"); - else if (Geta > 0.5) - FPRINTF(STDOUTFILE "%.2f (strong rate heterogeneity)\n", (1.0-Geta)/Geta); - else FPRINTF(STDOUTFILE "%.2f (weak rate heterogeneity)\n", (1.0-Geta)/Geta); - FPRINTF(STDOUTFILE " c Number of Gamma rate categories? 
%d\n", numcats); - } - - FPRINTF(STDOUTFILE "\nQuit [q], confirm [y], or change [menu] settings: "); - - /* read one char */ - ch = getchar(); - if (ch != '\n') { - do ; - while (getchar() != '\n'); - } - ch = (char) tolower((int) ch); - - /* letters in use: a b c d e f g h i j k l m n o p q r s t u v w y x z */ - /* letters not in use: */ - - switch (ch) { - - case '\n': break; - - case 'z': if (typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE)) { - compclock = compclock + 1; - if (compclock == 2) compclock = 0; - } else { - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } - break; - - case 'l': if (compclock && typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE)) { - FPRINTF(STDOUTFILE "\n\n\nEnter an invalid branch number to search "); - FPRINTF(STDOUTFILE "for the best location!\n"); - FPRINTF(STDOUTFILE "\nPlace root at branch (1-%d): ", - 2*Maxspc-3); - scanf("%d", &locroot); - do ; - while (getchar() != '\n'); - if (locroot < 1 || locroot > 2*Maxspc-3) locroot = 0; - locroot = locroot - 1; - } else { - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } - break; - - case 'e': if ((rhetmode == TWORATE || rhetmode == MIXEDRATE) && fracinv_optim) { - FPRINTF(STDOUTFILE "\n\n\nInvariable sites estimation needs to be exact!\n"); - } else { - approxp_optn = approxp_optn + 1; - if (approxp_optn == 2) approxp_optn = 0; - } - break; - - case 'w': rhetmode = rhetmode + 1; - if (rhetmode == 4) rhetmode = UNIFORMRATE; - if (rhetmode == UNIFORMRATE) { /* uniform rate */ - numcats = 1; - Geta = 0.05; - grate_optim = FALSE; - fracinv = 0.0; - fracinv_optim = FALSE; - } - if (rhetmode == GAMMARATE ) { /* Gamma distributed rates */ - numcats = 8; - Geta = 0.05; - grate_optim = TRUE; - fracinv = 0.0; - fracinv_optim = FALSE; - } - if (rhetmode == TWORATE ) { /* two rates (1 invariable + 1 variable) */ - approxp_optn = FALSE; - numcats = 1; - Geta = 0.05; - grate_optim = FALSE; - fracinv 
= 0.0; - fracinv_optim = TRUE; - } - if (rhetmode == MIXEDRATE ) { /* mixed (1 invariable + Gamma rates) */ - approxp_optn = FALSE; - numcats = 8; - Geta = 0.05; - grate_optim = TRUE; - fracinv = 0.0; - fracinv_optim = TRUE; - } - break; - - case 'i': if (rhetmode == TWORATE || rhetmode == MIXEDRATE) { - FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value for "); - FPRINTF(STDOUTFILE "estimation from data set!\n"); - FPRINTF(STDOUTFILE "\nFraction of invariable sites among all sites (%.2f-%.2f): ", - MINFI, MAXFI); - scanf("%lf", &fracinv); - do ; - while (getchar() != '\n'); - if (fracinv < MINFI || fracinv > MAXFI) { - fracinv_optim = TRUE; - fracinv = 0.0; - } else { - fracinv_optim = FALSE; - } - } else { - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } - break; - - case 'a': if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) { - FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value for estimation from data set!\n"); - FPRINTF(STDOUTFILE "\nGamma distribution parameter alpha (%.2f-%.2f): ", - (1.0-MAXGE)/MAXGE, (1.0-MINGE)/MINGE); - scanf("%lf", &Geta); - do ; - while (getchar() != '\n'); - if (Geta < (1.0-MAXGE)/MAXGE || Geta > (1.0-MINGE)/MINGE) { - grate_optim = TRUE; - Geta = 0.05; - } else { - grate_optim = FALSE; - Geta = 1.0/(1.0 + Geta); - } - } else - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - break; - - case 'c': if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) { - FPRINTF(STDOUTFILE "\n\n\nNumber of Gamma rate categories (%d-%d): ", - MINCAT, MAXCAT); - scanf("%d", &numcats); - do ; - while (getchar() != '\n'); - if (numcats < MINCAT || numcats > MAXCAT) { - FPRINTF(STDOUTFILE "\n\n\nThis number of categories is not available!\n"); - numcats = 4; - } - } else { - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } - break; - - case 'h': if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0 && !SH_optn) { - codon_optn = codon_optn + 1; - if (codon_optn == 5) codon_optn = 0; - translatedataset(); - /* 
reestimate nucleotide frequencies only - if user did not specify other values */ - if (Frequ_optn) estimatebasefreqs(); - - } else if (data_optn == NUCLEOTIDE && SH_optn) { - if (Maxseqc % 2 != 0 && Maxseqc % 3 == 0) { - SHcodon = TRUE; - FPRINTF(STDOUTFILE "\n\n\nThis is the only possible option for the data set!\n"); - } - if (Maxseqc % 3 != 0 && Maxseqc % 2 == 0) { - SHcodon = FALSE; - FPRINTF(STDOUTFILE "\n\n\nThis is the only possible option for the data set!\n"); - } - if (Maxseqc % 2 == 0 && Maxseqc % 3 == 0) { - if (SHcodon) - SHcodon = FALSE; - else - SHcodon = TRUE; - translatedataset(); - /* reestimate nucleotide frequencies only - if user did not specify other values */ - if (Frequ_optn) estimatebasefreqs(); - } - } else { - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } - break; - - case 'x': if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) { - if (utree_optn) { - utree_optn = FALSE; - qcalg_optn = FALSE; - } else { - qcalg_optn = qcalg_optn + 1; - if (qcalg_optn == 2) { - qcalg_optn = 0; - utree_optn = TRUE; - } - } - } else { - qcalg_optn = qcalg_optn + 1; - if (qcalg_optn == 2) qcalg_optn = 0; - } - break; - - case 'k': if (typ_optn == TREERECON_OPTN) { - puzzlemode = (puzzlemode + 1) % 3; - /* puzzlemode = puzzlemode + 1; - if (puzzlemode == 3) puzzlemode = 0; - xxx */ - } else { - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } - break; - - case 'b': typ_optn = (typ_optn + 1) % 2; - /* typ_optn = typ_optn + 1; - if (typ_optn == 2) typ_optn = TREERECON_OPTN; - xxx */ - break; - - case 'g': if (typ_optn == LIKMAPING_OPTN) { - clustA = clustB = clustC = clustD = 0; - if (numclust != 1) { - numclust = 1; - } else { - FPRINTF(STDOUTFILE "\n\n\nNumber of clusters (2-4): "); - scanf("%d", &numclust); - do ; - while (getchar() != '\n'); - if (numclust < 2 || numclust > 4) { - numclust = 1; - FPRINTF(STDOUTFILE "\n\n\nOnly 2, 3, or 4 "); - FPRINTF(STDOUTFILE "clusters possible\n"); - } else { - 
FPRINTF(STDOUTFILE "\nDistribute all sequences over the "); - if (numclust == 2) { - FPRINTF(STDOUTFILE "two clusters a and b (At least two\n"); - FPRINTF(STDOUTFILE "sequences per cluster are necessary), "); - } - if (numclust == 3) { - FPRINTF(STDOUTFILE "three clusters a, b, and c\n"); - FPRINTF(STDOUTFILE "(At least one sequence in cluster a and b, and at least two\n"); - FPRINTF(STDOUTFILE "sequences in c are necessary), "); - } - if (numclust == 4) { - FPRINTF(STDOUTFILE "four clusters a, b, c, and d\n"); - FPRINTF(STDOUTFILE "(At least one sequence per cluster is necessary),\n"); - } - FPRINTF(STDOUTFILE "type x to exclude a sequence:\n\n"); - - for (i = 0; i < Maxspc; i++) { - valid = FALSE; - do { - fputid10(STDOUT, i); - FPRINTF(STDOUTFILE ": "); - /* read one char */ - ch = getchar(); - if (ch != '\n') { - do ; - while (getchar() != '\n'); - } - ch = (char) tolower((int) ch); - if (ch == 'a' || ch == 'b' || ch == 'x') - valid = TRUE; - if (numclust == 3 || numclust == 4) - if (ch == 'c') valid = TRUE; - if (numclust == 4) - if (ch == 'd') valid = TRUE; - } while (!valid); - if (ch == 'a') { - clusterA[clustA] = i; - clustA++; - } - if (ch == 'b') { - clusterB[clustB] = i; - clustB++; - } - if (ch == 'c') { - clusterC[clustC] = i; - clustC++; - } - if (ch == 'd') { - clusterD[clustD] = i; - clustD++; - } - } - /* check clusters */ - valid = TRUE; - if (numclust == 4) { - if (clustA == 0) { - valid = FALSE; - numclust = 1; - FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster a\n"); - } - if (clustB == 0) { - valid = FALSE; - numclust = 1; - FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster b\n"); - } - if (clustC == 0) { - valid = FALSE; - numclust = 1; - FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster c\n"); - } - if (clustD == 0) { - valid = FALSE; - numclust = 1; - FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster d\n"); - } - } - if (numclust == 3) { - if (clustA == 0) { - valid = FALSE; - numclust = 1; - FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster 
a\n"); - } - if (clustB == 0) { - valid = FALSE; - numclust = 1; - FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster b\n"); - } - if (clustC < 2) { - valid = FALSE; - numclust = 1; - if (clustC == 0) - FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster c\n"); - else - FPRINTF(STDOUTFILE "\n\n\nOnly one sequence in cluster c\n"); - } - } - if (numclust == 2) { - if (clustA < 2) { - valid = FALSE; - numclust = 1; - if (clustA == 0) - FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster a\n"); - else - FPRINTF(STDOUTFILE "\n\n\nOnly one sequence in cluster a\n"); - } - if (clustB < 2) { - valid = FALSE; - numclust = 1; - if (clustB == 0) - FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster b\n"); - else - FPRINTF(STDOUTFILE "\n\n\nOnly one sequence in cluster b\n"); - } - } - if (valid) { - FPRINTF(STDOUTFILE "\nNumber of sequences in each cluster:\n\n"); - FPRINTF(STDOUTFILE "Cluster a: %d\n", clustA); - FPRINTF(STDOUTFILE "Cluster b: %d\n", clustB); - if (numclust > 2) - FPRINTF(STDOUTFILE "Cluster c: %d\n", clustC); - if (numclust == 4) - FPRINTF(STDOUTFILE "Cluster d: %d\n", clustD); - FPRINTF(STDOUTFILE "\nExcluded sequences: "); - if (numclust == 2) FPRINTF(STDOUTFILE "%d\n", - Maxspc-clustA-clustB); - if (numclust == 3) FPRINTF(STDOUTFILE "%d\n", - Maxspc-clustA-clustB-clustC); - if (numclust == 4) FPRINTF(STDOUTFILE "%d\n", - Maxspc-clustA-clustB-clustC-clustD); - - } - } - } - /* number of resulting quartets */ - compnumqts(); - - } else { - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } - break; - - case 'd': if (auto_datatype == AUTO_GUESS) { - auto_datatype = AUTO_OFF; - guessdata_optn = data_optn; - data_optn = 0; - } else { - data_optn = data_optn + 1; - if (data_optn == 3) { - auto_datatype = AUTO_GUESS; - data_optn = guessdata_optn; - } - } - /* translate characters into format used by ML engine */ - translatedataset(); - estimatebasefreqs(); - break; - - case 'u': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) - show_optn = 1 - 
show_optn; - else - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - break; - - case 'j': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) - listqptrees = (listqptrees + 1) % 4; - else - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - break; - - case 'v': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) - approxqp = 1 - approxqp; - else - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - break; - - case 'f': if (Frequ_optn) { - tstvf84 = 0.0; - Frequ_optn = FALSE; - sumfreq = 0.0; - if (data_optn == AMINOACID) - FPRINTF(STDOUTFILE "\n\n\nAmino acid"); - else if (data_optn == NUCLEOTIDE && SH_optn) - FPRINTF(STDOUTFILE "\n\n\nDoublet"); - else if (data_optn == NUCLEOTIDE && nuc_optn) - FPRINTF(STDOUTFILE "\n\n\nNucleotide"); - else if (data_optn == BINARY) - FPRINTF(STDOUTFILE "\n\n\nBinary state"); - FPRINTF(STDOUTFILE " frequencies (in %%):\n\n"); - for (i = 0; i < gettpmradix() - 1; i++) { - FPRINTF(STDOUTFILE "pi(%s) = ", int2code(i)); - scanf("%lf", &(Freqtpm[i])); - do ; - while (getchar() != '\n'); - Freqtpm[i] = Freqtpm[i]/100.0; - if (Freqtpm[i] < 0.0) { - FPRINTF(STDOUTFILE "\n\n\nNegative frequency not possible\n"); - estimatebasefreqs(); - break; - } - sumfreq = sumfreq + Freqtpm[i]; - if (sumfreq > 1.0) { - FPRINTF(STDOUTFILE "\n\n\nThe sum of "); - FPRINTF(STDOUTFILE "all frequencies exceeds"); - FPRINTF(STDOUTFILE " 100%%\n"); - estimatebasefreqs(); - break; - } - if (i == gettpmradix() - 2) - Freqtpm[i+1] = 1.0 - sumfreq; - } - } else estimatebasefreqs(); - break; - - case 's': if (data_optn == NUCLEOTIDE && SH_optn) { - sym_optn = 1 - sym_optn; - } else { - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } - break; - - case 'n': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) - { - FPRINTF(STDOUTFILE "\n\n\nNumber of puzzling steps: "); - scanf("%lu", &Numtrial); - do ; - while (getchar() != '\n'); - if (Numtrial < 1) { - FPRINTF(STDOUTFILE "\n\n\nThe number of 
puzzling"); - FPRINTF(STDOUTFILE " steps can't be smaller than one\n"); - Numtrial = 1000; - } - } - else if (typ_optn == LIKMAPING_OPTN) - { - FPRINTF(STDOUTFILE "\n\nEnter zero to use all possible"); - FPRINTF(STDOUTFILE " quartets in the analysis!\n"); - FPRINTF(STDOUTFILE "\nNumber of random quartets: "); - scanf("%lu", &lmqts); - do ; - while (getchar() != '\n'); - - /* compute number of quartets used */ - compnumqts(); - } - else - { - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } - break; - - case 'o': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) { - FPRINTF(STDOUTFILE "\n\n\nSequence to be displayed as outgroup (1-%d): ", - Maxspc); - scanf("%d", &outgroup); - do ; - while (getchar() != '\n'); - if (outgroup < 1 || outgroup > Maxspc) { - FPRINTF(STDOUTFILE "\n\n\nSequences are numbered "); - FPRINTF(STDOUTFILE "from 1 to %d\n", - Maxspc); - outgroup = 1; - } - outgroup = outgroup - 1; - } else { - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } - break; - - case 'm': if (data_optn == NUCLEOTIDE) { /* nucleotide data */ - if(HKY_optn && nuc_optn) { - /* HKY -> TN */ - tstvf84 = 0.0; - TSparam = 2.0; - YRparam = 0.9; - HKY_optn = FALSE; - TN_optn = TRUE; - optim_optn = TRUE; - nuc_optn = TRUE; - SH_optn = FALSE; - break; - } - if(TN_optn && nuc_optn) { - if (Maxseqc % 2 == 0 || Maxseqc % 3 == 0) { - /* number of chars needs to be a multiple 2 or 3 */ - /* TN -> SH */ - if (Maxseqc % 2 != 0 && Maxseqc % 3 == 0) - SHcodon = TRUE; - else - SHcodon = FALSE; - tstvf84 = 0.0; - TSparam = 2.0; - YRparam = 1.0; - HKY_optn = TRUE; - TN_optn = FALSE; - optim_optn = TRUE; - nuc_optn = FALSE; - SH_optn = TRUE; - /* translate characters into format */ - /* used by ML engine */ - translatedataset(); - estimatebasefreqs(); - } else { - FPRINTF(STDOUTFILE "\n\n\nSH model not "); - FPRINTF(STDOUTFILE "available for the data set!\n"); - /* TN -> HKY */ - tstvf84 = 0.0; - TSparam = 2.0; - YRparam = 1.0; - HKY_optn = TRUE; - 
TN_optn = FALSE; - optim_optn = TRUE; - nuc_optn = TRUE; - SH_optn = FALSE; - } - break; - } - if(SH_optn) { - /* SH -> HKY */ - tstvf84 = 0.0; - TSparam = 2.0; - YRparam = 1.0; - HKY_optn = TRUE; - TN_optn = FALSE; - optim_optn = TRUE; - nuc_optn = TRUE; - SH_optn = FALSE; - /* translate characters into format */ - /* used by ML engine */ - translatedataset(); - estimatebasefreqs(); - break; - } - break; - } - if (data_optn == AMINOACID) { /* amino acid data */ - if (auto_aamodel) { - /* AUTO -> Dayhoff */ - Dayhf_optn = TRUE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } - if (Dayhf_optn) { - /* Dayhoff -> JTT */ - Dayhf_optn = FALSE; - Jtt_optn = TRUE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } - if (Jtt_optn) { - /* JTT -> mtREV */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = TRUE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } -#ifdef CPREV - if (mtrev_optn) { - /* mtREV -> cpREV */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = TRUE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } -#else /* ! CPREV */ - if (mtrev_optn) { - /* mtREV -> BLOSUM 62 */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = TRUE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } -#endif /* ! 
CPREV */ - -#ifdef CPREV - if (cprev_optn) { - /* cpREV -> BLOSUM 62 */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = TRUE; - vtmv_optn = FALSE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } -#endif - if (blosum62_optn) { - /* BLOSUM 62 -> VT model */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = TRUE; - wag_optn = FALSE; - auto_aamodel = AUTO_OFF; - break; - } - if (vtmv_optn) { - /* VT model -> WAG model */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = TRUE; - auto_aamodel = AUTO_OFF; - break; - } - if (wag_optn) { - /* WAG model -> AUTO */ - Dayhf_optn = guessDayhf_optn; - Jtt_optn = guessJtt_optn; - mtrev_optn = guessmtrev_optn; - cprev_optn = guesscprev_optn; - blosum62_optn = guessblosum62_optn; - vtmv_optn = guessvtmv_optn; - wag_optn = guesswag_optn; - auto_aamodel = guessauto_aamodel; - break; - } - break; - } - if (data_optn == BINARY) { - FPRINTF(STDOUTFILE "\n\n\nNo other model available!\n"); - } - break; - - case 't': if (data_optn != NUCLEOTIDE) { - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } else { - tstvf84 = 0.0; - FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value for "); - FPRINTF(STDOUTFILE "estimation from data set!\n"); - FPRINTF(STDOUTFILE "\nTransition/transversion parameter (%.2f-%.2f): ", - MINTS, MAXTS); - scanf("%lf", &TSparam); - do ; - while (getchar() != '\n'); - if (TSparam < MINTS || TSparam > MAXTS) { - optim_optn = TRUE; - TSparam = 2.0; - } else { - optim_optn = FALSE; - } - } - break; - - case 'q': FPRINTF(STDOUTFILE "\n\n\n"); -# if PARALLEL - PP_SendDone(); - MPI_Finalize(); -# endif /* PARALLEL */ - exit(0); - - break; - - case 'r': if (!(TN_optn && nuc_optn)){ - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } else { - tstvf84 = 0.0; - 
FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value "); - FPRINTF(STDOUTFILE "for estimation from data set!\n"); - FPRINTF(STDOUTFILE "\nY/R transition parameter (%.2f-%.2f): ", MINYR, MAXYR); - scanf("%lf", &YRparam); - do ; - while (getchar() != '\n'); - if (YRparam < MINYR || YRparam > MAXYR) { - optim_optn = TRUE; - YRparam = 0.9; - } else if (YRparam == 1.0) { - TN_optn = FALSE; - HKY_optn = TRUE; - if (optim_optn) TSparam = 2.0; - } else { - optim_optn = FALSE; - } - } - break; - - case 'p': if (!(TN_optn && nuc_optn)){ - FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - } else { - FPRINTF(STDOUTFILE "\n\n\nThe F84 model (Felsenstein 1984) is a restricted"); - FPRINTF(STDOUTFILE " TN model, and the one\nF84 parameter uniquely"); - FPRINTF(STDOUTFILE " determines the two corresponding TN parameters!\n\n"); - FPRINTF(STDOUTFILE "F84 expected transition/transversion ratio: "); - scanf("%lf", &tstvf84); - do ; - while (getchar() != '\n'); - if (tstvf84 <= 0.0) tstvf84 = 0.0; - else makeF84model(); - } - break; - - case 'y': break; - - default: FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n"); - break; - } - } while (ch != 'y'); - - FPRINTF(STDOUTFILE "\n\n\n"); -} - -/* open file for reading */ -void openfiletoread(FILE **fp, char name[], char descr[]) -{ - int count = 0; - cvector str; - - if ((*fp = fopen(name, "r")) == NULL) { - FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr); - str = mygets(); - while ((*fp = fopen(str, "r")) == NULL) - { - count++; - if (count > 10) - { - FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n"); - exit(1); - } - FPRINTF(STDOUTFILE "File '%s' not found, ", str); - FPRINTF(STDOUTFILE "please enter alternative name: "); - free_cvector(str); - str = mygets(); - } - free_cvector(str); - FPRINTF(STDOUTFILE "\n"); - } -} /* openfiletoread */ - - -/* open file for writing */ -void openfiletowrite(FILE **fp, char name[], char descr[]) -{ - int count = 0; - cvector str; - - if ((*fp 
= fopen(name, "w")) == NULL) { - FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr); - str = mygets(); - while ((*fp = fopen(str, "w")) == NULL) - { - count++; - if (count > 10) - { - FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n"); - exit(1); - } - FPRINTF(STDOUTFILE "File '%s' not created, ", str); - FPRINTF(STDOUTFILE "please enter other name: "); - free_cvector(str); - str = mygets(); - } - free_cvector(str); - FPRINTF(STDOUTFILE "\n"); - } -} /* openfiletowrite */ - - -/* open file for appending */ -void openfiletoappend(FILE **fp, char name[], char descr[]) -{ - int count = 0; - cvector str; - - if ((*fp = fopen(name, "a")) == NULL) { - FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr); - str = mygets(); - while ((*fp = fopen(str, "a")) == NULL) - { - count++; - if (count > 10) - { - FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n"); - exit(1); - } - FPRINTF(STDOUTFILE "File '%s' not created, ", str); - FPRINTF(STDOUTFILE "please enter other name: "); - free_cvector(str); - str = mygets(); - } - free_cvector(str); - FPRINTF(STDOUTFILE "\n"); - } -} /* openfiletowrite */ - - -/* close file */ -void closefile(FILE *fp) -{ - fclose(fp); -} /* closefile */ - -/* symmetrize doublet frequencies */ -void symdoublets() -{ - int i, imean; - double mean; - - if (data_optn == NUCLEOTIDE && SH_optn && sym_optn) { - /* ML frequencies */ - mean = (Freqtpm[1] + Freqtpm[4])/2.0; /* AC CA */ - Freqtpm[1] = mean; - Freqtpm[4] = mean; - mean = (Freqtpm[2] + Freqtpm[8])/2.0; /* AG GA */ - Freqtpm[2] = mean; - Freqtpm[8] = mean; - mean = (Freqtpm[3] + Freqtpm[12])/2.0; /* AT TA */ - Freqtpm[3] = mean; - Freqtpm[12] = mean; - mean = (Freqtpm[6] + Freqtpm[9])/2.0; /* CG GC */ - Freqtpm[6] = mean; - Freqtpm[9] = mean; - mean = (Freqtpm[7] + Freqtpm[13])/2.0; /* CT TC */ - Freqtpm[7] = mean; - Freqtpm[13] = mean; - mean = (Freqtpm[11] + Freqtpm[14])/2.0; /* GT TG */ - Freqtpm[11] = mean; - Freqtpm[14] = mean; - - /* 
base composition of each taxon */ - for (i = 0; i < Maxspc; i++) { - imean = (Basecomp[i][1] + Basecomp[i][4])/2; /* AC CA */ - Basecomp[i][1] = imean; - Basecomp[i][4] = imean; - imean = (Basecomp[i][2] + Basecomp[i][8])/2; /* AG GA */ - Basecomp[i][2] = imean; - Basecomp[i][8] = imean; - imean = (Basecomp[i][3] + Basecomp[i][12])/2; /* AT TA */ - Basecomp[i][3] = imean; - Basecomp[i][12] = imean; - imean = (Basecomp[i][6] + Basecomp[i][9])/2; /* CG GC */ - Basecomp[i][6] = imean; - Basecomp[i][9] = imean; - imean = (Basecomp[i][7] + Basecomp[i][13])/2; /* CT TC */ - Basecomp[i][7] = imean; - Basecomp[i][13] = imean; - imean = (Basecomp[i][11] + Basecomp[i][14])/2; /* GT TG */ - Basecomp[i][11] = imean; - Basecomp[i][14] = imean; - } - } -} - -/* show Ts/Tv ratio and Ts Y/R ratio */ -void computeexpectations() -{ - double AlphaYBeta, AlphaRBeta, piR, piY, num, denom, pyr, pur; - - if (nuc_optn == TRUE) { /* 4x4 nucs */ - piR = Freqtpm[0] + Freqtpm[2]; - piY = Freqtpm[1] + Freqtpm[3]; - AlphaRBeta = 4.0*TSparam / (1 + YRparam); - AlphaYBeta = AlphaRBeta * YRparam; - tstvratio = (AlphaRBeta*Freqtpm[0]*Freqtpm[2] + - AlphaYBeta*Freqtpm[1]*Freqtpm[3])/(piR * piY); - yrtsratio = (AlphaYBeta*Freqtpm[1]*Freqtpm[3]) / - (AlphaRBeta*Freqtpm[0]*Freqtpm[2]); - } else { /* 16x16 nucs */ - pyr = Freqtpm[1]*Freqtpm[3] + Freqtpm[5]*Freqtpm[7] + - Freqtpm[9]*Freqtpm[11] + Freqtpm[4]*Freqtpm[12] + - Freqtpm[5]*Freqtpm[13] + Freqtpm[6]*Freqtpm[14] + - Freqtpm[7]*Freqtpm[15] + Freqtpm[13]*Freqtpm[15]; - pur = Freqtpm[0]*Freqtpm[2] + Freqtpm[4]*Freqtpm[6] + - Freqtpm[0]*Freqtpm[8] + Freqtpm[1]*Freqtpm[9] + - Freqtpm[2]*Freqtpm[10] + Freqtpm[8]*Freqtpm[10] + - Freqtpm[3]*Freqtpm[11] + Freqtpm[12]*Freqtpm[14]; - num = pyr + pur; - denom = Freqtpm[0]*Freqtpm[1] + Freqtpm[1]*Freqtpm[2] + - Freqtpm[0]*Freqtpm[3] + Freqtpm[2]*Freqtpm[3] + - Freqtpm[0]*Freqtpm[4] + Freqtpm[1]*Freqtpm[5] + - Freqtpm[4]*Freqtpm[5] + Freqtpm[2]*Freqtpm[6] + - Freqtpm[5]*Freqtpm[6] + Freqtpm[3]*Freqtpm[7] + - 
Freqtpm[4]*Freqtpm[7] + Freqtpm[6]*Freqtpm[7] + - Freqtpm[4]*Freqtpm[8] + Freqtpm[5]*Freqtpm[9] + - Freqtpm[8]*Freqtpm[9] + Freqtpm[6]*Freqtpm[10] + - Freqtpm[9]*Freqtpm[10] + Freqtpm[7]*Freqtpm[11] + - Freqtpm[8]*Freqtpm[11] + Freqtpm[10]*Freqtpm[11] + - Freqtpm[0]*Freqtpm[12] + Freqtpm[8]*Freqtpm[12] + - Freqtpm[1]*Freqtpm[13] + Freqtpm[9]*Freqtpm[13] + - Freqtpm[12]*Freqtpm[13] + Freqtpm[2]*Freqtpm[14] + - Freqtpm[10]*Freqtpm[14] + Freqtpm[13]*Freqtpm[14] + - Freqtpm[3]*Freqtpm[15] + Freqtpm[11]*Freqtpm[15] + - Freqtpm[12]*Freqtpm[15] + Freqtpm[14]*Freqtpm[15]; - tstvratio = 2.0*TSparam * num/denom; - yrtsratio = pyr/pur; - } -} - -/* write ML distance matrix to file */ -void putdistance(FILE *fp) /* mod CZ 05/19/01 */ -{ - int i, j; - - fprintf(fp, " %d\n", Maxspc); - for (i = 0; i < Maxspc; i++) { - fputid10(fp, i); - for (j = 0; j < Maxspc; j++) { - fprintf(fp, " %.5f", Distanmat[i][j]/100.0); - } - fprintf(fp, "\n"); - } -} - - -/* find identical sequences */ -void findidenticals(FILE *fp) -{ - int i, j, noids; - cvector useqs; - - useqs = new_cvector(Maxspc); - - for (i = 0; i < Maxspc; i++) - useqs[i] = 0; - - noids = TRUE; - for (i = 0; i < Maxspc && noids; i++) - for (j = i + 1; j < Maxspc && noids; j++) - if (Distanmat[i][j] == 0.0) noids = FALSE; - - if (noids) - fprintf(fp, " All sequences are unique.\n"); - else { - for (i = 0; i < Maxspc; i++) { - noids = TRUE; - for (j = i + 1; j < Maxspc && noids; j++) - if (Distanmat[i][j] == 0.0) noids = FALSE; - - if (!noids && useqs[i] == 0) { - fputid(fp, i); - useqs[i] = 1; - for (j = i + 1; j < Maxspc; j++) - if (Distanmat[i][j] == 0.0) { - fprintf(fp, ", "); - fputid(fp, j); - useqs[j] = 1; - } - fprintf(fp, ".\n"); - } - } - } - free_cvector(useqs); -} - -/* compute average distance */ -double averagedist() -{ - int i, j; - double sum; - - sum = 0.0; - for (i = 0; i < Maxspc; i++) - for (j = i + 1; j < Maxspc; j++) - sum = sum + Distanmat[i][j]; - - sum = sum / (double) Maxspc / ((double) Maxspc - 1.0) * 
2.0; - - return sum; -} - -/* first lines of EPSF likelihood mapping file */ -void initps(FILE *ofp) -{ - fprintf(ofp, "%%!PS-Adobe-3.0 EPSF-3.0\n"); - fprintf(ofp, "%%%%BoundingBox: 60 210 550 650\n"); - fprintf(ofp, "%%%%Pages: 1\n"); -# ifndef ALPHA - fprintf(ofp, "%%%%Creator: %s (version %s)\n", PACKAGE, VERSION); -# else - fprintf(ofp, "%%%%Creator: %s (version %s%s)\n", PACKAGE, VERSION, ALPHA); -# endif - fprintf(ofp, "%%%%Title: Likelihood Mapping Analysis\n"); - fprintf(ofp, "%%%%CreationDate: %s", asctime(localtime(&Starttime)) ); - fprintf(ofp, "%%%%DocumentFonts: Helvetica\n"); - fprintf(ofp, "%%%%DocumentNeededFonts: Helvetica\n"); - fprintf(ofp, "%%%%EndComments\n"); - fprintf(ofp, "%% use inch as unit\n"); - fprintf(ofp, "/inch {72 mul} def\n"); - fprintf(ofp, "%% triangle side length (3 inch)\n"); - fprintf(ofp, "/tl {3 inch mul} def\n"); - fprintf(ofp, "%% plot one dot (x-y coordinates on stack)\n"); - fprintf(ofp, "/dot {\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, "0.002 tl 0 360 arc %% radius is 0.002 of the triangle length\n"); - fprintf(ofp, "closepath\n"); - fprintf(ofp, "fill\n"); - fprintf(ofp, "} def\n"); - fprintf(ofp, "%% preamble\n"); - fprintf(ofp, "/Helvetica findfont\n"); - fprintf(ofp, "12 scalefont\n"); - fprintf(ofp, "setfont\n"); - fprintf(ofp, "%% 0/0 for triangle of triangles\n"); - fprintf(ofp, "0.9 inch 3 inch translate\n"); - fprintf(ofp, "%% first triangle (the one with dots)\n"); - fprintf(ofp, "0.6 tl 1.2 tl 0.8660254038 mul translate\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.0 tl 0.0 tl moveto\n"); - fprintf(ofp, " 1.0 tl 0.0 tl lineto\n"); - fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n"); - fprintf(ofp, "closepath\n"); - fprintf(ofp, "stroke\n"); -} - -/* plot one point of likelihood mapping analysis */ -void plotlmpoint(FILE *ofp, double w1, double w2) -{ - fprintf(ofp,"%.10f tl %.10f tl dot\n", - 0.5*w1 + w2, w1*0.8660254038); -} - -/* last lines of EPSF likelihood mapping file */ -void 
finishps(FILE *ofp) -{ - fprintf(ofp, "stroke\n"); - fprintf(ofp, "%% second triangle (the one with 3 basins)\n"); - fprintf(ofp, "/secondtriangle {\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.0 tl 0.0 tl moveto\n"); - fprintf(ofp, " 1.0 tl 0.0 tl lineto\n"); - fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n"); - fprintf(ofp, "closepath\n"); - fprintf(ofp, "stroke\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n"); - fprintf(ofp, " 0.50 tl 0.0000000000 tl lineto\n"); - fprintf(ofp, "stroke\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n"); - fprintf(ofp, " 0.25 tl 0.4330127019 tl lineto\n"); - fprintf(ofp, "stroke\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n"); - fprintf(ofp, " 0.75 tl 0.4330127019 tl lineto\n"); - fprintf(ofp, "stroke\n"); - fprintf(ofp, "0.44 tl 0.5 tl moveto %% up\n"); - fprintf(ofp, "(%.1f%%) show\n", (double) ar1*100.0/Numquartets); - fprintf(ofp, "0.25 tl 0.15 tl moveto %% down left\n"); - fprintf(ofp, "(%.1f%%) show\n", (double) ar3*100.0/Numquartets); - fprintf(ofp, "0.63 tl 0.15 tl moveto %% down right\n"); - fprintf(ofp, "(%.1f%%) show\n", (double) ar2*100.0/Numquartets); - fprintf(ofp, "} def\n"); - fprintf(ofp, "%% third triangle (the one with 7 basins)\n"); - fprintf(ofp, "/thirdtriangle {\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.0 tl 0.0 tl moveto\n"); - fprintf(ofp, " 1.0 tl 0.0 tl lineto\n"); - fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n"); - fprintf(ofp, "closepath\n"); - fprintf(ofp, "stroke\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.25 tl 0.1443375673 tl moveto\n"); - fprintf(ofp, " 0.75 tl 0.1443375673 tl lineto\n"); - fprintf(ofp, " 0.50 tl 0.5773502692 tl lineto\n"); - fprintf(ofp, "closepath\n"); - fprintf(ofp, "stroke\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.125 tl 0.2165063509 tl moveto\n"); - fprintf(ofp, " 0.250 tl 0.1443375673 tl lineto\n"); - fprintf(ofp, "stroke\n"); 
- fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.375 tl 0.6495190528 tl moveto\n"); - fprintf(ofp, " 0.500 tl 0.5773502692 tl lineto\n"); - fprintf(ofp, "stroke\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.625 tl 0.6495190528 tl moveto\n"); - fprintf(ofp, " 0.500 tl 0.5773502692 tl lineto\n"); - fprintf(ofp, "stroke\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.875 tl 0.2165063509 tl moveto\n"); - fprintf(ofp, " 0.750 tl 0.1443375673 tl lineto\n"); - fprintf(ofp, "stroke\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.750 tl 0.00 tl moveto\n"); - fprintf(ofp, " 0.750 tl 0.1443375673 tl lineto\n"); - fprintf(ofp, "stroke\n"); - fprintf(ofp, "newpath\n"); - fprintf(ofp, " 0.250 tl 0.00 tl moveto\n"); - fprintf(ofp, " 0.250 tl 0.1443375673 tl lineto\n"); - fprintf(ofp, "stroke\n"); - fprintf(ofp, "0.42 tl 0.66 tl moveto %% up\n"); - fprintf(ofp, "(%.1f%%) show\n", (double) reg1*100.0/Numquartets); - fprintf(ofp, "0.07 tl 0.05 tl moveto %% down left\n"); - fprintf(ofp, "(%.1f%%) show\n", (double) reg3*100.0/Numquartets); - fprintf(ofp, "0.77 tl 0.05 tl moveto %% down right\n"); - fprintf(ofp, "(%.1f%%) show\n", (double) reg2*100.0/Numquartets); - fprintf(ofp, "0.43 tl 0.05 tl moveto %% down side\n"); - fprintf(ofp, "(%.1f%%) show\n", (double) reg5*100.0/Numquartets); - fprintf(ofp, "0.43 tl 0.28 tl moveto %% center\n"); - fprintf(ofp, "(%.1f%%) show\n", (double) reg7*100.0/Numquartets); - fprintf(ofp, "gsave\n"); - fprintf(ofp, "-60 rotate\n"); - fprintf(ofp, "-0.07 tl 0.77 tl moveto %% right side\n"); - fprintf(ofp, "(%.1f%%) show\n", (double) reg4*100.0/Numquartets); - fprintf(ofp, "grestore\n"); - fprintf(ofp, "gsave\n"); - fprintf(ofp, "60 rotate\n"); - fprintf(ofp, "0.4 tl -0.09 tl moveto %% left side\n"); - fprintf(ofp, "(%.1f%%) show\n", (double) reg6*100.0/Numquartets); - fprintf(ofp, "grestore\n"); - fprintf(ofp, "} def\n"); - fprintf(ofp, "%% print the other two triangles\n"); - fprintf(ofp, "-0.6 tl -1.2 tl 0.8660254038 mul translate\n"); 
- fprintf(ofp, "secondtriangle\n"); - fprintf(ofp, "1.2 tl 0 translate\n"); - fprintf(ofp, "thirdtriangle\n"); - if (numclust == 4) { /* four cluster analysis */ - fprintf(ofp, "%% label corners\n"); - fprintf(ofp, "0.375 tl 0.9 tl moveto\n"); - fprintf(ofp, "((a,b)-(c,d)) show %% CHANGE HERE IF NECESSARY\n"); - fprintf(ofp, "-0.16 tl -0.08 tl moveto\n"); - fprintf(ofp, "((a,d)-(b,c)) show %% CHANGE HERE IF NECESSARY\n"); - fprintf(ofp, "0.92 tl -0.08 tl moveto\n"); - fprintf(ofp, "((a,c)-(b,d)) show %% CHANGE HERE IF NECESSARY\n"); - } - if (numclust == 3) { /* three cluster analysis */ - fprintf(ofp, "%% label corners\n"); - fprintf(ofp, "0.375 tl 0.9 tl moveto\n"); - fprintf(ofp, "((a,b)-(c,c)) show %% CHANGE HERE IF NECESSARY\n"); - fprintf(ofp, "-0.16 tl -0.08 tl moveto\n"); - fprintf(ofp, "((a,c)-(b,c)) show %% CHANGE HERE IF NECESSARY\n"); - fprintf(ofp, "0.92 tl -0.08 tl moveto\n"); - fprintf(ofp, "((a,c)-(b,c)) show %% CHANGE HERE IF NECESSARY\n"); - } - if (numclust == 2) { /* two cluster analysis */ - fprintf(ofp, "%% label corners\n"); - fprintf(ofp, "0.375 tl 0.9 tl moveto\n"); - fprintf(ofp, "((a,a)-(b,b)) show %% CHANGE HERE IF NECESSARY\n"); - fprintf(ofp, "-0.16 tl -0.08 tl moveto\n"); - fprintf(ofp, "((a,b)-(a,b)) show %% CHANGE HERE IF NECESSARY\n"); - fprintf(ofp, "0.92 tl -0.08 tl moveto\n"); - fprintf(ofp, "((a,b)-(a,b)) show %% CHANGE HERE IF NECESSARY\n"); - } - fprintf(ofp, "showpage\n"); - fprintf(ofp, "%%%%EOF\n"); -} - -/* computes LM point from the three log-likelihood values, - plots the point, and does some statistics */ -void makelmpoint(FILE *fp, double b1, double b2, double b3) -{ - double w1, w2, w3, temp; - unsigned char qpbranching; - double temp1, temp2, temp3, onethird; - unsigned char discreteweight[3], treebits[3]; - - onethird = 1.0/3.0; - treebits[0] = (unsigned char) 1; - treebits[1] = (unsigned char) 2; - treebits[2] = (unsigned char) 4; - - /* sort in descending order */ - qweight[0] = b1; - qweight[1] = b2; - 
qweight[2] = b3; - sort3doubles(qweight, qworder); - - /* compute Bayesian weights */ - qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]); - qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]); - qweight[qworder[0]] = 1.0; - temp = qweight[0] + qweight[1] + qweight[2]; - qweight[0] = qweight[0]/temp; - qweight[1] = qweight[1]/temp; - qweight[2] = qweight[2]/temp; - - /* plot one point in likelihood mapping triangle */ - w1 = qweight[0]; - w2 = qweight[1]; - w3 = qweight[2]; - plotlmpoint(fp, w1, w2); - - /* check areas 1,2,3 */ - if (treebits[qworder[0]] == 1) ar1++; - else if (treebits[qworder[0]] == 2) ar2++; - else ar3++; - - /* check out regions 1,2,3,4,5,6,7 */ - - /* 100 distribution */ - temp1 = 1.0 - qweight[qworder[0]]; - sqdiff[0] = temp1*temp1 + - qweight[qworder[1]]*qweight[qworder[1]] + - qweight[qworder[2]]*qweight[qworder[2]]; - discreteweight[0] = treebits[qworder[0]]; - - /* 110 distribution */ - temp1 = 0.5 - qweight[qworder[0]]; - temp2 = 0.5 - qweight[qworder[1]]; - sqdiff[1] = temp1*temp1 + temp2*temp2 + - qweight[qworder[2]]*qweight[qworder[2]]; - discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]]; - - /* 111 distribution */ - temp1 = onethird - qweight[qworder[0]]; - temp2 = onethird - qweight[qworder[1]]; - temp3 = onethird - qweight[qworder[2]]; - sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3; - discreteweight[2] = (unsigned char) 7; - - /* sort in descending order */ - sort3doubles(sqdiff, sqorder); - - qpbranching = (unsigned char) discreteweight[sqorder[2]]; - - if (qpbranching == 1) { - reg1++; - if (w2 < w3) reg1l++; - else reg1r++; - } - if (qpbranching == 2) { - reg2++; - if (w1 < w3) reg2d++; - else reg2u++; - } - if (qpbranching == 4) { - reg3++; - if (w1 < w2) reg3d++; - else reg3u++; - } - if (qpbranching == 3) { - reg4++; - if (w1 < w2) reg4d++; - else reg4u++; - } - if (qpbranching == 6) { - reg5++; - if (w2 < w3) reg5l++; - else reg5r++; - } - if (qpbranching == 5) { - 
reg6++; - if (w1 < w3) reg6d++; - else reg6u++; - } - if (qpbranching == 7) reg7++; -} - -/* print tree statistics */ -void printtreestats(FILE *ofp) -{ - int i, j, besttree; - double bestlkl, difflkl, difflklps, temp, sum; - - /* find best tree */ - besttree = 0; - bestlkl = ulkl[0]; - for (i = 1; i < numutrees; i++) - if (ulkl[i] > bestlkl) { - besttree = i; - bestlkl = ulkl[i]; - } - - fprintf(ofp, "\n\nCOMPARISON OF USER TREES (NO CLOCK)\n\n"); - fprintf(ofp, "Tree log L difference S.E. Significantly worse\n"); - fprintf(ofp, "--------------------------------------------------------\n"); - for (i = 0; i < numutrees; i++) { - difflkl = ulkl[besttree]-ulkl[i]; - fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulkl[i], difflkl); - if (i == besttree) { - fprintf(ofp, " <----------------- best tree"); - } else { - /* compute variance of Log L differences over sites */ - difflklps = difflkl/(double)Maxsite; - sum = 0.0; - for (j = 0; j < Numptrn; j++) { - temp = allsites[besttree][j] - allsites[i][j] - difflklps; - sum += temp*temp*Weight[j]; - } - sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite)); - fprintf(ofp, "%11.2f ", sum); - if (difflkl > 1.96*sum) - fprintf(ofp, "yes"); - else - fprintf(ofp, "no"); - } - fprintf(ofp, "\n"); - } - fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n"); - - if (compclock) { - - /* find best tree */ - besttree = 0; - bestlkl = ulklc[0]; - for (i = 1; i < numutrees; i++) - if (ulklc[i] > bestlkl) { - besttree = i; - bestlkl = ulklc[i]; - } - - fprintf(ofp, "\n\nCOMPARISON OF USER TREES (WITH CLOCK)\n\n"); - fprintf(ofp, "Tree log L difference S.E. 
Significantly worse\n"); - fprintf(ofp, "--------------------------------------------------------\n"); - for (i = 0; i < numutrees; i++) { - difflkl = ulklc[besttree]-ulklc[i]; - fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulklc[i], difflkl); - if (i == besttree) { - fprintf(ofp, " <----------------- best tree"); - } else { - /* compute variance of Log L differences over sites */ - difflklps = difflkl/(double)Maxsite; - sum = 0.0; - for (j = 0; j < Numptrn; j++) { - temp = allsitesc[besttree][j] - allsitesc[i][j] - difflklps; - sum += temp*temp*Weight[j]; - } - sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite)); - fprintf(ofp, "%11.2f ", sum); - if (difflkl > 1.96*sum) - fprintf(ofp, "yes"); - else - fprintf(ofp, "no"); - } - fprintf(ofp, "\n"); - } - fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n"); - } -} - -/* time stamp */ -void timestamp(FILE* ofp) -{ - double timespan; - double cpuspan; - timespan = difftime(Stoptime, Starttime); - cpuspan = ((double) (Stopcpu - Startcpu) / CLOCKS_PER_SEC); - fprintf(ofp, "\n\nTIME STAMP\n\n"); - fprintf(ofp, "Date and time: %s", asctime(localtime(&Starttime)) ); - fprintf(ofp, "Runtime (excl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n", - timespan, timespan/60., timespan/3600.); - fprintf(ofp, "Runtime (incl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n", - fulltime, fulltime/60., fulltime/3600.); -#ifdef TIMEDEBUG - fprintf(ofp, "CPU time (incl. 
input): %.0f seconds (= %.1f minutes = %.1f hours)\n\n", - fullcpu, fullcpu/60., fullcpu/3600.); -#endif /* TIMEDEBUG */ - -} - -/* extern int bestrfound; */ - -/* write output file */ -void writeoutputfile(FILE *ofp, int part) -{ - int i, fail, df; - uli li; - double pval, delta; - - if ((part == WRITEPARAMS) || (part == WRITEALL)) { -# ifndef ALPHA - fprintf(ofp, "TREE-PUZZLE %s\n\n", VERSION); -# else - fprintf(ofp, "TREE-PUZZLE %s%s\n\n", VERSION, ALPHA); -# endif - - fprintf(ofp, "Input file name: %s\n",INFILE); - if (puzzlemode == USERTREE) fprintf(ofp, "User tree file name: %s\n",INTREE); - - - fprintf(ofp, "Type of analysis: "); - if (typ_optn == TREERECON_OPTN) fprintf(ofp, "tree reconstruction\n"); - if (typ_optn == LIKMAPING_OPTN) fprintf(ofp, "likelihood mapping\n"); - fprintf(ofp, "Parameter estimation: "); - if (approxp_optn) fprintf(ofp, "approximate (faster)\n"); - else fprintf(ofp, "accurate (slow)\n"); - if (!(puzzlemode == USERTREE && typ_optn == TREERECON_OPTN)) { - fprintf(ofp, "Parameter estimation uses: "); - if (qcalg_optn) - fprintf(ofp, "quartet sampling (for substitution process) + NJ tree (for rate variation)\n"); - else - fprintf(ofp, "neighbor-joining tree (for substitution process and rate variation)\n"); - } else { - fprintf(ofp, "Parameter estimation uses: "); - if (utree_optn) - fprintf(ofp, "1st user tree (for substitution process and rate variation)\n"); - else if (qcalg_optn) - fprintf(ofp, "quartet sampling (for substitution process) + NJ tree (for rate variation)\n"); - else - fprintf(ofp, "neighbor-joining tree (for substitution process and rate variation)\n"); - } - fprintf(ofp, "\nStandard errors (S.E.) are obtained by the curvature method.\n"); - fprintf(ofp, "The upper and lower bounds of an approximate 95%% confidence interval\n"); - fprintf(ofp, "for parameter or branch length x are x-1.96*S.E. 
and x+1.96*S.E.\n"); - fprintf(ofp, "\n\n"); - fprintf(ofp, "SEQUENCE ALIGNMENT\n\n"); - fprintf(ofp, "Input data: %d sequences with %d ", Maxspc, Maxsite); - if (data_optn == AMINOACID) - fprintf(ofp, "amino acid"); - else if (data_optn == NUCLEOTIDE && SH_optn) - fprintf(ofp, "doublet (%d nucleotide)", Maxsite*2); - else if (data_optn == NUCLEOTIDE && nuc_optn) - fprintf(ofp, "nucleotide"); - else if (data_optn == BINARY) - fprintf(ofp, "binary state"); - fprintf(ofp, " sites"); - if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0 && !SH_optn) { - if (codon_optn == 1) fprintf(ofp, " (1st codon positions)"); - if (codon_optn == 2) fprintf(ofp, " (2nd codon positions)"); - if (codon_optn == 3) fprintf(ofp, " (3rd codon positions)"); - if (codon_optn == 4) fprintf(ofp, " (1st and 2nd codon positions)"); - } - if (data_optn == NUCLEOTIDE && SH_optn) { - if (SHcodon) - fprintf(ofp, " (1st and 2nd codon positions)"); - else - fprintf(ofp, " (1st+2nd, 3rd+4th, etc. site)"); - } - fprintf(ofp, "\n"); - fprintf(ofp, "Number of constant sites: %d (= %.1f%% of all sites)\n", - Numconst, 100.0*fracconst); - fprintf(ofp, "Number of site patterns: %d\n", - Numptrn); - fprintf(ofp, "Number of constant site patterns: %d (= %.1f%% of all site patterns)\n\n\n", - Numconstpat, 100.0*fracconstpat); - fprintf(ofp, "SUBSTITUTION PROCESS\n\n"); - fprintf(ofp, "Model of substitution: "); - if (data_optn == NUCLEOTIDE) { /* nucleotides */ - if (nuc_optn) { - if(HKY_optn) fprintf(ofp, "HKY (Hasegawa et al. 1985)\n"); - else fprintf(ofp, "TN (Tamura-Nei 1993)\n"); - fprintf(ofp, "Transition/transversion parameter"); - if (optim_optn) - fprintf(ofp, " (estimated from data set)"); - fprintf(ofp, ": %.2f", TSparam); - if (optim_optn) - fprintf(ofp, " (S.E. 
%.2f)", tserr); - fprintf(ofp, "\n"); - - if (optim_optn && TSparam > MAXTS - 1.0) - fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n"); - if (optim_optn && TSparam < MINTS + 0.1) - fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n"); - - if (TN_optn) { - fprintf(ofp, "Y/R transition parameter"); - if (optim_optn) - fprintf(ofp, " (estimated from data set)"); - fprintf(ofp, ": %.2f", YRparam); - if (optim_optn) - fprintf(ofp, " (S.E. %.2f)", yrerr); - fprintf(ofp, "\n"); - - if (optim_optn && YRparam > MAXYR - 0.5) - fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n"); - if (optim_optn && YRparam < MINYR + 0.1) - fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n"); - - } - } - if (SH_optn) { - fprintf(ofp, "SH (Schoeniger-von Haeseler 1994)\n"); - fprintf(ofp, "Transition/transversion parameter"); - if (optim_optn) fprintf(ofp, " (estimated from data set)"); - fprintf(ofp, ": %.2f\n", TSparam); - if (optim_optn) - fprintf(ofp, " (S.E. %.2f)", tserr); - fprintf(ofp, "\n"); - - if (optim_optn && TSparam > MAXTS - 1.0) - fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n"); - if (optim_optn && TSparam < MINTS + 0.1) - fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n"); - - } - } - if (data_optn == AMINOACID) { /* amino acids */ - if (Dayhf_optn) fprintf(ofp, "Dayhoff (Dayhoff et al. 1978)\n"); - if (Jtt_optn) fprintf(ofp, "JTT (Jones et al. 1992)\n"); - if (mtrev_optn) fprintf(ofp, "mtREV24 (Adachi-Hasegawa 1996)\n"); - if (cprev_optn) fprintf(ofp, "cpREV45 (Adachi et al. 
2000)\n"); - if (blosum62_optn) fprintf(ofp, "BLOSUM 62 (Henikoff-Henikoff 1992)\n"); - if (vtmv_optn) fprintf(ofp, "VT (Mueller-Vingron 2000)\n"); - if (wag_optn) fprintf(ofp, "WAG (Whelan-Goldman 2000)\n"); - } - if (data_optn == BINARY) { /* binary states */ - fprintf(ofp, "Two-state model (Felsenstein 1981)\n"); - } - if (data_optn == AMINOACID) - fprintf(ofp, "Amino acid "); - else if (data_optn == NUCLEOTIDE && SH_optn) - fprintf(ofp, "Doublet "); - else if (data_optn == NUCLEOTIDE && nuc_optn) - fprintf(ofp, "Nucleotide "); - else if (data_optn == BINARY) - fprintf(ofp, "Binary state "); - fprintf(ofp, "frequencies ("); - if (Frequ_optn) fprintf(ofp, "estimated from data set"); - else fprintf(ofp, "user specified"); - if (data_optn == NUCLEOTIDE && SH_optn && sym_optn) - fprintf(ofp, " and symmetrized"); - fprintf(ofp, "):\n\n"); - for (i = 0; i < gettpmradix(); i++) - fprintf(ofp, " pi(%s) = %5.1f%%\n", - int2code(i), Freqtpm[i]*100); - if (data_optn == NUCLEOTIDE) { - fprintf(ofp, "\nExpected transition/transversion ratio: %.2f", - tstvratio); - if (tstvf84 == 0.0) fprintf(ofp, "\n"); - else fprintf(ofp, " (= F84 parameter)\n"); - fprintf(ofp, "Expected pyrimidine transition/purine transition"); - fprintf(ofp, " ratio: %.2f\n", yrtsratio); - if (tstvf84 != 0.0 && TN_optn) - fprintf(ofp, - "This TN model is equivalent to a F84 model (Felsenstein 1984).\n"); - } - fprintf(ofp, "\n\nRATE HETEROGENEITY\n\n"); - fprintf(ofp, "Model of rate heterogeneity: "); - if (rhetmode == UNIFORMRATE) fprintf(ofp, "uniform rate\n"); - if (rhetmode == GAMMARATE ) fprintf(ofp, "Gamma distributed rates\n"); - if (rhetmode == TWORATE ) fprintf(ofp, "two rates (1 invariable + 1 variable)\n"); - if (rhetmode == MIXEDRATE ) fprintf(ofp, "mixed (1 invariable + %d Gamma rates)\n", numcats); - if (rhetmode == TWORATE || rhetmode == MIXEDRATE) { - fprintf(ofp, "Fraction of invariable sites"); - if (fracinv_optim) fprintf(ofp, " (estimated from data set)"); - fprintf(ofp, ": %.2f", 
fracinv); - if (fracinv_optim) fprintf(ofp, " (S.E. %.2f)", fierr); - fprintf(ofp, "\n"); - - if (fracinv_optim && fracinv > MAXFI - 0.05) - fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n"); - - fprintf(ofp, "Number of invariable sites: %.0f\n", floor(fracinv*Maxsite)); - } - if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) { - fprintf(ofp, "Gamma distribution parameter alpha"); - if (grate_optim) fprintf(ofp, " (estimated from data set)"); - fprintf(ofp, ": %.2f", (1.0-Geta)/Geta); - if (grate_optim) fprintf(ofp, " (S.E. %.2f)", - geerr/(Geta*Geta)); /* first order approximation */ - fprintf(ofp, "\n"); - - if (grate_optim && Geta > MAXGE - 0.02) - fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n"); - if (grate_optim && Geta < MINGE + 0.01) - fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n"); - - fprintf(ofp, "Number of Gamma rate categories: %d\n", numcats); - } - if (rhetmode == MIXEDRATE) { - fprintf(ofp, "Total rate heterogeneity (invariable sites + Gamma model): "); - fprintf(ofp, "%.2f", fracinv + Geta - fracinv*Geta); - if (grate_optim && fracinv_optim) - fprintf(ofp, " (S.E. %.2f)", geerr + fierr); /* first order approximation */ - else if (grate_optim && !fracinv_optim) - fprintf(ofp, " (S.E. %.2f)", geerr); - else if (!grate_optim && fracinv_optim) - fprintf(ofp, " (S.E. 
%.2f)", fierr); - fprintf(ofp, "\n"); - } - if (rhetmode != UNIFORMRATE) { - fprintf(ofp, "\nRates and their respective probabilities used in the likelihood function:\n"); - fprintf(ofp, "\n Category Relative rate Probability\n"); - if (rhetmode == TWORATE || rhetmode == MIXEDRATE) - fprintf(ofp, " 0 0.0000 %.4f\n", fracinv); - for (i = 0; i < numcats; i++) - fprintf(ofp, " %d %.4f %.4f\n", - i+1, Rates[i], (1.0-fracinv)/(double) numcats); - } - if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) { - fprintf(ofp, "\nCategories 1-%d approximate a continous ", numcats); - fprintf(ofp, "Gamma-distribution with expectation 1\n"); - fprintf(ofp, "and variance "); - if (Geta == 1.0) fprintf(ofp, "infinity"); - else fprintf(ofp, "%.2f", Geta/(1.0-Geta)); - fprintf(ofp, ".\n"); - } - - if (typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE)) - if (rhetmode != UNIFORMRATE) { - fprintf(ofp, "\nCombination of categories that contributes"); - fprintf(ofp, " the most to the likelihood\n"); - fprintf(ofp, "(computation done without clock assumption assuming "); - if (puzzlemode == QUARTPUZ) fprintf(ofp, "quartet-puzzling tree"); - if (puzzlemode == USERTREE) { - if (utree_optn) fprintf(ofp, "1st user tree"); - else fprintf(ofp, "NJ tree"); - } - fprintf(ofp, "):\n\n"); - if (bestratefound==0) findbestratecombination(); - printbestratecombination(ofp); - } - - fprintf(ofp, "\n\nSEQUENCE COMPOSITION (SEQUENCES IN INPUT ORDER)\n\n"); - fail = FALSE; - fprintf(ofp, " 5%% chi-square test p-value\n"); - for (i = 0; i < Maxspc; i++) { - fprintf(ofp, " "); - fputid10(ofp, i); - pval = homogentest(i); - if ( pval < 0.05 ) fprintf(ofp, " failed "); - else fprintf(ofp, " passed "); - if (chi2fail) fail = TRUE; - fprintf(ofp, " %6.2f%% ", pval*100.0); - fprintf(ofp, "\n"); - } - fprintf(ofp, "\n"); - fprintf(ofp, "The chi-square tests compares the "); - if (data_optn == AMINOACID) - fprintf(ofp, "amino acid"); - else if (data_optn == NUCLEOTIDE && SH_optn) - 
fprintf(ofp, "doublet"); - else if (data_optn == NUCLEOTIDE && nuc_optn) - fprintf(ofp, "nucleotide"); - else if (data_optn == BINARY) - fprintf(ofp, "binary state"); - fprintf(ofp," composition of each sequence\n"); - fprintf(ofp, "to the frequency distribution assumed in the maximum likelihood model.\n"); - if (fail) { - fprintf(ofp, "\nWARNING: Result of chi-square test may not be valid"); - fprintf(ofp, " because of small\nmaximum likelihood frequencies and"); - fprintf(ofp, " short sequence length!\n"); - } - fprintf(ofp, "\n\nIDENTICAL SEQUENCES\n\n"); - fprintf(ofp, "The sequences in each of the following groups are all identical. To speed\n"); - fprintf(ofp, "up computation please remove all but one of each group from the data set.\n\n"); - findidenticals(ofp); - fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD DISTANCES\n\n"); - fprintf(ofp, "Maximum likelihood distances are computed using the "); - fprintf(ofp, "selected model of\nsubstitution and rate heterogeneity.\n\n"); - putdistance(ofp); - fprintf(ofp, "\nAverage distance (over all possible pairs of sequences): %.5f\n", - averagedist() / 100.0); - - - } /* if WRITEPARAMS) || WRITEALL */ - - if ((part == WRITEREST) || (part == WRITEALL)) { - - if (puzzlemode == QUARTPUZ &&typ_optn == TREERECON_OPTN) { - fprintf(ofp, "\n\nBAD QUARTET STATISTICS (SEQUENCES IN INPUT ORDER)\n\n"); - for (i = 0; i < Maxspc; i++) { - fprintf(ofp, " "); - fputid10(ofp, i); - if (badqs > 0) - fprintf(ofp, " [%lu] %6.2f%%\n", badtaxon[i], (double) (100 * badtaxon[i]) / (double) badqs); - else - fprintf(ofp, " [%lu]\n", badtaxon[i]); - } - fprintf(ofp, "\nThe number in square brackets indicates how often each sequence is\n"); - fprintf(ofp, "involved in one of the %lu completely unresolved quartets of the\n", badqs); - fprintf(ofp, "quartet puzzling tree search.\n"); - if (badqs > 0) - fprintf(ofp, "Additionally the according percentages are given.\n"); - } - - if (typ_optn == TREERECON_OPTN) { - - fprintf(ofp, "\n\nTREE SEARCH\n\n"); - 
if (puzzlemode == QUARTPUZ) { - fprintf(ofp, "Quartet puzzling is used to choose from the possible tree topologies\n"); - fprintf(ofp, "and to simultaneously infer support values for internal branches.\n\n"); - fprintf(ofp, "Number of puzzling steps: %lu\n", Numtrial); - fprintf(ofp, "Analysed quartets: %lu\n", Numquartets); - fprintf(ofp, "Unresolved quartets: %lu (= %.1f%%)\n", - badqs, (double) badqs / (double) Numquartets * 100); - fprintf(ofp, "\nQuartet trees are based on %s maximum likelihood values\n", - (approxqp ? "approximate" : "exact")); - fprintf(ofp, "using the selected model of substitution and rate heterogeneity.\n\n\n"); - } - if (puzzlemode == USERTREE) { - fprintf(ofp, "%d tree topologies were specified by the user.\n", numutrees); - } - if (puzzlemode == PAIRDIST) { - fprintf(ofp, "No tree search performed (maximum likelihood distances only).\n"); - } - - if (puzzlemode == QUARTPUZ) { - fprintf(ofp, "QUARTET PUZZLING TREE\n\n"); - fprintf(ofp, "Support for the internal branches of the unrooted quartet puzzling\n"); - fprintf(ofp, "tree topology is shown in percent.\n"); - if (consincluded == Maxspc - 3) - fprintf(ofp,"\nThis quartet puzzling tree is completely resolved.\n"); - else - fprintf(ofp,"\nThis quartet puzzling tree is not completely resolved!\n"); - fprintf(ofp, "\n\n"); - plotconsensustree(ofp); - fprintf(ofp, "\n\nQuartet puzzling tree (in CLUSTAL W notation):\n\n"); - writeconsensustree(ofp); - fprintf(ofp, "\n\nBIPARTITIONS\n\n"); - fprintf(ofp, "The following bipartitions occured at least once"); - fprintf(ofp, " in all intermediate\ntrees that have been generated "); - fprintf(ofp, "in the %lu puzzling steps:\n\n", Numtrial); - fprintf(ofp, "Bipartitions included in the quartet puzzling tree:\n"); - fprintf(ofp, - "(bipartition with sequences in input order : number of times seen)\n\n"); - for (li = 0; li < consincluded; li++) { - fprintf(ofp, " "); - printsplit(ofp, splitfreqs[2*li+1]); - fprintf(ofp, " : %lu\n", 
splitfreqs[2*li]); - } - if (consincluded == 0) fprintf(ofp, " None (no bipartition included)\n"); - fprintf(ofp, "\nBipartitions not included in the quartet puzzling tree:\n"); - fprintf(ofp, - "(bipartition with sequences in input order : number of times seen)\n\n"); - - if (consincluded == numbiparts) { - fprintf(ofp, " None (all bipartitions are included)\n"); - } else { - /* print first 20 bipartions not included */ - for (li = consincluded; (li < numbiparts) && (li < consincluded + 20UL); li++) { - fprintf(ofp, " "); - printsplit(ofp, splitfreqs[2*li+1]); - fprintf(ofp, " : %lu\n", splitfreqs[2*li]); - } - if ((li == consincluded + 20UL) && (li != numbiparts)) - fprintf(ofp, "\n(%lu other less frequent bipartitions not shown)\n", - numbiparts - consincluded - 20UL); - } - fprintfsortedpstrees(ofp, psteptreelist, psteptreenum, psteptreesum, 0, 5.0); - } - - if (puzzlemode == QUARTPUZ) { - fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS ON QUARTET"); - fprintf(ofp, " PUZZLING TREE (NO CLOCK)\n\nBranch lengths are computed using"); - fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n\n\n"); - clockmode = 0; /* nonclocklike branch lengths */ - prtopology(ofp); - fprintf(ofp, "\n"); - resulttree(ofp); - fprintf(ofp, "\n\nQuartet puzzling tree with maximum likelihood branch lengths"); - fprintf(ofp, "\n(in CLUSTAL W notation):\n\n"); - fputphylogeny(ofp); - if (compclock) { - fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF QUARTET"); - fprintf(ofp, " PUZZLING TREE (WITH CLOCK)\n\nBranch lengths are computed using"); - fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n"); - fprintf(ofp, "\nRoot located at branch: %d ", locroot+1); - if (rootsearch == 0) fprintf(ofp, "(user specified)\n\n\n"); - if (rootsearch == 1) { - fprintf(ofp, "(automatic search)"); - if (numbestroot > 1) fprintf(ofp, "- WARNING: %d best locations found! 
-", numbestroot); - fprintf(ofp, "\n\n"); - fprintf(ofp, "If the automatic search misplaces the root please rerun the analysis\n"); - fprintf(ofp, "(rename \"outtree\" to \"intree\") and select location of root manually!"); - fprintf(ofp, "\n\n\n"); - } - if (rootsearch == 2) fprintf(ofp, "(displayed outgroup)\n\n\n"); - clockmode = 1; /* clocklike branch lengths */ - prtopology(ofp); - fprintf(ofp, "\n"); - fprintf(ofp, "\nTree drawn as unrooted tree for better "); - fprintf(ofp, "comparison with non-clock tree!\n"); - resulttree(ofp); - fprintf(ofp, "\n"); - resultheights(ofp); - fprintf(ofp, "\n\nRooted quartet puzzling tree with clocklike"); - fprintf(ofp, " maximum likelihood branch lengths\n"); - fprintf(ofp, "(in CLUSTAL W notation):\n\n"); - fputrooted(ofp, locroot); - } - - if (compclock) { - fprintf(ofp, "\n\nMOLECULAR CLOCK LIKELIHOOD RATIO TEST\n\n"); - fprintf(ofp, "log L without clock: %.2f (independent branch parameters: %d)\n", - Ctree->lklhd, Numspc + Numibrnch); - fprintf(ofp, "log L with clock: %.2f (independent branch parameters: %d)\n\n", - Ctree->lklhdc, Numhts + 1); - delta = 2.0*((Ctree->lklhd) - (Ctree->lklhdc)); - fprintf(ofp, "Likelihood ratio test statistic delta: %.2f\n", delta); - df = Numspc + Numibrnch - Numhts - 1; - fprintf(ofp, "Degress of freedom of chi-square distribution: %d\n", df); - - pval = IncompleteGammaQ(df*0.5, delta*0.5); - - fprintf(ofp, "Critical significance level: %.2f%%\n\n", pval*100.0); - if (pval >= 0.05) { - fprintf(ofp, "The simpler (clocklike) tree can not be rejected on a significance\n"); - fprintf(ofp, "level of 5%%. The log-likelihood of the more complex (no clock) tree\n"); - fprintf(ofp, "is not significantly increased.\n"); - } else { - fprintf(ofp, "The simpler (clocklike) tree is rejected on a significance level\n"); - fprintf(ofp, "of 5%%. 
The log-likelihood of the more complex (no clock) tree is\n"); - fprintf(ofp, "significantly increased.\n"); - } - fprintf(ofp, "\nPlease take care that the correct root is used!\n"); - } - - } - } - - if (typ_optn == LIKMAPING_OPTN) { - - fprintf(ofp, "\n\nLIKELIHOOD MAPPING ANALYSIS\n\n"); - fprintf(ofp, "Number of quartets: %lu", Numquartets); - if (lmqts == 0) fprintf(ofp, " (all possible)\n"); - else fprintf(ofp, " (random choice)\n"); - fprintf(ofp, "\nQuartet trees are based on approximate maximum likelihood values\n"); - fprintf(ofp, "using the selected model of substitution and rate heterogeneity.\n\n\n"); - if (numclust == 1) { - fprintf(ofp, "Sequences are not grouped in clusters.\n"); - } else { - fprintf(ofp, "Sequences are grouped in %d clusters.\n", numclust); - fprintf(ofp, "\nCluster a: %d sequences\n\n", clustA); - for (i = 0; i < clustA; i++) { - fprintf(ofp, " "); - fputid(ofp, clusterA[i]); - fprintf(ofp, "\n"); - } - fprintf(ofp, "\nCluster b: %d sequences\n\n", clustB); - for (i = 0; i < clustB; i++) { - fprintf(ofp, " "); - fputid(ofp, clusterB[i]); - fprintf(ofp, "\n"); - } - if (numclust > 2) { - fprintf(ofp, "\nCluster c: %d sequences\n\n", clustC); - for (i = 0; i < clustC; i++) { - fprintf(ofp, " "); - fputid(ofp, clusterC[i]); - fprintf(ofp, "\n"); - } - } - if (numclust == 4) { - fprintf(ofp, "\nCluster d: %d sequences\n\n", clustD); - for (i = 0; i < clustD; i++) { - fprintf(ofp, " "); - fputid(ofp, clusterD[i]); - fprintf(ofp, "\n"); - } - } - fprintf(ofp, "\nQuartets of sequences used in the likelihood"); - fprintf(ofp, " mapping analysis are generated\n"); - if (numclust == 2) - fprintf(ofp, "by drawing two sequences from cluster a and two from cluster b."); - if (numclust == 3) - fprintf(ofp, "by drawing one sequence from clusters a and b and two from cluster c."); - if (numclust == 4) - fprintf(ofp, "by drawing one sequence from each of the clusters a, b, c, and d."); - } - - fprintf(ofp, "\n\nLIKELIHOOD MAPPING 
STATISTICS\n\n"); - fprintf(ofp, "Occupancies of the three areas 1, 2, 3:\n\n"); - if (numclust == 4) - fprintf(ofp, " (a,b)-(c,d)\n"); - if (numclust == 3) - fprintf(ofp, " (a,b)-(c,c)\n"); - if (numclust == 2) - fprintf(ofp, " (a,a)-(b,b)\n"); - fprintf(ofp, " /\\\n"); - fprintf(ofp, " / \\\n"); - fprintf(ofp, " / \\\n"); - fprintf(ofp, " / 1 \\\n"); - fprintf(ofp, " / \\ / \\\n"); - fprintf(ofp, " / \\ / \\\n"); - fprintf(ofp, " / \\/ \\\n"); - fprintf(ofp, " / 3 : 2 \\\n"); - fprintf(ofp, " / : \\\n"); - fprintf(ofp, " /__________________\\\n"); - if (numclust == 4) - fprintf(ofp, " (a,d)-(b,c) (a,c)-(b,d)\n"); - if (numclust == 3) - fprintf(ofp, " (a,c)-(b,c) (a,c)-(b,c)\n"); - if (numclust == 2) - fprintf(ofp, " (a,b)-(a,b) (a,b)-(a,b)\n"); - fprintf(ofp, "\n"); - fprintf(ofp, "Number of quartets in region 1: %lu (= %.1f%%)\n", - ar1, (double) ar1*100.0/Numquartets); - fprintf(ofp, "Number of quartets in region 2: %lu (= %.1f%%)\n", - ar2, (double) ar2*100.0/Numquartets); - fprintf(ofp, "Number of quartets in region 3: %lu (= %.1f%%)\n\n", - ar3, (double) ar3*100.0/Numquartets); - fprintf(ofp, "Occupancies of the seven areas 1, 2, 3, 4, 5, 6, 7:\n\n"); - if (numclust == 4) - fprintf(ofp, " (a,b)-(c,d)\n"); - if (numclust == 3) - fprintf(ofp, " (a,b)-(c,c)\n"); - if (numclust == 2) - fprintf(ofp, " (a,a)-(b,b)\n"); - fprintf(ofp, " /\\\n"); - fprintf(ofp, " / \\\n"); - fprintf(ofp, " / 1 \\\n"); - fprintf(ofp, " / \\ / \\\n"); - fprintf(ofp, " / /\\ \\\n"); - fprintf(ofp, " / 6 / \\ 4 \\\n"); - fprintf(ofp, " / / 7 \\ \\\n"); - fprintf(ofp, " / \\ /______\\ / \\\n"); - fprintf(ofp, " / 3 : 5 : 2 \\\n"); - fprintf(ofp, " /__________________\\\n"); - if (numclust == 4) - fprintf(ofp, " (a,d)-(b,c) (a,c)-(b,d)\n"); - if (numclust == 3) - fprintf(ofp, " (a,c)-(b,c) (a,c)-(b,c)\n"); - if (numclust == 2) - fprintf(ofp, " (a,b)-(a,b) (a,b)-(a,b)\n"); - fprintf(ofp, "\n"); - fprintf(ofp, "Number of quartets in region 1: %lu (= %.1f%%) left: %lu right: %lu\n", - reg1, 
(double) reg1*100.0/Numquartets, reg1l, reg1r); - fprintf(ofp, "Number of quartets in region 2: %lu (= %.1f%%) bottom: %lu top: %lu\n", - reg2, (double) reg2*100.0/Numquartets, reg2d, reg2u); - fprintf(ofp, "Number of quartets in region 3: %lu (= %.1f%%) bottom: %lu top: %lu\n", - reg3, (double) reg3*100.0/Numquartets, reg3d, reg3u); - fprintf(ofp, "Number of quartets in region 4: %lu (= %.1f%%) bottom: %lu top: %lu\n", - reg4, (double) reg4*100.0/Numquartets, reg4d, reg4u); - fprintf(ofp, "Number of quartets in region 5: %lu (= %.1f%%) left: %lu right: %lu\n", - reg5, (double) reg5*100.0/Numquartets, reg5l, reg5r); - fprintf(ofp, "Number of quartets in region 6: %lu (= %.1f%%) bottom: %lu top: %lu\n", - reg6, (double) reg6*100.0/Numquartets, reg6d, reg6u); - fprintf(ofp, "Number of quartets in region 7: %lu (= %.1f%%)\n", - reg7, (double) reg7*100.0/Numquartets); - } - - } /* if WRITEREST) || WRITEALL */ -} - - -#if PARALLEL -void writetimesstat(FILE *ofp) -{ - int n; - double cpusum = 0.0; - double wallmax = 0.0; - cputimes[0] = ((double)(cputimestop - cputimestart) / CLOCKS_PER_SEC); - walltimes[0] = difftime(walltimestop, walltimestart); - fullcpu = tarr.fullcpu; - fulltime = tarr.fulltime; - fullcputimes[0] = tarr.fullcpu; - fullwalltimes[0] = tarr.fulltime; - altcputimes[0] = tarr.cpu; - altwalltimes[0] = tarr.time; - fprintf(ofp, "\n\n\nPARALLEL LOAD STATISTICS\n\n"); - - fprintf(ofp, "The analysis was performed with %d parallel processes (1 master and \n", PP_NumProcs); - fprintf(ofp, "%d worker processes).\n\n", PP_NumProcs-1); - fprintf(ofp, "The following table the distribution of computation to the processes.\n"); - fprintf(ofp, "The first column gives the process number, where 0 is the master process.\n"); - fprintf(ofp, "The second and third column show the number of quartets computed (3 topologies \n"); - fprintf(ofp, "each) and the the number of scheduling blocks the came in. 
The last two columns \n"); - fprintf(ofp, "state the number of puzzling steps done by a process and number of scheduling \n"); - fprintf(ofp, "blocks.\n\n"); - fprintf(ofp, "process #quartets #chunks #puzzlings #chunks \n"); - fprintf(ofp, "-----------------------------------------------\n"); - for (n=0; n wallmax) wallmax=fullwalltimes[n]; - cpusum += fullcputimes[n]; - } /* for */ - fprintf(ofp, "----------------------------------------------------------------------------\n"); - fprintf(ofp, "Sum/Max: %11.1f %9.1f %9.1f | %11.1f %9.1f %9.1f \n", - cpusum, cpusum/60, cpusum/3600, wallmax, wallmax/60, wallmax/3600); -#else /* TIMEDEBUG */ - fprintf(ofp, "\n\nBelow the distribution of computing times (wallclock) per host is shown.\n"); - fprintf(ofp, "The times are shown in seconds, minutes, and hours. At the bottom of the table the\n"); - fprintf(ofp, "the maximum wallclock times is shown.\n\n"); - fprintf(ofp, "process wallclock[s] [min] [hours] \n"); - fprintf(ofp, "----------------------------------------------------------------------------\n"); - for (n=0; n wallmax) wallmax=fullwalltimes[n]; - cpusum += fullcputimes[n]; - } /* for */ - fprintf(ofp, "----------------------------------------------------------------------------\n"); - fprintf(ofp, "Sum/Max: %11.1f %9.1f %9.1f \n", - wallmax, wallmax/60, wallmax/3600); -#endif /* TIMEDEBUG */ - - fullcpu = cpusum; - fulltime = wallmax; - -} /* writetimesstat */ -#endif - - -/* write current user tree to file */ -void writecutree(FILE *ofp, int num) -{ - int df; - double pval, delta; - - - if (typ_optn == TREERECON_OPTN) { - - if (puzzlemode == USERTREE) { - fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF USER"); - fprintf(ofp, " DEFINED TREE # %d (NO CLOCK)\n\nBranch lengths are computed using", num); - fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n\n\n"); - clockmode = 0; /* nonclocklike branch lengths */ - prtopology(ofp); - fprintf(ofp, "\n"); - resulttree(ofp); - 
fprintf(ofp, "\n\nUnrooted user defined tree with maximum likelihood branch lengths"); - fprintf(ofp, "\n(in CLUSTAL W notation):\n\n"); - fputphylogeny(ofp); - if (compclock) { - fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF USER"); - fprintf(ofp, " DEFINED TREE # %d (WITH CLOCK)\n\nBranch lengths are computed using", num); - fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n"); - fprintf(ofp, "\nRoot located at branch: %d ", locroot+1); - if (rootsearch == 0) fprintf(ofp, "(user specified)\n\n\n"); - if (rootsearch == 1) { - fprintf(ofp, "(automatic search)"); - if (numbestroot > 1) fprintf(ofp, "- WARNING: %d best locations found! -", numbestroot); - fprintf(ofp, "\n\n"); - fprintf(ofp, "If the automatic search misplaces the root please rerun the analysis\n"); - fprintf(ofp, "and select location of root manually!"); - fprintf(ofp, "\n\n\n"); - - } - if (rootsearch == 2) fprintf(ofp, "(displayed outgroup)\n\n\n"); - clockmode = 1; /* clocklike branch lengths */ - prtopology(ofp); - fprintf(ofp, "\n"); - resulttree(ofp); - fprintf(ofp, "\n"); - resultheights(ofp); - fprintf(ofp, "\n\nRooted user defined tree with clocklike "); - fprintf(ofp, "maximum likelihood branch lengths\n"); - fprintf(ofp, "(in CLUSTAL W notation):\n\n"); - fputrooted(ofp, locroot); - } - - if (compclock) { - fprintf(ofp, "\n\nMOLECULAR CLOCK LIKELIHOOD RATIO TEST FOR USER TREE # %d\n\n", num); - fprintf(ofp, "log L without clock: %.2f (independent branch parameters: %d)\n", - Ctree->lklhd, Numspc + Numibrnch); - fprintf(ofp, "log L with clock: %.2f (independent branch parameters: %d)\n\n", - Ctree->lklhdc, Numhts + 1); - delta = 2.0*((Ctree->lklhd) - (Ctree->lklhdc)); - fprintf(ofp, "Likelihood ratio test statistic delta: %.2f\n", delta); - df = Numspc + Numibrnch - Numhts - 1; - fprintf(ofp, "Degrees of freedom of chi-square distribution: %d\n", df); - - pval = IncompleteGammaQ (df*0.5, delta*0.5); - - fprintf(ofp, "Critical significance level: 
%.2f%%\n\n", pval*100.0); - if (pval >= 0.05) { - fprintf(ofp, "The simpler (clocklike) tree can not be rejected on a significance\n"); - fprintf(ofp, "level of 5%%. The log-likelihood of the more complex (no clock) tree\n"); - fprintf(ofp, "is not significantly increased.\n"); - } else { - fprintf(ofp, "The simpler (clocklike) tree is rejected on a significance level\n"); - fprintf(ofp, "of 5%%. The log-likelihood of the more complex (no clock) tree is\n"); - fprintf(ofp, "significantly increased.\n"); - } - fprintf(ofp, "\nPlease take care that the correct root is used!\n"); - } - } - } -} - - -/******************************************************************************/ -/* timer routines */ -/******************************************************************************/ - -/* start timer */ -void starttimer() -{ - time(&time0); - time1 = time0; -} - -/* check remaining time and print message if necessary */ -void checktimer(uli numqts) -{ - double tc2, mintogo, minutes, hours; - - time(&time2); - if ( (time2 - time1) > 900) { /* generate message every 15 minutes */ - /* every 900 seconds */ - /* percentage of completed quartets */ - if (mflag == 0) { - mflag = 1; - FPRINTF(STDOUTFILE "\n"); - } - tc2 = 100.*numqts/Numquartets; - mintogo = (100.0-tc2) * - (double) (time2-time0)/60.0/tc2; - hours = floor(mintogo/60.0); - minutes = mintogo - 60.0*hours; - FPRINTF(STDOUTFILE "%.2f%%", tc2); - FPRINTF(STDOUTFILE " completed (remaining"); - FPRINTF(STDOUTFILE " time: %.0f", hours); - FPRINTF(STDOUTFILE " hours %.0f", minutes); - FPRINTF(STDOUTFILE " minutes)\n"); - fflush(STDOUT); - time1 = time2; - } - -} - -/* check remaining time and print message if necessary */ -void checktimer2(uli numqts, uli all, int flag) -{ - double tc2, mintogo, minutes, hours; - - static time_t tt1; - static time_t tt2; - - if (flag == 1) { - time(&tt1); - time(&tt2); - } else { - time(&tt2); - if ( (tt2 - tt1) > 900) { /* generate message every 15 minutes */ - /* every 900 seconds */ 
- /* percentage of completed quartets */ - if (mflag == 0) { - mflag = 1; - FPRINTF(STDOUTFILE "\n"); - } - tc2 = 100.*numqts/Numquartets; - mintogo = (100.0-tc2) * - (double) (tt2-time0)/60.0/tc2; - hours = floor(mintogo/60.0); - minutes = mintogo - 60.0*hours; - FPRINTF(STDOUTFILE "%.2f%%", tc2); - FPRINTF(STDOUTFILE " completed (remaining"); - FPRINTF(STDOUTFILE " time: %.0f", hours); - FPRINTF(STDOUTFILE " hours %.0f", minutes); - FPRINTF(STDOUTFILE " minutes)\n"); - fflush(STDOUT); - tt1 = tt2; - } - } -} - -void resetqblocktime(timearray_t *ta) -{ - ta->quartcpu += ta->quartblockcpu; - ta->quartblockcpu = 0.0; - ta->quarttime += ta->quartblocktime; - ta->quartblocktime = 0.0; -} /* resetqblocktime */ - - -void resetpblocktime(timearray_t *ta) -{ - ta->puzzcpu += ta->puzzblockcpu; - ta->puzzblockcpu = 0.0; - ta->puzztime += ta->puzzblocktime; - ta->puzzblocktime = 0.0; -} /* resetpblocktime */ - - -#ifdef TIMEDEBUG -void printtimearr(timearray_t *ta) -{ -# if ! PARALLEL - int PP_Myid; - PP_Myid = -1; -# endif - printf("(%2d) MMCPU: %11ld / %11ld \n", PP_Myid, ta->maxcpu, ta->mincpu); - printf("(%2d) CTick: %11.6f [tks] / %11.6f [s] \n", PP_Myid, ta->mincputick, ta->mincputicktime); - - printf("(%2d) MMTIM: %11ld / %11ld \n", PP_Myid, ta->maxtime, ta->mintime); - - printf("(%2d) Mxblk: %11.6e / %11.6e \n", PP_Myid, ta->maxcpublock, ta->maxtimeblock); - printf("(%2d) Mnblk: %11.6e / %11.6e \n", PP_Myid, ta->mincpublock, ta->mintimeblock); - - printf("(%2d) Gnrl: %11.6e / %11.6e \n", PP_Myid, ta->generalcpu, ta->generaltime); - printf("(%2d) Optn: %11.6e / %11.6e \n", PP_Myid, ta->optionscpu, ta->optionstime); - printf("(%2d) Estm: %11.6e / %11.6e \n", PP_Myid, ta->paramestcpu, ta->paramesttime); - printf("(%2d) Qurt: %11.6e / %11.6e \n", PP_Myid, ta->quartcpu, ta->quarttime); - printf("(%2d) QBlk: %11.6e / %11.6e \n", PP_Myid, ta->quartblockcpu, ta->quartblocktime); - printf("(%2d) QMax: %11.6e / %11.6e \n", PP_Myid, ta->quartmaxcpu, ta->quartmaxtime); - 
printf("(%2d) QMin: %11.6e / %11.6e \n", PP_Myid, ta->quartmincpu, ta->quartmintime); - - printf("(%2d) Puzz: %11.6e / %11.6e \n", PP_Myid, ta->puzzcpu, ta->puzztime); - printf("(%2d) PBlk: %11.6e / %11.6e \n", PP_Myid, ta->puzzblockcpu, ta->puzzblocktime); - printf("(%2d) PMax: %11.6e / %11.6e \n", PP_Myid, ta->puzzmaxcpu, ta->puzzmaxtime); - printf("(%2d) PMin: %11.6e / %11.6e \n", PP_Myid, ta->puzzmincpu, ta->puzzmintime); - - printf("(%2d) Tree: %11.6e / %11.6e \n", PP_Myid, ta->treecpu, ta->treetime); - printf("(%2d) TBlk: %11.6e / %11.6e \n", PP_Myid, ta->treeblockcpu, ta->treeblocktime); - printf("(%2d) TMax: %11.6e / %11.6e \n", PP_Myid, ta->treemaxcpu, ta->treemaxtime); - printf("(%2d) TMin: %11.6e / %11.6e \n", PP_Myid, ta->treemincpu, ta->treemintime); - - printf("(%2d) C/T : %11.6e / %11.6e \n", PP_Myid, - (ta->generalcpu + ta->optionscpu + ta->paramestcpu + ta->quartblockcpu + ta->puzzblockcpu + ta->treeblockcpu), - (ta->generaltime + ta->optionstime + ta->paramesttime + ta->quartblocktime + ta->puzzblocktime + ta->treeblocktime)); - printf("(%2d) CPU: %11.6e / Time: %11.6e \n", PP_Myid, ta->cpu, ta->time); - printf("(%2d) aCPU: %11.6e / aTime: %11.6e \n", PP_Myid, ta->fullcpu, ta->fulltime); - -} /* printtimearr */ -#endif /* TIMEDEBUG */ - -char *jtype [7]; - -void inittimearr(timearray_t *ta) -{ - clock_t c0, c1, c2; - - jtype[OVERALL] = "OVERALL"; - jtype[GENERAL] = "GENERAL"; - jtype[OPTIONS] = "OPTIONS"; - jtype[PARAMEST] = "PARAMeter ESTimation"; - jtype[QUARTETS] = "QUARTETS"; - jtype[PUZZLING] = "PUZZLING steps"; - jtype[TREEEVAL] = "TREE EVALuation"; - ta->currentjob = GENERAL; - - c1 = clock(); - c2 = clock(); - while (c1 == c2) - c2 = clock(); - ta->mincputick = (double)(c2 - c1); - ta->mincputicktime = ((double)(c2 - c1))/CLOCKS_PER_SEC; - - ta->tempcpu = clock(); - ta->tempcpustart = ta->tempcpu; - ta->tempfullcpu = ta->tempcpu; - time(&(ta->temptime)); - ta->temptimestart = ta->temptime; - ta->tempfulltime = ta->temptime; - - c0=0; c1=0; 
c2=(clock_t)((2 * c1) + 1);; - while (c1 < c2) { - c0 = c1; - c1 = c2; - c2 = (clock_t)((2 * c1) + 1); - } - if (c1 == c2) ta->maxcpu=c0; - if (c1 > c2) ta->maxcpu=c1; - - c0=0; c1=0; c2=(clock_t)((2 * c1) - 1); - while (c1 > c2) { - c0 = c1; - c1 = c2; - c2 = (clock_t)((2 * c1) - 1); - } - if (c1 == c2) ta->mincpu=c0; - if (c1 < c2) ta->mincpu=c1; - - - - ta->maxtime = 0; - ta->mintime = 0; - - ta->maxcpublock = 0; - ta->mincpublock = DBL_MAX; - ta->maxtimeblock = 0; - ta->mintimeblock = DBL_MAX; - - ta->cpu = 0.0; - ta->time = 0.0; - - ta->fullcpu = 0.0; - ta->fulltime = 0.0; - - ta->generalcpu = 0.0; - ta->optionscpu = 0.0; - ta->paramestcpu = 0.0; - ta->quartcpu = 0.0; - ta->quartblockcpu = 0.0; - ta->quartmaxcpu = 0.0; - ta->quartmincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC; - ta->puzzcpu = 0.0; - ta->puzzblockcpu = 0.0; - ta->puzzmaxcpu = 0.0; - ta->puzzmincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC; - ta->treecpu = 0.0; - ta->treeblockcpu = 0.0; - ta->treemaxcpu = 0.0; - ta->treemincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC; - - ta->generaltime = 0.0; - ta->optionstime = 0.0; - ta->paramesttime = 0.0; - ta->quarttime = 0.0; - ta->quartblocktime = 0.0; - ta->quartmaxtime = 0.0; - ta->quartmintime = DBL_MAX; - ta->puzztime = 0.0; - ta->puzzblocktime = 0.0; - ta->puzzmaxtime = 0.0; - ta->puzzmintime = DBL_MAX; - ta->treetime = 0.0; - ta->treeblocktime = 0.0; - ta->treemaxtime = 0.0; - ta->treemintime = DBL_MAX; -} /* inittimearr */ - - -/***************/ - -void addup(int jobtype, clock_t c1, clock_t c2, time_t t1, time_t t2, timearray_t *ta) -{ - double c, - t; - - if (t2 != t1) t = difftime(t2, t1); - else t = 0.0; - - if (c2 < c1) - c = ((double)(c2 - ta->mincpu))/CLOCKS_PER_SEC + - ((double)(ta->maxcpu - c1))/CLOCKS_PER_SEC; - else - c = ((double)(c2 - c1))/CLOCKS_PER_SEC; - - if (jobtype != OVERALL) { - - if (ta->mincpublock > c) ta->mincpublock = c; - if (ta->maxcpublock < c) ta->maxcpublock = c; - if (ta->mintimeblock > t) ta->mintimeblock = t; - if 
(ta->maxtimeblock < t) ta->maxtimeblock = t; - - switch (jobtype) { - case GENERAL: ta->generalcpu += c; - ta->generaltime += t; - break; - case OPTIONS: ta->optionscpu += c; - ta->optionstime += t; - break; - case PARAMEST: ta->paramestcpu += c; - ta->paramesttime += t; - break; - case QUARTETS: ta->quartblockcpu += c; - ta->quartblocktime += t; - if (ta->quartmincpu > c) ta->quartmincpu = c; - if (ta->quartmaxcpu < c) ta->quartmaxcpu = c; - if (ta->quartmintime > t) ta->quartmintime = t; - if (ta->quartmaxtime < t) ta->quartmaxtime = t; - break; - case PUZZLING: ta->puzzblockcpu += c; - ta->puzzblocktime += t; - if (ta->puzzmincpu > c) ta->puzzmincpu = c; - if (ta->puzzmaxcpu < c) ta->puzzmaxcpu = c; - if (ta->puzzmintime > t) ta->puzzmintime = t; - if (ta->puzzmaxtime < t) ta->puzzmaxtime = t; - break; - case TREEEVAL: ta->treeblockcpu += c; - ta->treeblocktime += t; - if (ta->treemincpu > c) ta->treemincpu = c; - if (ta->treemaxcpu < c) ta->treemaxcpu = c; - if (ta->treemintime > t) ta->treemintime = t; - if (ta->treemaxtime < t) ta->treemaxtime = t; - break; - } - ta->cpu += c; - ta->time += t; - - } else { - ta->fullcpu += c; - ta->fulltime += t; - } - -# ifdef TIMEDEBUG - { -# if ! 
PARALLEL - int PP_Myid = -1; -# endif /* !PARALLEL */ - printf("(%2d) CPU: +%10.6f / Time: +%10.6f (%s)\n", PP_Myid, c, t, jtype[jobtype]); - printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->cpu, ta->time, jtype[jobtype]); - printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->fullcpu, ta->fulltime, jtype[jobtype]); - } -# endif /* TIMEDEBUG */ -} /* addup */ - - -/***************/ - - -void addtimes(int jobtype, timearray_t *ta) -{ - clock_t tempc; - time_t tempt; - - time(&tempt); - tempc = clock(); - - if ((tempc < ta->tempfullcpu) || (jobtype == OVERALL)) { /* CPU counter overflow for overall time */ - addup(OVERALL, ta->tempfullcpu, tempc, ta->tempfulltime, tempt, ta); - ta->tempfullcpu = tempc; - ta->tempfulltime = tempt; - if (jobtype == OVERALL) { - addup(ta->currentjob, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta); - ta->tempcpustart = ta->tempcpu; - ta->tempcpu = tempc; - ta->temptimestart = ta->temptime; - ta->temptime = tempt; - } - } - - if((jobtype != ta->currentjob) && (jobtype != OVERALL)) { /* change of job type */ - addup(ta->currentjob, ta->tempcpustart, ta->tempcpu, ta->temptimestart, ta->temptime, ta); - ta->tempcpustart = ta->tempcpu; - ta->tempcpu = tempc; - ta->temptimestart = ta->temptime; - ta->temptime = tempt; - ta->currentjob = jobtype; - } - - if (tempc < ta->tempcpustart) { /* CPU counter overflow */ - addup(jobtype, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta); - ta->tempcpustart = ta->tempcpu; - ta->tempcpu = tempc; - ta->temptimestart = ta->temptime; - ta->temptime = tempt; - } - -} /* addtimes */ - - - -/******************************************************************************/ - -/* estimate parameters of substitution process and rate heterogeneity - no tree - n-taxon tree is not needed because of quartet method or NJ tree topology */ -void estimateparametersnotree() -{ - int it, nump, change; - double TSold, YRold, FIold, GEold; - - it = 0; - nump = 0; - - /* count number of 
parameters */ - if (data_optn == NUCLEOTIDE && optim_optn) nump++; - if (fracinv_optim || grate_optim) nump++; - - do { /* repeat until nothing changes any more */ - it++; - change = FALSE; - - /* optimize substitution parameters */ - if (data_optn == NUCLEOTIDE && optim_optn) { - - TSold = TSparam; - YRold = YRparam; - - - /* - * optimize - */ - - FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n"); - fflush(STDOUT); - - if (qcalg_optn) { /* quartet sampling */ - optimseqevolparamsq(); - } else { /* NJ tree */ - tmpfp = tmpfile(); - njtree(tmpfp); - rewind(tmpfp); - readusertree(tmpfp); - closefile(tmpfp); - optimseqevolparamst(); - } - - computedistan(); /* update ML distances */ - - /* same tolerance as 1D minimization */ - if ((fabs(TSparam - TSold) > 3.3*PEPS1) || - (fabs(YRparam - YRold) > 3.3*PEPS1) - ) change = TRUE; - - } - - /* optimize rate heterogeneity variables */ - if (fracinv_optim || grate_optim) { - - FIold = fracinv; - GEold = Geta; - - - /* - * optimize - */ - - FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n"); - fflush(STDOUT); - /* compute NJ tree */ - tmpfp = tmpfile(); - njtree(tmpfp); - /* use NJ tree topology to estimate parameters */ - rewind(tmpfp); - readusertree(tmpfp); - closefile(tmpfp); - - optimrateparams(); - computedistan(); /* update ML distances */ - - - /* same tolerance as 1D minimization */ - if ((fabs(fracinv - FIold) > 3.3*PEPS2) || - (fabs(Geta - GEold) > 3.3*PEPS2) - ) change = TRUE; - - } - - if (nump == 1) return; - - } while (it != MAXITS && change); - - return; -} - - -/* estimate parameters of substitution process and rate heterogeneity - tree - same as above but here the n-taxon tree is already in memory */ -void estimateparameterstree() -{ - int it, nump, change; - double TSold, YRold, FIold, GEold; - - it = 0; - nump = 0; - - /* count number of parameters */ - if (data_optn == NUCLEOTIDE && optim_optn) nump++; - if (fracinv_optim || grate_optim) nump++; - - do { /* 
repeat until nothing changes any more */ - it++; - change = FALSE; - - /* optimize substitution process parameters */ - if (data_optn == NUCLEOTIDE && optim_optn) { - - TSold = TSparam; - YRold = YRparam; - - - /* - * optimize - */ - - FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n"); - fflush(STDOUT); - optimseqevolparamst(); - computedistan(); /* update ML distances */ - - - /* same tolerance as 1D minimization */ - if ((fabs(TSparam - TSold) > 3.3*PEPS1) || - (fabs(YRparam - YRold) > 3.3*PEPS1) - ) change = TRUE; - - } - - /* optimize rate heterogeneity variables */ - if (fracinv_optim || grate_optim) { - - FIold = fracinv; - GEold = Geta; - - - /* - * optimize - */ - - FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n"); - fflush(STDOUT); - optimrateparams(); - computedistan(); /* update ML distances */ - - - /* same tolerance as 1D minimization */ - if ((fabs(fracinv - FIold) > 3.3*PEPS2) || - (fabs(Geta - GEold) > 3.3*PEPS2) - ) change = TRUE; - - } - - if (nump == 1) return; - - } while (it != MAXITS && change); - - return; -} - - -/******************************************************************************/ -/* exported from main */ -/******************************************************************************/ - -void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx) -{ - if (approx == APPROX) { - - *d1 = quartet_alklhd(a,b, c,d); /* (a,b)-(c,d) */ - *d2 = quartet_alklhd(a,c, b,d); /* (a,c)-(b,d) */ - *d3 = quartet_alklhd(a,d, b,c); /* (a,d)-(b,c) */ - - } else /* approx == EXACT */ { - - *d1 = quartet_lklhd(a,b, c,d); /* (a,b)-(c,d) */ - *d2 = quartet_lklhd(a,c, b,d); /* (a,c)-(b,d) */ - *d3 = quartet_lklhd(a,d, b,c); /* (a,d)-(b,c) */ - - } -} - -/***************************************************************/ - -void recon_tree() -{ - int i; -# if ! 
PARALLEL - int a, b, c; - uli nq; - double tc2, mintogo, minutes, hours; -# endif - - /* allocate memory for taxon list of bad quartets */ - badtaxon = new_ulivector(Maxspc); - for (i = 0; i < Maxspc; i++) badtaxon[i] = 0; - - /* allocate variable used for randomizing input order */ - trueID = new_ivector(Maxspc); - - /* allocate memory for quartets */ - quartetinfo = mallocquartets(Maxspc); - - /* prepare for consensus tree analysis */ - initconsensus(); - - if (!(readquart_optn) || (readquart_optn && savequart_optn)) { - /* compute quartets */ - FPRINTF(STDOUTFILE "Computing quartet maximum likelihood trees\n"); - fflush(STDOUT); - computeallquartets(); - } - - if (savequart_optn) - writeallquarts(Maxspc, ALLQUART, quartetinfo); - if (readquart_optn) { - int xx1, xx2, xx3, xx4, count; - readallquarts (Maxspc, ALLQUART, quartetinfo); - if (show_optn) { /* list all unresolved quartets */ - openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees"); - fprintf(unresfp, "List of all completely unresolved quartets:\n\n"); - } - - /* initialize bad quartet memory */ - for (count = 0; count < Maxspc; count++) badtaxon[count] = 0; - badqs = 0; - - for (xx4 = 3; xx4 < Maxspc; xx4++) - for (xx3 = 2; xx3 < xx4; xx3++) - for (xx2 = 1; xx2 < xx3; xx2++) - for (xx1 = 0; xx1 < xx2; xx1++) { - if (readquartet(xx1, xx2, xx3, xx4) == 7) { - badqs++; - badtaxon[xx1]++; - badtaxon[xx2]++; - badtaxon[xx3]++; - badtaxon[xx4]++; - if (show_optn) { - fputid10(unresfp, xx1); - fprintf(unresfp, " "); - fputid10(unresfp, xx2); - fprintf(unresfp, " "); - fputid10(unresfp, xx3); - fprintf(unresfp, " "); - fputid (unresfp, xx4); - fprintf(unresfp, "\n"); - } - } - } /* end for xx4; for xx3; for xx2; for xx1 */ - if (show_optn) /* list all unresolved quartets */ - fclose(unresfp); - } /* readquart_optn */ - -# if PARALLEL - PP_SendAllQuarts(numquarts(Maxspc), quartetinfo); -# endif /* PARALLEL */ - - FPRINTF(STDOUTFILE "Computing quartet puzzling tree\n"); - fflush(STDOUT); - - /* start 
timer - percentage of completed trees */ - time(&time0); - time1 = time0; - mflag = 0; - - /* open file for chronological list of puzzling step trees */ - if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER)) - openfiletowrite(&qptlist, OUTPTLIST, "puzzling step trees (chonological)"); - -# if PARALLEL - { - PP_SendDoPermutBlock(Numtrial); - } -# else - addtimes(GENERAL, &tarr); - for (Currtrial = 0; Currtrial < Numtrial; Currtrial++) { - - /* randomize input order */ - chooser(Maxspc, Maxspc, trueID); - - /* initialize tree */ - inittree(); - - /* adding all other leafs */ - for (i = 3; i < Maxspc; i++) { - - /* clear all edgeinfos */ - resetedgeinfo(); - - /* clear counter of quartets */ - nq = 0; - - /* - * core of quartet puzzling algorithm - */ - - for (a = 0; a < nextleaf - 2; a++) - for (b = a + 1; b < nextleaf - 1; b++) - for (c = b + 1; c < nextleaf; c++) { - - /* check which two _leaves_ out of a, b, c - are closer related to each other than - to leaf i according to a least squares - fit of the continous Baysian weights to the - seven trivial "attractive regions". 
We assign - a score of 1 to all edges between these two leaves - chooseA and chooseB */ - - checkquartet(a, b, c, i); - incrementedgeinfo(chooseA, chooseB); - - nq++; - - /* generate message every 15 minutes */ - - /* check timer */ - time(&time2); - if ( (time2 - time1) > 900) { - /* every 900 seconds */ - /* percentage of completed trees */ - if (mflag == 0) { - FPRINTF(STDOUTFILE "\n"); - mflag = 1; - } - tc2 = 100.0*Currtrial/Numtrial + - 100.0*nq/Numquartets/Numtrial; - mintogo = (100.0-tc2) * - (double) (time2-time0)/60.0/tc2; - hours = floor(mintogo/60.0); - minutes = mintogo - 60.0*hours; - FPRINTF(STDOUTFILE "%2.2f%%", tc2); - FPRINTF(STDOUTFILE " completed (remaining"); - FPRINTF(STDOUTFILE " time: %.0f", hours); - FPRINTF(STDOUTFILE " hours %.0f", minutes); - FPRINTF(STDOUTFILE " minutes)\n"); - fflush(STDOUT); - time1 = time2; - } - } - - /* find out which edge has the lowest edgeinfo */ - minimumedgeinfo(); - - /* add the next leaf on minedge */ - addnextleaf(minedge); - } - - /* compute bipartitions of current tree */ - computebiparts(); - makenewsplitentries(); - - { - int *ctree, startnode; - char *trstr; - treelistitemtype *treeitem; - ctree = initctree(); - copytree(ctree); - startnode = sortctree(ctree); - trstr=sprintfctree(ctree, psteptreestrlen); - - - treeitem = addtree2list(&trstr, 1, &psteptreelist, &psteptreenum, &psteptreesum); - - if((listqptrees == PSTOUT_LIST) - || (listqptrees == PSTOUT_LISTORDER)) { - /* print: order no/# topol per this id/tree id/sum of unique topologies/sum of trees so far */ - fprintf(qptlist, "%ld.\t1\t%d\t%d\t%d\t%d\n", - Currtrial + 1, (*treeitem).count, (*treeitem).id, psteptreenum, psteptreesum); - } - -# ifdef VERBOSE1 - printf("%s\n", trstr); - printfsortedpstrees(psteptreelist); -# endif - freectree(&ctree); - } - - - - /* free tree before building the next tree */ - freetree(); - - addtimes(PUZZLING, &tarr); - } -# endif /* PARALLEL */ - - /* close file for list of puzzling step trees */ - if((listqptrees 
== PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER)) - closefile(qptlist); - - if (mflag == 1) FPRINTF(STDOUTFILE "\n"); - - /* garbage collection */ - free(splitcomp); - free_ivector(trueID); - -# if ! PARALLEL - free_cmatrix(biparts); -# endif /* PARALLEL */ - - freequartets(); - - /* compute majority rule consensus tree */ - makeconsensus(); - - /* write consensus tree to tmp file */ - tmpfp = tmpfile(); - writeconsensustree(tmpfp); -} /* recon_tree */ - -/***************************************************************/ - -void map_lklhd() -{ - int i, a, a1, a2, b, b1, b2, c, c1, c2, d; - uli nq; - double logs[3], d1, d2, d3, temp; - ivector qts, mlorder, gettwo; - /* reset variables */ - ar1 = ar2 = ar3 = 0; - reg1 = reg2 = reg3 = reg4 = reg5 = reg6 = reg7 = 0; - reg1l = reg1r = reg2u = reg2d = reg3u = reg3d = reg4u = - reg4d = reg5l = reg5r = reg6u = reg6d = 0; - - /* place for random quartet */ - qts = new_ivector(4); - - /* initialize output file */ - openfiletowrite(&trifp, TRIANGLE, "Postscript output"); - initps(trifp); - FPRINTF(STDOUTFILE "Performing likelihood mapping analysis\n"); - fflush(STDOUT); - - /* start timer */ - starttimer(); - nq = 0; - mflag = 0; - - addtimes(GENERAL, &tarr); - if (lmqts == 0) { /* all possible quartets */ - - if (numclust == 4) { /* four-cluster analysis */ - - for (a = 0; a < clustA; a++) - for (b = 0; b < clustB; b++) - for (c = 0; c < clustC; c++) - for (d = 0; d < clustD; d++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c],clusterD[d],&d1,&d2,&d3, APPROX); - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - } - - if (numclust == 3) { /* three-cluster analysis */ - - gettwo = new_ivector(2); - - for (a = 0; a < clustA; a++) - for (b = 0; b < clustB; b++) - for (c1 = 0; c1 < clustC-1; c1++) - for (c2 = c1+1; c2 < clustC; c2++) { - 
- nq++; - - /* check timer */ - checktimer(nq); - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c1],clusterC[c2],&d1,&d2,&d3, APPROX); - - /* randomize order of d2 and d3 */ - if (randominteger(2) == 1) { - temp = d3; - d3 = d2; - d2 = temp; - } - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - free_ivector(gettwo); - } - - if (numclust == 2) { /* two-cluster analysis */ - - gettwo = new_ivector(2); - - for (a1 = 0; a1 < clustA-1; a1++) - for (a2 = a1+1; a2 < clustA; a2++) - for (b1 = 0; b1 < clustB-1; b1++) - for (b2 = b1+1; b2 < clustB; b2++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(clusterA[a1],clusterA[a2],clusterB[b1],clusterB[b2],&d1,&d2,&d3, APPROX); - - /* randomize order of d2 and d3 */ - if (randominteger(2) == 1) { - temp = d3; - d3 = d2; - d2 = temp; - } - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - - free_ivector(gettwo); - } - - if (numclust == 1) { /* normal likelihood mapping (one cluster) */ - - mlorder = new_ivector(3); - -#if 0 - for (i = 3; i < Maxspc; i++) - for (a = 0; a < i - 2; a++) - for (b = a + 1; b < i - 1; b++) - for (c = b + 1; c < i; c++) - for (d = 3; d < Maxspc; d++) - for (c = 2; c < d; c++) - for (b = 1; b < c; b++) - for (a = 0; a < b; a++) -#endif - - for (i = 3; i < Maxspc; i++) - for (c = 2; c < i; c++) - for (b = 1; b < c; b++) - for (a = 0; a < b; a++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(a,b,c,i,&logs[0],&logs[1],&logs[2], APPROX); - - /* randomize order */ - chooser(3,3,mlorder); - d1 = logs[mlorder[0]]; - d2 = logs[mlorder[1]]; - d3 = logs[mlorder[2]]; - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, 
d2, d3); - addtimes(QUARTETS, &tarr); - - } - free_ivector(mlorder); - } - - } else { /* randomly selected quartets */ - - if (numclust == 4) { /* four-cluster analysis */ - - for (lmqts = 0; lmqts < Numquartets; lmqts++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* choose random quartet */ - qts[0] = clusterA[ randominteger(clustA) ]; - qts[1] = clusterB[ randominteger(clustB) ]; - qts[2] = clusterC[ randominteger(clustC) ]; - qts[3] = clusterD[ randominteger(clustD) ]; - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX); - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - } - - if (numclust == 3) { /* three-cluster analysis */ - - gettwo = new_ivector(2); - - for (lmqts = 0; lmqts < Numquartets; lmqts++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* choose random quartet */ - qts[0] = clusterA[ randominteger(clustA) ]; - qts[1] = clusterB[ randominteger(clustB) ]; - chooser(clustC, 2, gettwo); - qts[2] = clusterC[gettwo[0]]; - qts[3] = clusterC[gettwo[1]]; - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX); - - /* order of d2 and d3 is already randomized! 
*/ - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - - free_ivector(gettwo); - } - - if (numclust == 2) { /* two-cluster analysis */ - - gettwo = new_ivector(2); - - for (lmqts = 0; lmqts < Numquartets; lmqts++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* choose random quartet */ - chooser(clustA, 2, gettwo); - qts[0] = clusterA[gettwo[0]]; - qts[1] = clusterA[gettwo[1]]; - chooser(clustB, 2, gettwo); - qts[2] = clusterB[gettwo[0]]; - qts[3] = clusterB[gettwo[1]]; - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX); - - /* order of d2 and d3 is already randomized! */ - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - free_ivector(gettwo); - } - - if (numclust == 1) { /* normal likelihood mapping (one cluster) */ - - for (lmqts = 0; lmqts < Numquartets; lmqts++) { - - nq++; - - /* check timer */ - checktimer(nq); - - /* choose random quartet */ - chooser(Maxspc, 4, qts); - - /* maximum likelihood values */ - /* approximate ML is sufficient */ - compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX); - - /* order of d1, d2, and d3 is already randomized! 
*/ - - /* draw point for LM analysis */ - makelmpoint(trifp, d1, d2, d3); - addtimes(QUARTETS, &tarr); - - } - } - } - - finishps(trifp); - closefile(trifp); - free_ivector(qts); - -} /* map_lklhd */ - -/***************************************************************/ - -void setdefaults() { - - strcpy(INFILE, INFILEDEFAULT); - strcpy(OUTFILE, OUTFILEDEFAULT); - strcpy(TREEFILE, TREEFILEDEFAULT); - strcpy(INTREE, INTREEDEFAULT); - strcpy(DISTANCES, DISTANCESDEFAULT); - strcpy(TRIANGLE, TRIANGLEDEFAULT); - strcpy(UNRESOLVED, UNRESOLVEDDEFAULT); - strcpy(ALLQUART, ALLQUARTDEFAULT); - strcpy(ALLQUARTLH, ALLQUARTLHDEFAULT); - strcpy(OUTPTLIST, OUTPTLISTDEFAULT); - strcpy(OUTPTORDER, OUTPTORDERDEFAULT); - - usebestq_optn = FALSE; - savequartlh_optn = FALSE; - savequart_optn = FALSE; - readquart_optn = FALSE; - - randseed = -1; /* to set random random seed */ - -} /* setdefaults */ - -/***************************************************************/ - -void printversion() -{ -# if ! PARALLEL - fprintf(stderr, "puzzle (%s) %s\n", PACKAGE, VERSION); -#else - fprintf(stderr, "ppuzzle (%s) %s\n", PACKAGE, VERSION); -# endif - exit (0); -} -/***************************************************************/ - -void printusage(char *fname) -{ - fprintf(stderr, "\n\nUsage: %s [-h] [ Infilename [ UserTreeFilename ] ]\n\n", fname); -# if PARALLEL - PP_SendDone(); - MPI_Finalize(); -# endif - exit (1); -} - -/***************************************************************/ - -#ifdef HHH -void printusagehhh(char *fname) -{ - fprintf(stderr, "\n\nUsage: %s [options] [ Infilename [ UserTreeFilename ] ]\n\n", fname); - fprintf(stderr, " -h - print usage\n"); - fprintf(stderr, " -wqf - write quartet file to Infilename.allquart\n"); - fprintf(stderr, " -rqf - read quartet file from Infilename.allquart\n"); - fprintf(stderr, " -wqlb - write quart lhs to Infilename.allquartlh (binary)\n"); - fprintf(stderr, " -wqla - write quart lhs to Infilename.allquartlh (ASCII)\n"); - fprintf(stderr, " 
-bestq - use best quart, no basian weights\n"); - fprintf(stderr, " -randseed<#> - use <#> as random number seed, for debug purposes only\n"); -# if PARALLEL - PP_SendDone(); - MPI_Finalize(); -# endif - exit (2); -} -#endif /* HHH */ - -/***************************************************************/ - - -void scancmdline(int *argc, char **argv[]) -{ - static short infileset = 0; - static short intreefileset = 0; - short flagused; - int n; - int count, dummyint; - - for (n = 1; n < *argc; n++) { -# ifdef VERBOSE1 - printf("argv[%d] = %s\n", n, (*argv)[n]); -# endif - - flagused = FALSE; - -# ifdef HHH - dummyint = 0; - count = sscanf((*argv)[n], "-wqlb%n", &dummyint); - if (dummyint == 5) { - savequartlh_optn = TRUE; - saveqlhbin_optn = TRUE; - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n], "-wqla%n", &dummyint); - if (dummyint == 5) { - savequartlh_optn = TRUE; - saveqlhbin_optn = FALSE; - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n], "-wqf%n", &dummyint); - if (dummyint == 4) { - savequart_optn = TRUE; - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n],"-rqf%n", &dummyint); - if (dummyint == 4) { - readquart_optn = TRUE; - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n],"-bestq%n", &dummyint); - if (dummyint == 6) { - usebestq_optn = TRUE; - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n],"-hhh%n", &dummyint); - if (dummyint==4) { - printusagehhh((*argv)[0]); - flagused = TRUE; - } -# endif /* HHH */ - - dummyint = 0; - count = sscanf((*argv)[n],"-V%n", &dummyint); - if (dummyint==2) { - printversion((*argv)[0]); - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n],"-version%n", &dummyint); - if (dummyint==8) { - printversion((*argv)[0]); - flagused = TRUE; - } - - dummyint = 0; - count = sscanf((*argv)[n],"--version%n", &dummyint); - if (dummyint>=4) { - printversion((*argv)[0]); - flagused = TRUE; - } - - dummyint = 0; - count = 
sscanf((*argv)[n],"-h%n", &dummyint); - if (dummyint==2) { - printusage((*argv)[0]); - flagused = TRUE; - } - - count = sscanf((*argv)[n],"-randseed%d", &dummyint); - if (count == 1) { - randseed = dummyint; - flagused = TRUE; - } - -#if 0 - count = sscanf((*argv)[n],"-h%n", &dummyint); - if ((count == 1) && (dummyint>=2)) printusage((*argv)[0]); - - count = sscanf((*argv)[n],"-writequarts%n", &dummyint); - if (count == 1) writequartstofile = 1;; - - count = sscanf((*argv)[n],"-ws%d", &dummyint); - if (count == 1) windowsize = dummyint; -#endif - - if ((*argv)[n][0] != '-') { - if (infileset == 0) { - strcpy(INFILE, (*argv)[n]); - infileset++; - sprintf(OUTFILE ,"%s.%s", INFILE, OUTFILEEXT); - sprintf(TREEFILE ,"%s.%s", INFILE, TREEFILEEXT); - sprintf(DISTANCES ,"%s.%s", INFILE, DISTANCESEXT); - sprintf(TRIANGLE ,"%s.%s", INFILE, TRIANGLEEXT); - sprintf(UNRESOLVED ,"%s.%s", INFILE, UNRESOLVEDEXT); - sprintf(ALLQUART ,"%s.%s", INFILE, ALLQUARTEXT); - sprintf(ALLQUARTLH ,"%s.%s", INFILE, ALLQUARTLHEXT); - sprintf(OUTPTLIST ,"%s.%s", INFILE, OUTPTLISTEXT); - sprintf(OUTPTORDER ,"%s.%s", INFILE, OUTPTORDEREXT); - FPRINTF(STDOUTFILE "Input file: %s\n", INFILE); - flagused = TRUE; - } else { - if (intreefileset == 0) { - strcpy(INTREE, (*argv)[n]); - intreefileset++; - sprintf(OUTFILE ,"%s.%s", INTREE, OUTFILEEXT); - sprintf(TREEFILE ,"%s.%s", INTREE, TREEFILEEXT); - sprintf(DISTANCES ,"%s.%s", INTREE, DISTANCESEXT); - FPRINTF(STDOUTFILE "Usertree file: %s\n", INTREE); - flagused = TRUE; - } - } - } - if (flagused == FALSE) { - fprintf(stderr, "WARNING: commandline parameter %d not recognized (\"%s\")\n", n, (*argv)[n]); - } - flagused = FALSE; - } - -} /* scancmdline */ - - -/***************************************************************/ - -void inputandinit(int *argc, char **argv[]) { - - int ci; - - /* vectors used in QP and LM analysis */ - qweight = new_dvector(3); - sqdiff = new_dvector(3); - qworder = new_ivector(3); - sqorder = new_ivector(3); - - /* 
Initialization and parsing of Commandline */ - setdefaults(); - scancmdline(argc, argv); - - /* initialize random numbers generator */ - if (randseed >= 0) - fprintf(stderr, "WARNING: random seed set to %d for debugging!\n", randseed); - randseed = initrandom(randseed); - - psteptreelist = NULL; - psteptreesum = 0; - bestratefound = 0; - -# ifndef ALPHA - FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s!\n\n\n", VERSION); -# else - FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s%s!\n\n\n", VERSION, ALPHA); -# endif - - - /* get sequences */ - openfiletoread(&seqfp, INFILE, "sequence data"); - getsizesites(seqfp); - FPRINTF(STDOUTFILE "\nInput data set contains %d sequences of length %d\n", Maxspc, Maxseqc); - getdataset(seqfp); - closefile(seqfp); - data_optn = guessdatatype(); - - /* translate characters into format used by ML engine */ - nuc_optn = TRUE; - SH_optn = FALSE; - Seqchar = NULL; - translatedataset(); - - /* estimate base frequencies from data set */ - Freqtpm = NULL; - Basecomp = NULL; - estimatebasefreqs(); - - /* guess model of substitution */ - guessmodel(); - - /* initialize guess variables */ - auto_datatype = AUTO_GUESS; - if (data_optn == AMINOACID) auto_aamodel = AUTO_GUESS; - else auto_aamodel = AUTO_DEFAULT; - /* save guessed amino acid options */ - guessDayhf_optn = Dayhf_optn; - guessJtt_optn = Jtt_optn; - guessmtrev_optn = mtrev_optn; - guesscprev_optn = cprev_optn; - guessblosum62_optn = blosum62_optn; - guessvtmv_optn = vtmv_optn; - guesswag_optn = wag_optn; - guessauto_aamodel = auto_aamodel; - - - /* check for user specified tree */ - if ((utfp = fopen(INTREE, "r")) != NULL) { - fclose(utfp); - puzzlemode = USERTREE; - } else { - puzzlemode = QUARTPUZ; - } - - /* reserve memory for cluster LM analysis */ - clusterA = new_ivector(Maxspc); - clusterB = new_ivector(Maxspc); - clusterC = new_ivector(Maxspc); - clusterD = new_ivector(Maxspc); - - /* set options interactively */ - setoptions(); - - /* open usertree file right after 
start */ - if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) { - openfiletoread(&utfp, INTREE, "user trees"); - } - - /* start main timer */ - time(&Starttime); - Startcpu=clock(); - addtimes(OPTIONS, &tarr); - - /* symmetrize doublet frequencies if specified */ - symdoublets(); - - /* initialise ML */ - mlstart(); - - /* determine how many usertrees */ - if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) { - numutrees = 0; - do { - ci = fgetc(utfp); - if ((char) ci == ';') numutrees++; - } while (ci != EOF); - rewind(utfp); - if (numutrees < 1) { - FPRINTF(STDOUTFILE "Unable to proceed (no tree in input tree file)\n\n\n"); - exit(1); - } - } - - /* check fraction of invariable sites */ - if ((rhetmode == TWORATE || rhetmode == MIXEDRATE) && !fracinv_optim) - /* fraction of invariable site was specified manually */ - if (fracinv > MAXFI) - fracinv = MAXFI; - - addtimes(GENERAL, &tarr); - /* estimate parameters */ - if (!(typ_optn == TREERECON_OPTN && puzzlemode == USERTREE)) { - /* no tree present */ - estimateparametersnotree(); - } else { - if (utree_optn) { - /* use 1st user tree */ - readusertree(utfp); - rewind(utfp); - estimateparameterstree(); - } else { - /* don't use first user tree */ - estimateparametersnotree(); - } - } - addtimes(PARAMEST, &tarr); - - /* compute expected Ts/Tv ratio */ - if (data_optn == NUCLEOTIDE) computeexpectations(); - -} /* inputandinit */ - - - -/***************************************************************/ - -void evaluatetree(FILE *intreefp, FILE *outtreefp, int pmode, int utreenum, int maxutree, int *oldlocroot) -{ - - switch (pmode) { - case QUARTPUZ: /* read QP tree */ - readusertree(intreefp); - FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock)\n"); - fflush(STDOUT); - usertree_lklhd(); - findbestratecombination(); - break; - case USERTREE: /* read user tree */ - readusertree(intreefp); - FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock) for tree 
# %d\n", utreenum+1); - fflush(STDOUT); - usertree_lklhd(); - if (maxutree > 1) { - ulkl[utreenum] = Ctree->lklhd; - allsitelkl(Ctree->condlkl, allsites[utreenum]); - } - if (utreenum==0) findbestratecombination(); - break; - } - - - if (compclock) { /* clocklike branch length */ - switch (pmode) { - case QUARTPUZ: - FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock)\n"); - fflush(STDOUT); - break; - case USERTREE: - FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock) for tree # %d\n", utreenum+1); - fflush(STDOUT); - break; - } - - /* find best place for root */ - rootsearch = 0; - - if (utreenum==0) locroot = *oldlocroot; - else *oldlocroot = locroot; - - if (locroot < 0) { - locroot = findrootedge(); - rootsearch = 1; - } - /* if user-specified edge for root does not exist use displayed outgroup */ - if (!checkedge(locroot)) { - locroot = outgroup; - rootsearch = 2; - } - /* compute likelihood */ - clock_lklhd(locroot); - if (maxutree > 1) { - ulklc[utreenum] = Ctree->lklhdc; - allsitelkl(Ctree->condlkl, allsitesc[utreenum]); - } - - } - - if (clockmode == 0) - fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhd); - else - fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhdc); - - /* write ML branch length tree to outree file */ - clockmode = 0; /* nonclocklike branch lengths */ - fputphylogeny(outtreefp); - - /* clocklike branch lengths */ - if (compclock) { - clockmode = 1; - fputrooted(outtreefp, locroot); - } -} /* evaluatetree */ - -/***************************************************************/ - -void memcleanup() { - if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) { - free(splitfreqs); - free(splitpatterns); - free(splitsizes); - free_ivector(consconfid); - free_ivector(conssizes); - free_cmatrix(consbiparts); - free_ulivector(badtaxon); - } - free_cmatrix(Identif); - free_dvector(Freqtpm); - free_imatrix(Basecomp); - free_ivector(clusterA); - free_ivector(clusterB); - free_ivector(clusterC); - 
free_ivector(clusterD); - free_dvector(qweight); - free_dvector(sqdiff); - free_ivector(qworder); - free_ivector(sqorder); - freetreelist(&psteptreelist, &psteptreenum, &psteptreesum); -} /* memcleanup */ - -/***************************************************************/ - - -/******************************************************************************/ -/* main part */ -/******************************************************************************/ - -int main(int argc, char *argv[]) -{ - int i, oldlocroot=0; - - /* start main timer */ - time(&walltimestart); - cputimestart = clock(); - inittimearr(&tarr); - -# if PARALLEL - PP_Init(&argc, &argv); - if (PP_IamSlave) { - slave_main(argc, argv); - } else { -# endif /* PARALLEL */ - - inputandinit(&argc, &argv); - - /* CZ 05/19/01 */ - /* FPRINTF(STDOUTFILE "Writing parameters to file %s\n", OUTFILE); */ - /* openfiletowrite(&ofp, OUTFILE, "general output"); */ - /* writeoutputfile(ofp,WRITEPARAMS); */ - /* fclose(ofp); */ - - - /* write distance matrix */ - FPRINTF(STDOUTFILE "Writing pairwise distances to file %s\n", DISTANCES); - openfiletowrite(&dfp, DISTANCES, "pairwise distances"); - putdistance(dfp); - closefile(dfp); - -# if PARALLEL - PP_SendSizes(Maxspc, Maxsite, numcats, Numptrn, tpmradix, outgroup, fracconst, randseed); - PP_SendData(Seqpat, /* cmatrix */ - Alias, Weight, constpat, /* ivector */ - Rates, Eval, Freqtpm, /* dvector */ - Evec, Ievc, iexp, Distanmat, /* dmatrix */ - ltprobr); /* dcube */ -# endif /* PARALLEL */ - psteptreestrlen = (Maxspc * (int)(1 + log10(Maxspc))) + - (Maxspc * 3); - - switch (typ_optn) { - case TREERECON_OPTN: /* tree reconstruction */ - - if (puzzlemode == QUARTPUZ) { /* quartet puzzling */ - recon_tree(); - } /* quartet puzzling */ - break; - - case LIKMAPING_OPTN: /* likelihood mapping */ - - map_lklhd(); - break; - } /* switch typ_optn */ - - - free_cmatrix(Seqchar); - free_cmatrix(seqchars); - - /* reserve memory for tree statistics */ - if (typ_optn == 
TREERECON_OPTN && puzzlemode == USERTREE && numutrees > 1) { - ulkl = new_dvector(numutrees); - allsites = new_dmatrix(numutrees,Numptrn); - if (compclock) { - ulklc = new_dvector(numutrees); - allsitesc = new_dmatrix(numutrees,Numptrn); - } - } - - /* write puzzling step tree list */ - if ((listqptrees == PSTOUT_ORDER) || (listqptrees == PSTOUT_LISTORDER)) { - openfiletowrite(&qptorder, OUTPTORDER, "puzzling step trees (unique)"); - - fprintfsortedpstrees(qptorder, psteptreelist, psteptreenum, psteptreesum, 1, 0.0); - closefile(qptorder); - } - - /* compute ML branch lengths for QP tree and for 1st user tree */ - switch(typ_optn) { - case TREERECON_OPTN: - - /* open outtree file */ - openfiletowrite(&tfp, TREEFILE, "output tree(s)"); - - addtimes(GENERAL, &tarr); - - switch (puzzlemode) { - case QUARTPUZ: /* read QP tree */ - rewind(tmpfp); - openfiletowrite(&tfp, TREEFILE, "output tree(s)"); - evaluatetree(tmpfp, tfp, puzzlemode, 0, 1, &oldlocroot); - addtimes(TREEEVAL, &tarr); - closefile(tmpfp); - closefile(tfp); - - /* CZ 05/19/01 */ - /*openfiletoappend(&ofp, OUTFILE, "general output");*/ - /*writeoutputfile(ofp,WRITEREST);*/ - break; - case USERTREE: /* read user tree */ - openfiletoappend(&ofp, OUTFILE, "general output"); - - openfiletowrite(&tfp, TREEFILE, "output tree(s)"); - for (i = 0; i < numutrees; i++) { - evaluatetree(utfp, tfp, puzzlemode, i, numutrees, &oldlocroot); - if (i==0) writeoutputfile(ofp,WRITEREST); - writecutree(ofp, i+1); - addtimes(TREEEVAL, &tarr); - } - closefile(tfp); - closefile(utfp); - break; - default: - /* CZ 05/19/01 */ - /*openfiletoappend(&ofp, OUTFILE, "general output");*/ - /*writeoutputfile(ofp,WRITEREST);*/ - break; - } /* switch puzzlemode */ - break; - default: - /* CZ 05/19/01 */ - /*openfiletoappend(&ofp, OUTFILE, "general output");*/ - /*writeoutputfile(ofp,WRITEREST);*/ - break; - } /* switch typ_optn */ - - /* print tree statistics */ - if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE && numutrees > 1) - 
printtreestats(ofp); - - /* free memory for tree statistics */ - if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE && numutrees > 1) { - free_dvector(ulkl); - free_dmatrix(allsites); - if (compclock) { - free_dvector(ulklc); - free_dmatrix(allsitesc); - } - } - -# if PARALLEL - PP_SendDone(); -# endif /* PARALLEL */ - - /* write CPU/Wallclock times and parallel statistics */ - time(&walltimestop); - cputimestop = clock(); - addtimes(OVERALL, &tarr); -# ifdef TIMEDEBUG - printtimearr(&tarr); -# endif /* TIMEDEBUG */ - fullcpu = tarr.fullcpu; - fulltime = tarr.fulltime; - -# if PARALLEL - writetimesstat(ofp); -# endif /* PARALLEL */ - - /* stop timer */ - time(&Stoptime); - Stopcpu=clock(); - /* CZ 05/19/01 */ - /*timestamp(ofp);*/ - /*closefile(ofp);*/ - - - /* printbestratecombination(stderr); */ - mlfinish(); - - FPRINTF(STDOUTFILE "\nAll results written to disk:\n"); - FPRINTF(STDOUTFILE " Puzzle report file: %s\n", OUTFILE); - FPRINTF(STDOUTFILE " Likelihood distances: %s\n", DISTANCES); - - if (typ_optn == TREERECON_OPTN && puzzlemode != PAIRDIST) - FPRINTF(STDOUTFILE " Phylip tree file: %s\n", TREEFILE); - if (typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ) { - if ((listqptrees == PSTOUT_ORDER) ||(listqptrees == PSTOUT_LISTORDER)) - FPRINTF(STDOUTFILE " Unique puzzling step trees: %s\n", OUTPTORDER); - if ((listqptrees == PSTOUT_LIST) ||(listqptrees == PSTOUT_LISTORDER)) - FPRINTF(STDOUTFILE " Puzzling step tree list: %s\n", OUTPTLIST); - } - if (show_optn && typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ) - FPRINTF(STDOUTFILE " Unresolved quartets: %s\n", UNRESOLVED); - if (typ_optn == LIKMAPING_OPTN) - FPRINTF(STDOUTFILE " Likelihood mapping diagram: %s\n", TRIANGLE); - FPRINTF(STDOUTFILE "\n"); - - /* runtime message */ - FPRINTF(STDOUTFILE - "The computation took %.0f seconds (= %.1f minutes = %.1f hours)\n", - difftime(Stoptime, Starttime), difftime(Stoptime, Starttime)/60., - difftime(Stoptime, Starttime)/3600.); - FPRINTF(STDOUTFILE - 
" including input %.0f seconds (= %.1f minutes = %.1f hours)\n", - fulltime, fulltime/60., fulltime/3600.); -#ifdef TIMEDEBUG - FPRINTF(STDOUTFILE - "and %.0f seconds CPU time (= %.1f minutes = %.1f hours)\n\n", - fullcpu, fullcpu/60., fullcpu/3600.); -#endif /* TIMEDEBUG */ - - /* free memory */ - memcleanup(); - -# if PARALLEL - } /* !IamSlave */ - PP_Finalize(); -# endif /* PARALLEL */ - - return 0; -} - - -/* compare function for uli - sort largest numbers first */ -int ulicmp(const void *ap, const void *bp) -{ - uli a, b; - - a = *((uli *) ap); - b = *((uli *) bp); - - if (a > b) return -1; - else if (a < b) return 1; - else return 0; -} - -/* compare function for int - sort smallest numbers first */ -int intcmp(const void *ap, const void *bp) -{ - int a, b; - - a = *((int *) ap); - b = *((int *) bp); - - if (a < b) return -1; - else if (a > b) return 1; - else return 0; -} diff --git a/forester/archive/RIO/others/puzzle_mod/src/puzzle2.c b/forester/archive/RIO/others/puzzle_mod/src/puzzle2.c deleted file mode 100644 index 429fe46..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/puzzle2.c +++ /dev/null @@ -1,2701 +0,0 @@ -/* - * puzzle2.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -/* Modified by Christian Zmasek to: - - Allow 8000 seqs (for pairwise dist. calc.). - - Names of 26 chars. - - - !WARNING: Use ONLY together with FORESTER/RIO! - !For all other puposes download the excellent original! 
- - last modification: 05/19/01 - - - void getsizesites(FILE *ifp): - - 257 -> 8000 - - - - void readid(FILE *infp, int t): - - for (i = 0; i < 10; i++) { -> for (i = 0; i < 26; i++) { - - for (i = 9; i > -1; i--) { -> for (i = 25; i > -1; i--) { - - for (j = 0; (j < 10) && (flag == TRUE); j++) -> for (j = 0; (j < 26) && (flag == TRUE); j++) - - - - void initid(int t): - - Identif = new_cmatrix(t, 10); -> Identif = new_cmatrix(t, 26); - - for (j = 0; j < 10; j++) -> for (j = 0; j < 26; j++) - - - - fputid10(FILE *ofp, int t): - - for (i = 0; i < 10; i++) -> for (i = 0; i < 26; i++) - - - - int fputid(FILE *ofp, int t): - - while (Identif[t][i] != ' ' && i < 10) { -> while (Identif[t][i] != ' ' && i < 26) { - - - - -*/ - -#define EXTERN extern - -#include "puzzle.h" -#include - -#if PARALLEL -# include "sched.h" -#endif /* PARALLEL */ - - -/******************************************************************************/ -/* sequences */ -/******************************************************************************/ - -/* read ten characters of current line as identifier */ -void readid(FILE *infp, int t) -{ - int i, j, flag, ci; - - for (i = 0; i < 26; i++) { /* CZ 05/19/01 */ - ci = fgetc(infp); - if (ci == EOF || !isprint(ci)) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no name for sequence %d)\n\n\n", t+1); - exit(1); - } - Identif[t][i] = (char) ci; - } - /* convert leading blanks in taxon name to underscores */ - flag = FALSE; - for (i = 25; i > -1; i--) { /* CZ 05/19/01 */ - if (flag == FALSE) { - if (Identif[t][i] != ' ') flag = TRUE; - } else { - if (Identif[t][i] == ' ') Identif[t][i] = '_'; - } - } - /* check whether this name is already used */ - for (i = 0; i < t; i++) { /* compare with all other taxa */ - flag = TRUE; /* assume identity */ - for (j = 0; (j < 26) && (flag == TRUE); j++) /* CZ 05/19/01 */ - if (Identif[t][j] != Identif[i][j]) - flag = FALSE; - if (flag) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence 
name '"); - fputid(STDOUT, t); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } - } -} - -/* read next allowed character */ -char readnextcharacter(FILE *ifp, int notu, int nsite) -{ - char c; - - /* ignore blanks and control characters except newline */ - do { - if (fscanf(ifp, "%c", &c) != 1) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } - } while (c == ' ' || (iscntrl((int) c) && c != '\n')); - return c; -} - -/* skip rest of the line */ -void skiprestofline(FILE* ifp, int notu, int nsite) -{ - int ci; - - /* read chars until the first newline */ - do{ - ci = fgetc(ifp); - if (ci == EOF) { - FPRINTF(STDOUTFILE "Unable to proceed (missing newline at position %d in sequence '", nsite + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } - } while ((char) ci != '\n'); -} - -/* skip control characters and blanks */ -void skipcntrl(FILE *ifp, int notu, int nsite) -{ - int ci; - - /* read over all control characters and blanks */ - do { - ci = fgetc(ifp); - if (ci == EOF) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } - } while (iscntrl(ci) || (char) ci == ' '); - /* go one character back */ - if (ungetc(ci, ifp) == EOF) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (positioning error at position %d in sequence '", nsite + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } -} - -/* read sequences of one data set */ -void getseqs(FILE *ifp) -{ - int notu, nsite, endofline, linelength, i; - char c; - - seqchars = new_cmatrix(Maxspc, Maxseqc); - /* read all characters */ - nsite = 0; /* next site to be read */ - while (nsite < Maxseqc) { - /* read first taxon */ - notu = 0; - /* go to next true line */ - skiprestofline(ifp, notu, nsite); - 
skipcntrl(ifp, notu, nsite); - if (nsite == 0) readid(ifp, notu); - endofline = FALSE; - linelength = 0; - do { - c = readnextcharacter(ifp, notu, nsite + linelength); - if (c == '\n') endofline = TRUE; - else if (c == '.') { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (invalid character '.' at position "); - FPRINTF(STDOUTFILE "%d in first sequence)\n\n\n", nsite + linelength + 1); - exit(1); - } else if (nsite + linelength < Maxseqc) { - /* change to upper case */ - seqchars[notu][nsite + linelength] = (char) toupper((int) c); - linelength++; - } else { - endofline = TRUE; - skiprestofline(ifp, notu, nsite + linelength); - } - } while (!endofline); - if (linelength == 0) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line with length 0 at position %d in sequence '", nsite + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } - /* read other taxa */ - for (notu = 1; notu < Maxspc; notu++) { - /* go to next true line */ - if (notu != 1) skiprestofline(ifp, notu, nsite); - skipcntrl(ifp, notu, nsite); - if (nsite == 0) readid(ifp, notu); - for (i = nsite; i < nsite + linelength; i++) { - c = readnextcharacter(ifp, notu, i); - if (c == '\n') { /* too short */ - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line to short at position %d in sequence '", i + 1); - fputid(STDOUT, notu); - FPRINTF(STDOUTFILE "')\n\n\n"); - exit(1); - } else if (c == '.') { - seqchars[notu][i] = seqchars[0][i]; - } else { - /* change to upper case */ - seqchars[notu][i] = (char) toupper((int) c); - } - } - } - nsite = nsite + linelength; - } -} - -/* initialize identifer array */ -void initid(int t) -{ - int i, j; - - Identif = new_cmatrix(t, 26); /* CZ 05/19/01 */ - for (i = 0; i < t; i++) - for (j = 0; j < 26; j++) /* CZ 05/19/01 */ - Identif[i][j] = ' '; -} - -/* print identifier of specified taxon in full 10 char length */ -void fputid10(FILE *ofp, int t) -{ - int i; - - for (i = 0; i < 26; i++) fputc(Identif[t][i], ofp); /* CZ 05/19/01 */ -} - -/* print 
identifier of specified taxon up to first space */ -int fputid(FILE *ofp, int t) -{ - int i; - - i = 0; - while (Identif[t][i] != ' ' && i < 26) { /* CZ 05/19/01 */ - fputc(Identif[t][i], ofp); - i++; - } - return i; -} - -/* read first line of sequence data set */ -void getsizesites(FILE *ifp) -{ - if (fscanf(ifp, "%d", &Maxspc) != 1) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sequences)\n\n\n"); - exit(1); - } - if (fscanf(ifp, "%d", &Maxseqc) != 1) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sites)\n\n\n"); - exit(1); - } - - if (Maxspc < 4) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (less than 4 sequences)\n\n\n"); - exit(1); - } - if (Maxspc > 8000) { /* CZ 05/19/01 */ - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (more than 8000 sequences)\n\n\n"); - exit(1); - } - if (Maxseqc < 1) { - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no sequence sites)\n\n\n"); - exit(1); - } - Maxbrnch = 2*Maxspc - 3; -} - -/* read one data set - PHYLIP interleaved */ -void getdataset(FILE *ifp) -{ - initid(Maxspc); - getseqs(ifp); -} - -/* guess data type */ -int guessdatatype() -{ - uli numnucs, numchars, numbins; - int notu, nsite; - char c; - - /* count A, C, G, T, U, N */ - numnucs = 0; - numchars = 0; - numbins = 0; - for (notu = 0; notu < Maxspc; notu++) - for (nsite = 0; nsite < Maxseqc; nsite++) { - c = seqchars[notu][nsite]; - if (c == 'A' || c == 'C' || c == 'G' || - c == 'T' || c == 'U' || c == 'N') numnucs++; - if (c != '-' && c != '?') numchars++; - if (c == '0' || c == '1') numbins++; - } - if (numchars == 0) numchars = 1; - /* more than 85 % frequency means nucleotide data */ - if ((double) numnucs / (double) numchars > 0.85) return 0; - else if ((double) numbins / (double) numchars > 0.2) return 2; - else return 1; -} - -/* translate characters into format used by ML engine */ -void translatedataset() -{ - int notu, sn, co; - char c; - cvector code; - - - /* determine Maxsite - number of ML sites per taxon */ - 
if (data_optn == 0 && SH_optn) { - if (SHcodon) - Maxsite = Maxseqc / 3; - else - Maxsite = Maxseqc / 2; /* assume doublets */ - - } else - Maxsite = Maxseqc; - if (data_optn == 0 && (Maxsite % 3) == 0 && !SH_optn) { - if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3) - Maxsite = Maxsite / 3; /* only one of the three codon positions */ - if (codon_optn == 4) - Maxsite = 2*(Maxsite / 3); /* 1st + 2nd codon positions */ - } - - /* reserve memory */ - if (Seqchar != NULL) free_cmatrix(Seqchar); - Seqchar = new_cmatrix(Maxspc, Maxsite); - - /* code length */ - if (data_optn == 0 && SH_optn) - code = new_cvector(2); - else - code = new_cvector(1); - - /* decode characters */ - if (data_optn == 0 && SH_optn) { /* SH doublets */ - - for (notu = 0; notu < Maxspc; notu++) { - for (sn = 0; sn < Maxsite; sn++) { - for (co = 0; co < 2; co++) { - if (SHcodon) - c = seqchars[notu][sn*3 + co]; - else - c = seqchars[notu][sn*2 + co]; - code[co] = c; - } - Seqchar[notu][sn] = code2int(code); - } - } - - } else if (!(data_optn == 0 && (Maxseqc % 3) == 0)) { /* use all */ - - for (notu = 0; notu < Maxspc; notu++) { - for (sn = 0; sn < Maxsite; sn++) { - code[0] = seqchars[notu][sn]; - Seqchar[notu][sn] = code2int(code); - } - } - - } else { /* codons */ - - for (notu = 0; notu < Maxspc; notu++) { - for (sn = 0; sn < Maxsite; sn++) { - if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3) - code[0] = seqchars[notu][sn*3+codon_optn-1]; - else if (codon_optn == 4) { - if ((sn % 2) == 0) - code[0] = seqchars[notu][(sn/2)*3]; - else - code[0] = seqchars[notu][((sn-1)/2)*3+1]; - } else - code[0] = seqchars[notu][sn]; - Seqchar[notu][sn] = code2int(code); - } - } - - } - free_cvector(code); -} - -/* estimate mean base frequencies from translated data set */ -void estimatebasefreqs() -{ - int tpmradix, i, j; - uli all, *gene; - - tpmradix = gettpmradix(); - - if (Freqtpm != NULL) free_dvector(Freqtpm); - Freqtpm = new_dvector(tpmradix); - - if (Basecomp != NULL) 
free_imatrix(Basecomp); - Basecomp = new_imatrix(Maxspc, tpmradix); - - gene = (uli *) malloc((unsigned) ((tpmradix + 1) * sizeof(uli))); - if (gene == NULL) maerror("gene in estimatebasefreqs"); - - for (i = 0; i < tpmradix + 1; i++) gene[i] = 0; - for (i = 0; i < Maxspc; i++) - for (j = 0; j < tpmradix; j++) Basecomp[i][j] = 0; - for (i = 0; i < Maxspc; i++) - for (j = 0; j < Maxsite; j++) { - gene[(int) Seqchar[i][j]]++; - if (Seqchar[i][j] != tpmradix) Basecomp[i][(int) Seqchar[i][j]]++; - } - - all = Maxspc * Maxsite - gene[tpmradix]; - if (all != 0) { /* normal case */ - for (i = 0; i < tpmradix; i++) - Freqtpm[i] = (double) gene[i] / (double) all; - } else { /* pathological case with no unique character in data set */ - for (i = 0; i < tpmradix; i++) - Freqtpm[i] = 1.0 / (double) tpmradix; - } - - free(gene); - - Frequ_optn = TRUE; -} - -/* guess model of substitution */ -void guessmodel() -{ - double c1, c2, c3, c4, c5, c6; - dvector f; - dmatrix a; - int i; - - Dayhf_optn = FALSE; - Jtt_optn = TRUE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - blosum62_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - TSparam = 2.0; - YRparam = 1.0; - optim_optn = TRUE; - HKY_optn = TRUE; - TN_optn = FALSE; - - if (data_optn == 1) { /* amino acids */ - - /* chi2 fit to amino acid frequencies */ - - f = new_dvector(20); - a = new_dmatrix(20,20); - /* chi2 distance Dayhoff */ - dyhfdata(a, f); - c1 = 0; - for (i = 0; i < 20; i++) - c1 = c1 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - /* chi2 distance JTT */ - jttdata(a, f); - c2 = 0; - for (i = 0; i < 20; i++) - c2 = c2 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - /* chi2 distance mtREV */ - mtrevdata(a, f); - c3 = 0; - for (i = 0; i < 20; i++) - c3 = c3 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - /* chi2 distance VT */ - vtmvdata(a, f); - c4 = 0; - for (i = 0; i < 20; i++) - c4 = c4 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - /* chi2 distance WAG */ - wagdata(a, f); - c5 = 0; - for (i = 0; i < 20; i++) - c5 = c5 + 
(Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - /* chi2 distance cpREV */ - cprev45data(a, f); - c6 = 0; - for (i = 0; i < 20; i++) - c6 = c6 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); - - free_dvector(f); - free_dmatrix(a); - -#ifndef CPREV - if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5)) { - /* c1 -> Dayhoff */ - Dayhf_optn = TRUE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - if ((c2 < c3) && (c2 < c4) && (c2 < c5)) { - /* c2 -> JTT */ - Dayhf_optn = FALSE; - Jtt_optn = TRUE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - if ((c3 < c4) && (c3 < c5)) { - /* c3 -> mtREV */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = TRUE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n"); - } else { - if ((c4 < c5)) { - /* c4 -> VT */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = TRUE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - /* c5 -> WAG */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = TRUE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } /* if c4 else c5 */ - } /* if c3 else c4 */ - } /* if c2 */ - } /* if c1 */ - -#else /* CPREV */ - - if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5) && (c1 < c6)) { - /* c1 -> Dayhoff */ - Dayhf_optn = TRUE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } 
else { - if ((c2 < c3) && (c2 < c4) && (c2 < c5) && (c2 < c6)) { - /* c2 -> JTT */ - Dayhf_optn = FALSE; - Jtt_optn = TRUE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - if ((c3 < c4) && (c3 < c5) && (c3 < c6)) { - /* c3 -> mtREV */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = TRUE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n"); - } else { - if ((c4 < c5) && (c4 < c6)) { - /* c4 -> VT */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = TRUE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - if (c5 < c6) { - /* c5 -> WAG */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = FALSE; - vtmv_optn = FALSE; - wag_optn = TRUE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); - } else { - /* if (c6) */ - /* c6 -> cpREV */ - Dayhf_optn = FALSE; - Jtt_optn = FALSE; - mtrev_optn = FALSE; - cprev_optn = TRUE; - vtmv_optn = FALSE; - wag_optn = FALSE; - FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on cpDNA)\n"); - } /* if c5 else c6 */ - } /* if c4 else c5 */ - } /* if c3 else c4 */ - } /* if c2 */ - } /* if c1 */ -#endif /* CPREV */ - - } else if (data_optn == 0) { - FPRINTF(STDOUTFILE "(consists very likely of nucleotides)\n"); - } else { - FPRINTF(STDOUTFILE "(consists very likely of binary state data)\n"); - } -} /* guessmodel */ - - -/******************************************************************************/ -/* functions for representing and building puzzling step trees */ -/******************************************************************************/ - -/* initialize tree with the following starting configuration - 
- 2 - 0 +------- C(=2) - A(=0) -----+ - +------- B(=1) - 1 - */ -void inittree() -{ - int i; - - /* allocate the memory for the whole tree */ - - /* allocate memory for vector with all the edges of the tree */ - edge = (ONEEDGE *) calloc(Maxbrnch, sizeof(ONEEDGE) ); - if (edge == NULL) maerror("edge in inittree"); - - /* allocate memory for vector with edge numbers of leaves */ - edgeofleaf = (int *) calloc(Maxspc, sizeof(int) ); - if (edgeofleaf == NULL) maerror("edgeofleaf in inittree"); - - /* allocate memory for all the edges the edge map */ - for (i = 0; i < Maxbrnch; i++) { - edge[i].edgemap = (int *) calloc(Maxbrnch, sizeof(int) ); - if (edge[i].edgemap == NULL) maerror("edgemap in inittree"); - } - - /* number all edges */ - for (i = 0; i < Maxbrnch; i++) edge[i].numedge = i; - - /* initialize tree */ - - nextedge = 3; - nextleaf = 3; - - /* edge maps */ - (edge[0].edgemap)[0] = 0; /* you are on the right edge */ - (edge[0].edgemap)[1] = 4; /* go down left for leaf 1 */ - (edge[0].edgemap)[2] = 5; /* go down right for leaf 2 */ - (edge[1].edgemap)[0] = 1; /* go up for leaf 0 */ - (edge[1].edgemap)[1] = 0; /* you are on the right edge */ - (edge[1].edgemap)[2] = 3; /* go up/down right for leaf 2 */ - (edge[2].edgemap)[0] = 1; /* go up for leaf 0 */ - (edge[2].edgemap)[1] = 2; /* go up/down left for leaf 1 */ - (edge[2].edgemap)[2] = 0; /* you are on the right edge */ - - /* interconnection */ - edge[0].up = NULL; - edge[0].downleft = &edge[1]; - edge[0].downright = &edge[2]; - edge[1].up = &edge[0]; - edge[1].downleft = NULL; - edge[1].downright = NULL; - edge[2].up = &edge[0]; - edge[2].downleft = NULL; - edge[2].downright = NULL; - - /* edges of leaves */ - edgeofleaf[0] = 0; - edgeofleaf[1] = 1; - edgeofleaf[2] = 2; -} /* inittree */ - -/* add next leaf on the specified edge */ -void addnextleaf(int dockedge) -{ - int i; - - if (dockedge >= nextedge) { - /* Trying to add leaf nextleaf to nonexisting edge dockedge */ - FPRINTF(STDOUTFILE "\n\n\nHALT: 
PLEASE REPORT ERROR F TO DEVELOPERS\n\n\n"); - exit(1); - } - - if (nextleaf >= Maxspc) { - /* Trying to add leaf nextleaf to a tree with Maxspc leaves */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR G TO DEVELOPERS\n\n\n"); - exit(1); - } - - /* necessary change in edgeofleaf if dockedge == edgeofleaf[0] */ - if (edgeofleaf[0] == dockedge) edgeofleaf[0] = nextedge; - - /* adding nextedge to the tree */ - edge[nextedge].up = edge[dockedge].up; - edge[nextedge].downleft = &edge[dockedge]; - edge[nextedge].downright = &edge[nextedge+1]; - edge[dockedge].up = &edge[nextedge]; - - if (edge[nextedge].up != NULL) { - if ( ((edge[nextedge].up)->downleft) == &edge[dockedge] ) - (edge[nextedge].up)->downleft = &edge[nextedge]; - else - (edge[nextedge].up)->downright = &edge[nextedge]; - } - - /* adding nextedge + 1 to the tree */ - edge[nextedge+1].up = &edge[nextedge]; - edge[nextedge+1].downleft = NULL; - edge[nextedge+1].downright = NULL; - edgeofleaf[nextleaf] = nextedge+1; - - /* the two new edges get info about the old edges */ - /* nextedge */ - for (i = 0; i < nextedge; i++) { - switch ( (edge[dockedge].edgemap)[i] ) { - - /* down right changes to down left */ - case 5: (edge[nextedge].edgemap)[i] = 4; - break; - - /* null changes to down left */ - case 0: (edge[nextedge].edgemap)[i] = 4; - break; - - default: (edge[nextedge].edgemap)[i] = - (edge[dockedge].edgemap)[i]; - break; - } - } - - /* nextedge + 1 */ - for (i = 0; i < nextedge; i++) { - switch ( (edge[dockedge].edgemap)[i] ) { - - /* up/down left changes to up */ - case 2: (edge[nextedge+1].edgemap)[i] = 1; - break; - - /* up/down right changes to up */ - case 3: (edge[nextedge+1].edgemap)[i] = 1; - break; - - /* down left changes to up/down left */ - case 4: (edge[nextedge+1].edgemap)[i] = 2; - break; - - /* down right changes to up/down left */ - case 5: (edge[nextedge+1].edgemap)[i] = 2; - break; - - /* null changes to up/down left */ - case 0: (edge[nextedge+1].edgemap)[i] = 2; - break; - - /* 
up stays up */ - default: (edge[nextedge+1].edgemap)[i] = - (edge[dockedge].edgemap)[i]; - break; - } - } - - /* dockedge */ - for (i = 0; i < nextedge; i++) { - switch ( (edge[dockedge].edgemap)[i] ) { - - /* up/down right changes to up */ - case 3: (edge[dockedge].edgemap)[i] = 1; - break; - - /* up/down left changes to up */ - case 2: (edge[dockedge].edgemap)[i] = 1; - break; - - default: break; - } - } - - /* all edgemaps are updated for the two new edges */ - /* nextedge */ - (edge[nextedge].edgemap)[nextedge] = 0; - (edge[nextedge].edgemap)[nextedge+1] = 5; /* down right */ - - /* nextedge + 1 */ - (edge[nextedge+1].edgemap)[nextedge] = 1; /* up */ - (edge[nextedge+1].edgemap)[nextedge+1] = 0; - - /* all other edges */ - for (i = 0; i < nextedge; i++) { - (edge[i].edgemap)[nextedge] = (edge[i].edgemap)[dockedge]; - (edge[i].edgemap)[nextedge+1] = (edge[i].edgemap)[dockedge]; - } - - /* an extra for dockedge */ - (edge[dockedge].edgemap)[nextedge] = 1; /* up */ - (edge[dockedge].edgemap)[nextedge+1] = 3; /* up/down right */ - - nextleaf++; - nextedge = nextedge + 2; -} /* addnextleaf */ - - -/* free memory (to be called after inittree) */ -void freetree() -{ - int i; - - for (i = 0; i < 2 * Maxspc - 3; i++) free(edge[i].edgemap); - free(edge); - free(edgeofleaf); -} /* freetree */ - -/* writes OTU sitting on edge ed */ -void writeOTU(FILE *outfp, int ed) -{ - int i; - - /* test whether we are on a leaf */ - if (edge[ed].downright == NULL && edge[ed].downleft == NULL) { - for (i = 1; i < nextleaf; i++) { - if (edgeofleaf[i] == ed) { /* i is the leaf of ed */ - column += fputid(outfp, trueID[i]); - return; - } - } - } - - /* we are NOT on a leaf */ - fprintf(outfp, "("); - column++; - writeOTU(outfp, edge[ed].downleft->numedge); - fprintf(outfp, ","); - column++; - column++; - if (column > 55) { - column = 2; - fprintf(outfp, "\n "); - } - writeOTU(outfp, edge[ed].downright->numedge); - fprintf(outfp, ")"); - column++; -} /* writeOTU */ - -/* write tree */ -void 
writetree(FILE *outfp) -{ - column = 1; - fprintf(outfp, "("); - column += fputid(outfp, trueID[0]) + 3; - fprintf(outfp, ","); - writeOTU(outfp, edge[edgeofleaf[0]].downleft->numedge); - column++; - column++; - fprintf(outfp, ","); - writeOTU(outfp, edge[edgeofleaf[0]].downright->numedge); - fprintf(outfp, ");\n"); -} /* writetree */ - - -/* clear all edgeinfos */ -void resetedgeinfo() -{ - int i; - - for (i = 0; i < nextedge; i++) - edge[i].edgeinfo = 0; -} /* resetedgeinfo */ - -/* increment all edgeinfo between leaf A and B */ -void incrementedgeinfo(int A, int B) -{ - int curredge, finaledge, nextstep; - - if (A == B) return; - - finaledge = edgeofleaf[B]; - - curredge = edgeofleaf[A]; - edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1; - - while (curredge != finaledge) { - nextstep = (edge[curredge].edgemap)[finaledge]; - switch (nextstep) { - - /* up */ - case 1: curredge = (edge[curredge].up)->numedge; - break; - - /* up/down left */ - case 2: curredge = ((edge[curredge].up)->downleft)->numedge; - break; - - /* up/down right */ - case 3: curredge = ((edge[curredge].up)->downright)->numedge; - break; - - /* down left */ - case 4: curredge = (edge[curredge].downleft)->numedge; - break; - - /* down right */ - case 5: curredge = (edge[curredge].downright)->numedge; - break; - - } - edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1; - } -} /* incrementedgeinfo */ - -/* checks which edge has the lowest edgeinfo - if there are several edges with the same lowest edgeinfo, - one of them will be selected randomly */ -void minimumedgeinfo() -{ - int i, k, howmany, randomnum; - - howmany = 1; - minedge = 0; - mininfo = edge[0].edgeinfo; - for (i = 1; i < nextedge; i++) - if (edge[i].edgeinfo <= mininfo) { - if (edge[i].edgeinfo == mininfo) { - howmany++; - } else { - minedge = i; - mininfo = edge[i].edgeinfo; - howmany = 1; - } - } - - if (howmany > 1) { /* draw random edge */ - randomnum = randominteger(howmany) + 1; /* 1 to howmany */ - i = -1; - for (k = 0; 
k < randomnum; k++) { - do { - i++; - } while (edge[i].edgeinfo != mininfo); - minedge = i; - } - } -} /* minimumedgeinfo */ - - - - -/*******************************************/ -/* tree sorting */ -/*******************************************/ - -/* compute address of the 4 int (sort key) in the 4 int node */ -int ct_sortkeyaddr(int addr) -{ - int a, res; - a = addr % 4; - res = addr - a + 3; - return res; -} - - -/**********/ - -/* compute address of the next edge pointer in a 4 int node (0->1->2->0) */ -int ct_nextedgeaddr(int addr) -{ - int a, res; - a = addr % 4; - if ( a == 2 ) { res = addr - 2; } - else { res = addr + 1; } - return res; -} - - -/**********/ - -/* compute address of 1st edge of a 4 int node from node number */ -int ct_1stedge(int node) -{ - int res; - res = 4 * node; - return res; -} - - -/**********/ - -/* compute address of 2nd edge of a 4 int node from node number */ -int ct_2ndedge(int node) -{ - int res; - res = 4 * node +1; - return res; -} - - -/**********/ - -/* compute address of 3rd edge of a 4 int node from node number */ -int ct_3rdedge(int node) -{ - int res; - res = 4 * node +2; - return res; -} - - -/**********/ - -/* check whether node 'node' is a leaf (2nd/3rd edge pointer = -1) */ -int ct_isleaf(int node, int *ctree) -{ - return (ctree[ct_3rdedge(node)] < 0); -} - - -/**********/ - -/* compute node number of 4 int node from an edge addr. 
*/ -int ct_addr2node(int addr) -{ - int a, res; - a = addr % 4; - res = (int) ((addr - a) / 4); - return res; -} - - -/**********/ - -/* print graph pointers for checking */ -void printctree(int *ctree) -{ - int n; - for (n=0; n < 2*Maxspc; n++) { - printf("n[%3d] = (%3d.%2d, %3d.%2d, %3d.%2d | %3d)\n", n, - (int) ctree[ct_1stedge(n)]/4, - (int) ctree[ct_1stedge(n)]%4, - (int) ctree[ct_2ndedge(n)]/4, - (int) ctree[ct_2ndedge(n)]%4, - (int) ctree[ct_3rdedge(n)]/4, - (int) ctree[ct_3rdedge(n)]%4, - ctree[ct_3rdedge(n)+1]); - } - printf("\n"); -} /* printctree */ - - -/**********/ - -/* allocate memory for ctree 3 ints pointer plus 1 check byte */ -int *initctree() -{ - int *snodes; - int n; - - snodes = (int *) malloc(4 * 2 * Maxspc * sizeof(int)); - if (snodes == NULL) maerror("snodes in copytree"); - - for (n=0; n<(4 * 2 * Maxspc); n++) { - snodes[n]=-1; - } - return snodes; -} - - -/**********/ - -/* free memory of a tree for sorting */ -void freectree(int **snodes) -{ - free(*snodes); - *snodes = NULL; -} - - -/**********/ - -/* copy subtree recursively */ -void copyOTU(int *ctree, /* tree array struct */ - int *ct_nextnode, /* next free node */ - int ct_curredge, /* currende edge to add subtree */ - int *ct_nextleaf, /* next free leaf (0-maxspc) */ - int ed) /* edge in puzzling step tree */ -{ - int i, nextcurredge; - - /* test whether we are on a leaf */ - if (edge[ed].downright == NULL && edge[ed].downleft == NULL) { - for (i = 1; i < nextleaf; i++) { - if (edgeofleaf[i] == ed) { /* i is the leaf of ed */ - nextcurredge = ct_1stedge(*ct_nextleaf); - ctree[ct_curredge] = nextcurredge; - ctree[nextcurredge] = ct_curredge; - ctree[ct_sortkeyaddr(nextcurredge)] = trueID[i]; - (*ct_nextleaf)++; - return; - } - } - } - - /* we are NOT on a leaf */ - nextcurredge = ct_1stedge(*ct_nextnode); - ctree[ct_curredge] = nextcurredge; - ctree[nextcurredge] = ct_curredge; - (*ct_nextnode)++; - nextcurredge = ct_nextedgeaddr(nextcurredge); - copyOTU(ctree, ct_nextnode, 
nextcurredge, - ct_nextleaf, edge[ed].downleft->numedge); - - nextcurredge = ct_nextedgeaddr(nextcurredge); - copyOTU(ctree, ct_nextnode, nextcurredge, - ct_nextleaf, edge[ed].downright->numedge); -} - - -/**********/ - -/* copy treestructure to sorting structure */ -void copytree(int *ctree) -{ - int ct_curredge; - int ct_nextleaf; - int ct_nextnode; - - ct_nextnode = Maxspc; - ct_curredge = ct_1stedge(ct_nextnode); - ct_nextleaf = 1; - - ctree[ct_1stedge(0)] = ct_curredge; - ctree[ct_curredge] = ct_1stedge(0); - ctree[ct_sortkeyaddr(0)] = trueID[0]; - - ct_nextnode++; - - ct_curredge = ct_nextedgeaddr(ct_curredge); - copyOTU(ctree, &ct_nextnode, ct_curredge, - &ct_nextleaf, edge[edgeofleaf[0]].downleft->numedge); - - ct_curredge = ct_nextedgeaddr(ct_curredge); - copyOTU(ctree, &ct_nextnode, ct_curredge, - &ct_nextleaf, edge[edgeofleaf[0]].downright->numedge); -} - - -/**********/ - -/* sort subtree from edge recursively by indices */ -int sortOTU(int edge, int *ctree) -{ - int key1, key2; - int edge1, edge2; - int tempedge; - - if (ctree[ct_2ndedge((int) (edge / 4))] < 0) - return ctree[ct_sortkeyaddr(edge)]; - - edge1 = ctree[ct_nextedgeaddr(edge)]; - edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))]; - - /* printf ("visiting [%5d] -> [%5d], [%5d]\n", edge, edge1, edge2); */ - /* printf ("visiting [%2d.%2d] -> [%2d.%2d], [%2d.%2d]\n", - (int)(edge/4), edge%4, (int)(edge1/4), edge1%4, - (int)(edge2/4), edge2%4); */ - - key1 = sortOTU(edge1, ctree); - key2 = sortOTU(edge2, ctree); - - if (key2 < key1) { - tempedge = ctree[ctree[edge1]]; - ctree[ctree[edge1]] = ctree[ctree[edge2]]; - ctree[ctree[edge2]] = tempedge; - tempedge = ctree[edge1]; - ctree[edge1] = ctree[edge2]; - ctree[edge2] = tempedge; - ctree[ct_sortkeyaddr(edge)] = key2; - - } else { - ctree[ct_sortkeyaddr(edge)] = key1; - } - return ctree[ct_sortkeyaddr(edge)]; -} - - -/**********/ - -/* sort ctree recursively by indices */ -int sortctree(int *ctree) -{ - int n, startnode=-1; - for(n=0; 
n>>>\n"); - tmpptr = list; - *sortlist = list; - while (tmpptr != NULL) { - (*tmpptr).sortnext = (*tmpptr).succ; - (*tmpptr).sortlast = (*tmpptr).pred; - tmpptr = (*tmpptr).succ; - } - - while (xchange > 0) { - curr = *sortlist; - xchange = 0; - if (curr == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n"); - while((*curr).sortnext != NULL) { - next = (*curr).sortnext; - if ((*curr).count >= (*next).count) - curr = (*curr).sortnext; - else { - if ((*curr).sortlast != NULL) - (*((*curr).sortlast)).sortnext = next; - if (*sortlist == curr) - *sortlist = next; - (*next).sortlast = (*curr).sortlast; - - if ((*next).sortnext != NULL) - (*((*next).sortnext)).sortlast = curr; - (*curr).sortnext = (*next).sortnext; - - (*curr).sortlast = next; - (*next).sortnext = curr; - - xchange++; - } - } - } -} /* sortbynum */ - - -/**********/ - -/* print puzzling step tree stuctures for checking */ -void printfpstrees(treelistitemtype *list) -{ - char ch; - treelistitemtype *tmpptr = NULL; - tmpptr = list; - ch = '-'; - while (tmpptr != NULL) { - printf ("%c[%2d] %5d %s\n", ch, (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree); - tmpptr = (*tmpptr).succ; - ch = ' '; - } -} - -/**********/ - -/* print sorted puzzling step tree stucture with names */ -void fprintffullpstree(FILE *outf, char *treestr) -{ - int count = 0; - int idnum = 0; - int n; - for(n=0; treestr[n] != '\0'; n++){ - while(isdigit((int)treestr[n])){ - idnum = (10 * idnum) + ((int)treestr[n]-48); - n++; - count++; - } - if (count > 0){ -# ifdef USEQUOTES - fprintf(outf, "'"); -# endif - (void)fputid(outf, idnum); -# ifdef USEQUOTES - fprintf(outf, "'"); -# endif - count = 0; - idnum = 0; - } - fprintf(outf, "%c", treestr[n]); - } -} - - -/**********/ - -/* print sorted puzzling step tree stuctures with names */ -void fprintfsortedpstrees(FILE *output, - treelistitemtype *list, /* tree list */ - int itemnum, /* order number */ - int itemsum, /* number of trees */ - int comment, /* with statistics, or puzzle report ? 
*/ - float cutoff) /* cutoff percentage */ -{ - treelistitemtype *tmpptr = NULL; - treelistitemtype *slist = NULL; - int num = 1; - float percent; - - if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n"); - sortbynum(list, &slist); - - tmpptr = slist; - while (tmpptr != NULL) { - percent = (float)(100.0 * (*tmpptr).count / itemsum); - if ((cutoff == 0.0) || (cutoff <= percent)) { - if (comment) - fprintf (output, "[ %d. %d %.2f %d %d %d ]", num++, (*tmpptr).count, percent, (*tmpptr).id, itemnum, itemsum); - else { - if (num == 1){ - fprintf (output, "\n"); - fprintf (output, "The following tree(s) occured in more than %.2f%% of the %d puzzling steps.\n", cutoff, itemsum); - fprintf (output, "The trees are orderd descending by the number of occurences.\n"); - fprintf (output, "\n"); - fprintf (output, "\n occurences ID Phylip tree\n"); - } - fprintf (output, "%2d. %5d %6.2f%% %5d ", num++, (*tmpptr).count, percent, (*tmpptr).id); - } - fprintffullpstree(output, (*tmpptr).tree); - fprintf (output, "\n"); - } - tmpptr = (*tmpptr).sortnext; - } - - if (!comment) { - fprintf (output, "\n"); - switch(num) { - case 1: fprintf (output, "There were no tree topologies (out of %d) occuring with a percentage >= %.2f%% of the %d puzzling steps.\n", itemnum, cutoff, itemsum); break; - case 2: fprintf (output, "There was one tree topology (out of %d) occuring with a percentage >= %.2f%%.\n", itemnum, cutoff); break; - default: fprintf (output, "There were %d tree topologies (out of %d) occuring with a percentage >= %.2f%%.\n", num-1, itemnum, cutoff); break; - } - fprintf (output, "\n"); - fprintf (output, "\n"); - } - -} /* fprintfsortedpstrees */ - -/**********/ - -/* print sorted tree topologies for checking */ -void printfsortedpstrees(treelistitemtype *list) -{ - treelistitemtype *tmpptr = NULL; - treelistitemtype *slist = NULL; - - sortbynum(list, &slist); - - tmpptr = slist; - while (tmpptr != NULL) { - printf ("[%2d] %5d %s\n", (*tmpptr).idx, (*tmpptr).count, 
(*tmpptr).tree); - tmpptr = (*tmpptr).sortnext; - } -} /* printfsortedpstrees */ - - -/*******************************************/ -/* end of tree sorting */ -/*******************************************/ - - - -/******************************************************************************/ -/* functions for computing the consensus tree */ -/******************************************************************************/ - -/* prepare for consensus tree analysis */ -void initconsensus() -{ -# if ! PARALLEL - biparts = new_cmatrix(Maxspc-3, Maxspc); -# endif /* PARALLEL */ - - if (Maxspc % 32 == 0) - splitlength = Maxspc/32; - else splitlength = (Maxspc + 32 - (Maxspc % 32))/32; - numbiparts = 0; /* no pattern stored so far */ - maxbiparts = 0; /* no memory reserved so far */ - splitfreqs = NULL; - splitpatterns = NULL; - splitsizes = NULL; - splitcomp = (uli *) malloc(splitlength * sizeof(uli) ); - if (splitcomp == NULL) maerror("splitcomp in initconsensus"); -} - -/* prototype needed for recursive function */ -void makepart(int i, int curribrnch); - -/* recursive function to get bipartitions */ -void makepart(int i, int curribrnch) -{ - int j; - - if ( edge[i].downright == NULL || - edge[i].downleft == NULL) { /* if i is leaf */ - - /* check out what leaf j sits on this edge i */ - for (j = 1; j < Maxspc; j++) { - if (edgeofleaf[j] == i) { - biparts[curribrnch][trueID[j]] = '*'; - return; - } - } - } else { /* still on inner branch */ - makepart(edge[i].downleft->numedge, curribrnch); - makepart(edge[i].downright->numedge, curribrnch); - } -} - -/* compute bipartitions of tree of current puzzling step */ -void computebiparts() -{ - int i, j, curribrnch; - - curribrnch = -1; - - for (i = 0; i < Maxspc - 3; i++) - for (j = 0; j < Maxspc; j++) - biparts[i][j] = '.'; - - for (i = 0; i < Maxbrnch; i++) { - if (!( edgeofleaf[0] == i || - edge[i].downright == NULL || - edge[i].downleft == NULL) ) { /* check all inner branches */ - curribrnch++; - makepart(i, 
curribrnch); - - /* make sure that the root is always a '*' */ - if (biparts[curribrnch][outgroup] == '.') { - for (j = 0; j < Maxspc; j++) { - if (biparts[curribrnch][j] == '.') - biparts[curribrnch][j] = '*'; - else - biparts[curribrnch][j] = '.'; - } - } - } - } -} - -/* print out the bipartition n of all different splitpatterns */ -void printsplit(FILE *fp, uli n) -{ - int i, j, col; - uli z; - - col = 0; - for (i = 0; i < splitlength; i++) { - z = splitpatterns[n*splitlength + i]; - for (j = 0; j < 32 && col < Maxspc; j++) { - if (col % 10 == 0 && col != 0) fprintf(fp, " "); - if (z & 1) fprintf(fp, "."); - else fprintf(fp, "*"); - z = (z >> 1); - col++; - } - } -} - -/* make new entries for new different bipartitions and count frequencies */ -void makenewsplitentries() -{ - int i, j, bpc, identical, idflag, bpsize; - uli nextentry, obpc; - - /* where the next entry would be in splitpatterns */ - nextentry = numbiparts; - - for (bpc = 0; bpc < Maxspc - 3; bpc++) { /* for every new bipartition */ - /* convert bipartition into a more compact format */ - bpsize = 0; - for (i = 0; i < splitlength; i++) { - splitcomp[i] = 0; - for (j = 0; j < 32; j++) { - splitcomp[i] = splitcomp[i] >> 1; - if (i*32 + j < Maxspc) - if (biparts[bpc][i*32 + j] == '.') { - /* set highest bit */ - splitcomp[i] = (splitcomp[i] | 2147483648UL); - bpsize++; /* count the '.' 
*/ - } - } - } - /* compare to the *old* patterns */ - identical = FALSE; - for (obpc = 0; (obpc < numbiparts) && (!identical); obpc++) { - /* compare first partition size */ - if (splitsizes[obpc] == bpsize) idflag = TRUE; - else idflag = FALSE; - /* if size is identical compare whole partition */ - for (i = 0; (i < splitlength) && idflag; i++) - if (splitcomp[i] != splitpatterns[obpc*splitlength + i]) - idflag = FALSE; - if (idflag) identical = TRUE; - } - if (identical) { /* if identical increase frequency */ - splitfreqs[2*(obpc-1)]++; - } else { /* create new entry */ - if (nextentry == maxbiparts) { /* reserve more memory */ - maxbiparts = maxbiparts + 2*Maxspc; - splitfreqs = (uli *) myrealloc(splitfreqs, - 2*maxbiparts * sizeof(uli) ); - /* 2x: splitfreqs contains also an index (sorting!) */ - if (splitfreqs == NULL) maerror("splitfreqs in makenewsplitentries"); - splitpatterns = (uli *) myrealloc(splitpatterns, - splitlength*maxbiparts * sizeof(uli) ); - if (splitpatterns == NULL) maerror("splitpatterns in makenewsplitentries"); - splitsizes = (int *) myrealloc(splitsizes, - maxbiparts * sizeof(int) ); - if (splitsizes == NULL) maerror("splitsizes in makenewsplitentries"); - } - splitfreqs[2*nextentry] = 1; /* frequency */ - splitfreqs[2*nextentry+1] = nextentry; /* index for sorting */ - for (i = 0; i < splitlength; i++) - splitpatterns[nextentry*splitlength + i] = splitcomp[i]; - splitsizes[nextentry] = bpsize; - nextentry++; - } - } - numbiparts = nextentry; -} - -/* general remarks: - - - every entry in consbiparts is one node of the consensus tree - - for each node one has to know which taxa and which other nodes - are *directly* descending from it - - for every taxon/node number there is a flag that shows - whether it descends from the node or not - - '0' means that neither a taxon nor another node with the - corresponding number decends from the node - '1' means that the corresponding taxon descends from the node - '2' means that the corresponding 
node descends from the node - '3' means that the corresponding taxon and node descends from the node -*/ - -/* copy bipartition n of all different splitpatterns to consbiparts[k] */ -void copysplit(uli n, int k) -{ - int i, j, col; - uli z; - - col = 0; - for (i = 0; i < splitlength; i++) { - z = splitpatterns[n*splitlength + i]; - for (j = 0; j < 32 && col < Maxspc; j++) { - if (z & 1) consbiparts[k][col] = '1'; - else consbiparts[k][col] = '0'; - z = (z >> 1); - col++; - } - } -} - -/* compute majority rule consensus tree */ -void makeconsensus() -{ - int i, j, k, size, subnode; - char chari, charj; - - /* sort bipartition frequencies */ - qsort(splitfreqs, numbiparts, 2*sizeof(uli), ulicmp); - /* how many bipartitions are included in the consensus tree */ - consincluded = 0; - for (i = 0; i < numbiparts && i == consincluded; i++) { - if (2*splitfreqs[2*i] > Numtrial) consincluded = i + 1; - } - - /* collect all info about majority rule consensus tree */ - /* the +1 is due to the edge with the root */ - consconfid = new_ivector(consincluded + 1); - conssizes = new_ivector(2*consincluded + 2); - consbiparts = new_cmatrix(consincluded + 1, Maxspc); - - for (i = 0; i < consincluded; i++) { - /* copy partition to consbiparts */ - copysplit(splitfreqs[2*i+1], i); - /* frequency in percent (rounded to integer) */ - consconfid[i] = (int) floor(100.0*splitfreqs[2*i]/Numtrial + 0.5); - /* size of partition */ - conssizes[2*i] = splitsizes[splitfreqs[2*i+1]]; - conssizes[2*i+1] = i; - } - for (i = 0; i < Maxspc; i++) consbiparts[consincluded][i] = '1'; - consbiparts[consincluded][outgroup] = '0'; - consconfid[consincluded] = 100; - conssizes[2*consincluded] = Maxspc - 1; - conssizes[2*consincluded + 1] = consincluded; - - /* sort bipartitions according to cluster size */ - qsort(conssizes, consincluded + 1, 2*sizeof(int), intcmp); - - /* reconstruct consensus tree */ - for (i = 0; i < consincluded; i++) { /* try every node */ - size = conssizes[2*i]; /* size of current 
node */ - for (j = i + 1; j < consincluded + 1; j++) { - - /* compare only with nodes with more descendants */ - if (size == conssizes[2*j]) continue; - - /* check whether node i is a subnode of j */ - subnode = FALSE; - for (k = 0; k < Maxspc && !subnode; k++) { - chari = consbiparts[ conssizes[2*i+1] ][k]; - if (chari != '0') { - charj = consbiparts[ conssizes[2*j+1] ][k]; - if (chari == charj || charj == '3') subnode = TRUE; - } - } - - /* if i is a subnode of j change j accordingly */ - if (subnode) { - /* remove subnode i from j */ - for (k = 0; k < Maxspc; k++) { - chari = consbiparts[ conssizes[2*i+1] ][k]; - if (chari != '0') { - charj = consbiparts[ conssizes[2*j+1] ][k]; - if (chari == charj) - consbiparts[ conssizes[2*j+1] ][k] = '0'; - else if (charj == '3') { - if (chari == '1') - consbiparts[ conssizes[2*j+1] ][k] = '2'; - else if (chari == '2') - consbiparts[ conssizes[2*j+1] ][k] = '1'; - else { - /* Consensus tree [1] */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR H TO DEVELOPERS\n\n\n"); - exit(1); - } - } else { - /* Consensus tree [2] */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR I TO DEVELOPERS\n\n\n"); - exit(1); - } - } - } - /* add link to subnode i in node j */ - charj = consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ]; - if (charj == '0') - consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '2'; - else if (charj == '1') - consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '3'; - else { - /* Consensus tree [3] */ - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR J TO DEVELOPERS\n\n\n"); - exit(1); - } - } - } - } -} - -/* prototype for recursion */ -void writenode(FILE *treefile, int node); - -/* write node (writeconsensustree) */ -void writenode(FILE *treefile, int node) -{ - int i, first; - - fprintf(treefile, "("); - column++; - /* write descending nodes */ - first = TRUE; - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '2' || - consbiparts[node][i] == '3') { - if (first) first = FALSE; - 
else { - fprintf(treefile, ","); - column++; - } - if (column > 60) { - column = 2; - fprintf(treefile, "\n"); - } - /* write node i */ - writenode(treefile, i); - - /* reliability value as internal label */ - fprintf(treefile, "%d", consconfid[i]); - - column = column + 3; - } - } - /* write descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '1' || - consbiparts[node][i] == '3') { - if (first) first = FALSE; - else { - fprintf(treefile, ","); - column++; - } - if (column > 60) { - column = 2; - fprintf(treefile, "\n"); - } - column += fputid(treefile, i); - } - } - fprintf(treefile, ")"); - column++; -} - -/* write consensus tree */ -void writeconsensustree(FILE *treefile) -{ - int i, first; - - column = 1; - fprintf(treefile, "("); - column += fputid(treefile, outgroup) + 2; - fprintf(treefile, ","); - /* write descending nodes */ - first = TRUE; - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '2' || - consbiparts[consincluded][i] == '3') { - if (first) first = FALSE; - else { - fprintf(treefile, ","); - column++; - } - if (column > 60) { - column = 2; - fprintf(treefile, "\n"); - } - /* write node i */ - writenode(treefile, i); - - /* reliability value as internal label */ - fprintf(treefile, "%d", consconfid[i]); - - column = column + 3; - } - } - /* write descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '1' || - consbiparts[consincluded][i] == '3') { - if (first) first = FALSE; - else { - fprintf(treefile, ","); - column++; - } - if (column > 60) { - column = 2; - fprintf(treefile, "\n"); - } - column += fputid(treefile, i); - } - } - fprintf(treefile, ");\n"); -} - -/* prototype for recursion */ -void nodecoordinates(int node); - -/* establish node coordinates (plotconsensustree) */ -void nodecoordinates(int node) -{ - int i, ymin, ymax, xcoordinate; - - /* first establish coordinates of descending nodes */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] 
== '2' || - consbiparts[node][i] == '3') - nodecoordinates(i); - } - - /* then establish coordinates of descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '1' || - consbiparts[node][i] == '3') { - /* y-coordinate of taxon i */ - ycortax[i] = ytaxcounter; - ytaxcounter = ytaxcounter - 2; - } - } - - /* then establish coordinates of this node */ - ymin = 2*Maxspc - 2; - ymax = 0; - xcoordinate = 0; - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '2' || - consbiparts[node][i] == '3') { - if (ycor[i] > ymax) ymax = ycor[i]; - if (ycor[i] < ymin) ymin = ycor[i]; - if (xcor[i] > xcoordinate) xcoordinate = xcor[i]; - } - } - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '1' || - consbiparts[node][i] == '3') { - if (ycortax[i] > ymax) ymax = ycortax[i]; - if (ycortax[i] < ymin) ymin = ycortax[i]; - } - } - ycormax[node] = ymax; - ycormin[node] = ymin; - ycor[node] = (int) floor(0.5*(ymax + ymin) + 0.5); - if (xcoordinate == 0) xcoordinate = 9; - xcor[node] = xcoordinate + 4; -} - -/* prototype for recursion */ -void drawnode(int node, int xold); - -/* drawnode (plotconsensustree) */ -void drawnode(int node, int xold) -{ - int i, j; - char buf[4]; - - /* first draw vertical line */ - for (i = ycormin[node] + 1; i < ycormax[node]; i++) - treepict[xcor[node]][i] = ':'; - - /* then draw descending nodes */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '2' || - consbiparts[node][i] == '3') - drawnode(i, xcor[node]); - } - - /* then draw descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[node][i] == '1' || - consbiparts[node][i] == '3') { - treepict[xcor[node]][ycortax[i]] = ':'; - for (j = xcor[node] + 1; j < xsize-10; j++) - treepict[j][ycortax[i]] = '-'; - for (j = 0; j < 10; j++) - treepict[xsize-10+j][ycortax[i]] = Identif[i][j]; - } - } - - /* then draw internal edge with consensus value */ - treepict[xold][ycor[node]] = ':'; - treepict[xcor[node]][ycor[node]] = ':'; - for (i = 
xold + 1; i < xcor[node]-3; i++) - treepict[i][ycor[node]] = '-'; - sprintf(buf, "%d", consconfid[node]); - if (consconfid[node] == 100) { - treepict[xcor[node]-3][ycor[node]] = buf[0]; - treepict[xcor[node]-2][ycor[node]] = buf[1]; - treepict[xcor[node]-1][ycor[node]] = buf[2]; - } else { - treepict[xcor[node]-3][ycor[node]] = '-'; - treepict[xcor[node]-2][ycor[node]] = buf[0]; - treepict[xcor[node]-1][ycor[node]] = buf[1]; - } -} - -/* plot consensus tree */ -void plotconsensustree(FILE *plotfp) -{ - int i, j, yroot, startree; - - /* star tree or no star tree */ - if (consincluded == 0) { - startree = TRUE; - consincluded = 1; /* avoids problems with malloc */ - } else - startree = FALSE; - - /* memory for x-y-coordinates of each bipartition */ - xcor = new_ivector(consincluded); - ycor = new_ivector(consincluded); - ycormax = new_ivector(consincluded); - ycormin = new_ivector(consincluded); - if (startree) consincluded = 0; /* avoids problems with malloc */ - - /* y-coordinates of each taxon */ - ycortax = new_ivector(Maxspc); - ycortax[outgroup] = 0; - - /* establish coordinates */ - ytaxcounter = 2*Maxspc - 2; - - /* first establish coordinates of descending nodes */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '2' || - consbiparts[consincluded][i] == '3') - nodecoordinates(i); - } - - /* then establish coordinates of descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '1' || - consbiparts[consincluded][i] == '3') { - /* y-coordinate of taxon i */ - ycortax[i] = ytaxcounter; - ytaxcounter = ytaxcounter - 2; - } - } - - /* then establish length of root edge and size of whole tree */ - yroot = 0; - xsize = 0; - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '2' || - consbiparts[consincluded][i] == '3') { - if (ycor[i] > yroot) yroot = ycor[i]; - if (xcor[i] > xsize) xsize = xcor[i]; - } - } - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '1' || - 
consbiparts[consincluded][i] == '3') { - if (ycortax[i] > yroot) yroot = ycortax[i]; - } - } - if (xsize == 0) xsize = 9; - /* size in x direction inclusive one blank on the left */ - xsize = xsize + 6; - - /* change all x-labels so that (0,0) is down-left */ - for (i = 0; i < consincluded; i++) - xcor[i] = xsize-1-xcor[i]; - - /* draw tree */ - treepict = new_cmatrix(xsize, 2*Maxspc-1); - for (i = 0; i < xsize; i++) - for (j = 0; j < 2*Maxspc-1; j++) - treepict[i][j] = ' '; - - /* draw root */ - for (i = 1; i < yroot; i++) - treepict[1][i] = ':'; - treepict[1][0] = ':'; - for (i = 2; i < xsize - 10; i++) - treepict[i][0] = '-'; - for (i = 0; i < 10; i++) - treepict[xsize-10+i][0] = Identif[outgroup][i]; - - /* then draw descending nodes */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '2' || - consbiparts[consincluded][i] == '3') - drawnode(i, 1); - } - - /* then draw descending taxa */ - for (i = 0; i < Maxspc; i++) { - if (consbiparts[consincluded][i] == '1' || - consbiparts[consincluded][i] == '3') { - treepict[1][ycortax[i]] = ':'; - for (j = 2; j < xsize-10; j++) - treepict[j][ycortax[i]] = '-'; - for (j = 0; j < 10; j++) - treepict[xsize-10+j][ycortax[i]] = Identif[i][j]; - } - } - - /* plot tree */ - for (i = 2*Maxspc-2; i > -1; i--) { - for (j = 0; j < xsize; j++) - fputc(treepict[j][i], plotfp); - fputc('\n', plotfp); - } - - free_ivector(xcor); - free_ivector(ycor); - free_ivector(ycormax); - free_ivector(ycormin); - free_ivector(ycortax); - free_cmatrix(treepict); -} - - - -/******************************************************************************/ -/* storing and evaluating quartet branching information */ -/******************************************************************************/ - -/* general remarks: - - for a quartet with the taxa a, b, c, d there are - three possible binary trees: - - 1) (a,b)-(c,d) - 2) (a,c)-(b,d) - 3) (a,d)-(b,c) - - For every quartet information about its branching structure is - stored. 
With the functions readquartet and writequartet - this information can be accessed. For every quartet (a,b,c,d) - with a < b < c < d (taxa) the branching information is encoded - using 4 bits: - - value 8 4 2 1 - +-------------+-------------+-------------+-------------+ - | not used | tree 3 | tree 2 | tree 1 | - +-------------+-------------+-------------+-------------+ - - If the branching structure of the taxa corresponds to one of the - three trees the corresponding bit is set. If the branching structure - is unclear because two of the three trees have the same maximum - likelihood value the corresponding two bits are set. If the branching - structure is completely unknown all the bits are set (the highest - bit is always cleared because it is not used). - -*/ - -/* allocate memory for quartets */ -unsigned char *mallocquartets(int taxa) -{ - uli nc, numch; - unsigned char *qinfo; - - /* compute number of quartets */ - Numquartets = (uli) taxa*(taxa-1)*(taxa-2)*(taxa-3)/24; - if (Numquartets % 2 == 0) { /* even number */ - numch = Numquartets/2; - } else { /* odd number */ - numch = (Numquartets + 1)/2; - } - /* allocate memory */ - qinfo = (unsigned char *) malloc(numch * sizeof(unsigned char) ); - if (qinfo == NULL) maerror("quartetinfo in mallocquartets"); - for (nc = 0; nc < numch; nc++) qinfo[nc] = 0; - return(qinfo); -} - -/* free quartet memory */ -void freequartets() -{ - free(quartetinfo); -} - -/* read quartet info - a < b < c < d */ -unsigned char readquartet(int a, int b, int c, int d) -{ - uli qnum; - - qnum = (uli) a - + (uli) b*(b-1)/2 - + (uli) c*(c-1)*(c-2)/6 - + (uli) d*(d-1)*(d-2)*(d-3)/24; - if (qnum % 2 == 0) { /* even number */ - /* bits 0 to 3 */ - return (quartetinfo[qnum/2] & (unsigned char) 15); - } else { /* odd number */ - /* bits 4 to 7 */ - return ((quartetinfo[(qnum-1)/2] & (unsigned char) 240)>>4); - } -} - -/* write quartet info - a < b < c < d, 0 <= info <= 15 */ -void writequartet(int a, int b, int c, int d, unsigned char info) 
-{ - uli qnum; - - qnum = (uli) a - + (uli) b*(b-1)/2 - + (uli) c*(c-1)*(c-2)/6 - + (uli) d*(d-1)*(d-2)*(d-3)/24; - if (qnum % 2 == 0) { /* even number */ - /* bits 0 to 3 */ - quartetinfo[qnum/2] = - ((quartetinfo[qnum/2] & (unsigned char) 240) | - (info & (unsigned char) 15)); - } else { /* odd number */ - /* bits 4 to 7 */ - quartetinfo[(qnum-1)/2] = - ((quartetinfo[(qnum-1)/2] & (unsigned char) 15) | - ((info & (unsigned char) 15)<<4)); - } -} - -/* prototypes */ -void openfiletowrite(FILE **, char[], char[]); -void closefile(FILE *); - -/* sorts three doubles in descending order */ -void sort3doubles(dvector num, ivector order) -{ - if (num[0] > num[1]) { - if(num[2] > num[0]) { - order[0] = 2; - order[1] = 0; - order[2] = 1; - } else if (num[2] < num[1]) { - order[0] = 0; - order[1] = 1; - order[2] = 2; - } else { - order[0] = 0; - order[1] = 2; - order[2] = 1; - } - } else { - if(num[2] > num[1]) { - order[0] = 2; - order[1] = 1; - order[2] = 0; - } else if (num[2] < num[0]) { - order[0] = 1; - order[1] = 0; - order[2] = 2; - } else { - order[0] = 1; - order[1] = 2; - order[2] = 0; - } - } -} - -/* checks out all possible quartets */ -void computeallquartets() -{ - double onethird; - uli nq; - unsigned char treebits[3]; - FILE *lhfp; -# if ! 
PARALLEL - int a, b, c, i; - double qc2, mintogo, minutes, hours, temp; - double temp1, temp2, temp3; - unsigned char discreteweight[3]; -# endif - - onethird = 1.0/3.0; - treebits[0] = (unsigned char) 1; - treebits[1] = (unsigned char) 2; - treebits[2] = (unsigned char) 4; - - if (show_optn) { /* list all unresolved quartets */ - openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees"); - fprintf(unresfp, "List of all completely unresolved quartets:\n\n"); - } - - nq = 0; - badqs = 0; - - /* start timer - percentage of completed quartets */ - time(&time0); - time1 = time0; - mflag = 0; - -# if PARALLEL - { - schedtype sched; - int flag; - MPI_Status stat; - int dest = 1; - uli qaddr =0; - uli qamount=0; - int qblocksent = 0; - int apr; - uli sq, noq; - initsched(&sched, numquarts(Maxspc), PP_NumProcs-1, 4); - qamount=sgss(&sched); - while (qamount > 0) { - if (PP_emptyslave()) { - PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr); - qblocksent -= noq; - } - dest = PP_getslave(); - PP_SendDoQuartBlock(dest, qaddr, qamount, (approxqp ? 
APPROX : EXACT)); - qblocksent += qamount; - qaddr += qamount; - qamount=sgss(&sched); - - MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat); - while (flag) { - PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr); - qblocksent -= noq; - MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat); - } - } - while (qblocksent > 0) { - PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr); - qblocksent -= noq; - } - } -# else /* PARALLEL */ - - addtimes(GENERAL, &tarr); - if (savequartlh_optn) { - openfiletowrite(&lhfp, ALLQUARTLH, "all quartet likelihoods"); - if (saveqlhbin_optn) writetpqfheader(Maxspc, lhfp, 3); - else writetpqfheader(Maxspc, lhfp, 4); - } - - for (i = 3; i < Maxspc; i++) - for (c = 2; c < i; c++) - for (b = 1; b < c; b++) - for (a = 0; a < b; a++) { - nq++; - - /* generate message every 15 minutes */ - /* check timer */ - time(&time2); - if ( (time2 - time1) > 900) { - /* every 900 seconds */ - /* percentage of completed quartets */ - if (mflag == 0) { - FPRINTF(STDOUTFILE "\n"); - mflag = 1; - } - qc2 = 100.*nq/Numquartets; - mintogo = (100.0-qc2) * - (double) (time2-time0)/60.0/qc2; - hours = floor(mintogo/60.0); - minutes = mintogo - 60.0*hours; - FPRINTF(STDOUTFILE "%.2f%%", qc2); - FPRINTF(STDOUTFILE " completed (remaining"); - FPRINTF(STDOUTFILE " time: %.0f", hours); - FPRINTF(STDOUTFILE " hours %.0f", minutes); - FPRINTF(STDOUTFILE " minutes)\n"); - fflush(STDOUT); - time1 = time2; - } - - /* maximum likelihood values */ - - /* exact or approximate maximum likelihood values */ - compute_quartlklhds(a,b,c,i,&qweight[0],&qweight[1],&qweight[2], (approxqp ? 
APPROX : EXACT)); - - if (savequartlh_optn) { - if (saveqlhbin_optn) - fwrite(qweight, sizeof(double), 3, lhfp); - else - fprintf(lhfp, "(%d,%d,%d,%d)\t%f\t%f\t%f\n", a, b, c, i, - qweight[0], qweight[1], qweight[2]); - } - - /* sort in descending order */ - sort3doubles(qweight, qworder); - - if (usebestq_optn) { - sqorder[2] = 2; - discreteweight[sqorder[2]] = treebits[qworder[0]]; - if (qweight[qworder[0]] == qweight[qworder[1]]) { - discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[1]]; - if (qweight[qworder[1]] == qweight[qworder[2]]) { - discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[2]]; - discreteweight[sqorder[2]] = 7; - } - } - } else { - - /* compute Bayesian weights */ - qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]); - qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]); - qweight[qworder[0]] = 1.0; - temp = qweight[0] + qweight[1] + qweight[2]; - qweight[0] = qweight[0]/temp; - qweight[1] = qweight[1]/temp; - qweight[2] = qweight[2]/temp; - - /* square deviations */ - temp1 = 1.0 - qweight[qworder[0]]; - sqdiff[0] = temp1 * temp1 + - qweight[qworder[1]] * qweight[qworder[1]] + - qweight[qworder[2]] * qweight[qworder[2]]; - discreteweight[0] = treebits[qworder[0]]; - - temp1 = 0.5 - qweight[qworder[0]]; - temp2 = 0.5 - qweight[qworder[1]]; - sqdiff[1] = temp1 * temp1 + temp2 * temp2 + - qweight[qworder[2]] * qweight[qworder[2]]; - discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]]; - - temp1 = onethird - qweight[qworder[0]]; - temp2 = onethird - qweight[qworder[1]]; - temp3 = onethird - qweight[qworder[2]]; - sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3; - discreteweight[2] = (unsigned char) 7; - - /* sort in descending order */ - sort3doubles(sqdiff, sqorder); - } - - /* determine best discrete weight */ - writequartet(a, b, c, i, discreteweight[sqorder[2]]); - - /* counting completely unresolved quartets */ - if 
(discreteweight[sqorder[2]] == 7) { - badqs++; - badtaxon[a]++; - badtaxon[b]++; - badtaxon[c]++; - badtaxon[i]++; - if (show_optn) { - fputid10(unresfp, a); - fprintf(unresfp, " "); - fputid10(unresfp, b); - fprintf(unresfp, " "); - fputid10(unresfp, c); - fprintf(unresfp, " "); - fputid(unresfp, i); - fprintf(unresfp, "\n"); - } - } - addtimes(QUARTETS, &tarr); - } - if (savequartlh_optn) { - closefile(lhfp); - } - if (show_optn) - closefile(unresfp); - if (mflag == 1) - FPRINTF(STDOUTFILE "\n"); -# endif /* PARALLEL */ - -} - -/* check the branching structure between the leaves (not the taxa!) - A, B, C, and I (A, B, C, I don't need to be ordered). As a result, - the two leaves that are closer related to each other than to leaf I - are found in chooseA and chooseB. If the branching structure is - not uniquely defined, ChooseA and ChooseB are chosen randomly - from the possible taxa */ -void checkquartet(int A, int B, int C, int I) -{ - int i, j, a, b, taxon[5], leaf[5], ipos; - unsigned char qresult; - int notunique = FALSE; - - /* The relationship between leaves and taxa is defined by trueID */ - taxon[1] = trueID[A]; /* taxon number */ - leaf[1] = A; /* leaf number */ - taxon[2] = trueID[B]; - leaf[2] = B; - taxon[3] = trueID[C]; - leaf[3] = C; - taxon[4] = trueID[I]; - leaf[4] = I; - - /* sort for taxa */ - /* Source: Numerical Recipes (PIKSR2.C) */ - for (j = 2; j <= 4; j++) { - a = taxon[j]; - b = leaf[j]; - i = j-1; - while (i > 0 && taxon[i] > a) { - taxon[i+1] = taxon[i]; - leaf[i+1] = leaf[i]; - i--; - } - taxon[i+1] = a; - leaf[i+1] = b; - } - - /* where is leaf I ? 
*/ - ipos = 1; - while (leaf[ipos] != I) ipos++; - - /* look at sequence quartet */ - qresult = readquartet(taxon[1], taxon[2], taxon[3], taxon[4]); - - /* chooseA and chooseB */ - do { - switch (qresult) { - - /* one single branching structure */ - - /* 001 */ - case 1: if (ipos == 1 || ipos == 2) { - chooseA = leaf[3]; - chooseB = leaf[4]; - } else { - chooseA = leaf[1]; - chooseB = leaf[2]; - } - notunique = FALSE; - break; - - /* 010 */ - case 2: if (ipos == 1 || ipos == 3) { - chooseA = leaf[2]; - chooseB = leaf[4]; - } else { - chooseA = leaf[1]; - chooseB = leaf[3]; - } - notunique = FALSE; - break; - - /* 100 */ - case 4: if (ipos == 1 || ipos == 4) { - chooseA = leaf[2]; - chooseB = leaf[3]; - } else { - chooseA = leaf[1]; - chooseB = leaf[4]; - } - notunique = FALSE; - break; - - /* two possible branching structures */ - - /* 011 */ - case 3: if (randominteger(2)) qresult = 1; - else qresult = 2; - notunique = TRUE; - break; - - /* 101 */ - case 5: if (randominteger(2)) qresult = 1; - else qresult = 4; - notunique = TRUE; - break; - - /* 110 */ - case 6: if (randominteger(2)) qresult = 2; - else qresult = 4; - notunique = TRUE; - break; - - /* three possible branching structures */ - - /* 111 */ - case 7: qresult = (1 << randominteger(3)); /* 1, 2, or 4 */ - notunique = TRUE; - break; - - default: /* Program error [checkquartet] */ -#if PARALLEL - FPRINTF(STDOUTFILE "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K-PARALLEL TO DEVELOPERS (%d,%d,%d,%d) = %ld\n\n\n", - PP_Myid, taxon[1], taxon[2], taxon[3], taxon[4], - quart2num(taxon[1], taxon[2], taxon[3], taxon[4])); -#else - FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR K TO DEVELOPERS\n\n\n"); -#endif - - } - } while (notunique); - - return; -} - diff --git a/forester/archive/RIO/others/puzzle_mod/src/sched.c b/forester/archive/RIO/others/puzzle_mod/src/sched.c deleted file mode 100644 index 3f1c0f6..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/sched.c +++ /dev/null @@ -1,423 +0,0 @@ -/* - * 
sched.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -#include -#include -#include -#include "sched.h" -/* #include "ppuzzle.h" */ - -#define STDOUT stdout -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUTFILE STDOUT, -#endif - -int scinit; -int ssinit; -int fscinit; -int gssinit; -int tssinit; - -int n, chunksize; -int p; - -#ifdef SCHEDTEST - schedtype testsched; -#endif - -void printsched(schedtype sch) -{ - FPRINTF(STDOUTFILE "Current scheduling status:\n"); - FPRINTF(STDOUTFILE " truetasks=%5ld - alltasks=%5ld - numtasks=%5ld - numprocs=%5d\n", - sch.truetasks, sch.alltasks, sch.numtasks, sch.numprocs); - FPRINTF(STDOUTFILE " delta =%5d - overhead=%5d - rest =%5d - inited =%5d\n", - sch.delta, sch.overhead, sch.rest, sch.inited); - FPRINTF(STDOUTFILE " nconst =%5d - fconst =%5f - lconst =%5f - kconst =%5f\n", - sch.nconst, sch.fconst, sch.lconst, sch.kconst); -} - -void initsched(schedtype *sch, uli tasks, int procs, uli minchunk) -{ - if (minchunk < 1) minchunk = 1; - (*sch).minchunk = minchunk; - (*sch).truetasks = tasks; - (*sch).rest = (int)((*sch).truetasks % (*sch).minchunk); - (*sch).alltasks = (tasks - (*sch).rest); - (*sch).numtasks = (*sch).alltasks; - (*sch).numprocs = procs; - (*sch).delta = 0; - (*sch).overhead = 0; - (*sch).nconst = 0; - (*sch).fconst = 0; - (*sch).lconst = 0; - (*sch).kconst = 0; - (*sch).inited = 0; - -# ifdef PVERBOSE1 - printsched(*sch); -# endif /* PVERBOSE1 */ -} - -/************************************** -* Static Chunking 
-**************************************/ -uli sc(schedtype *sch) -{ - uli tmp; - - if ((*sch).inited == 0) { - (*sch).overhead = (*sch).alltasks % (*sch).numprocs; - (*sch).delta = ((*sch).alltasks - (*sch).overhead) / (*sch).numprocs; - (*sch).inited ++; - } - - if (!(*sch).overhead) { - if ((*sch).numtasks >= (*sch).delta) - tmp = (uli)(*sch).delta; - else - tmp = 0; - } else { - if ((*sch).numtasks >= ((*sch).delta + 1)) { - tmp = (uli)(*sch).delta + 1; - (*sch).overhead--; - } else - tmp = 0; - } - - /* correction */ - if ((tmp % (*sch).minchunk) > 0) { - tmp += (*sch).minchunk - (tmp % (*sch).minchunk); - } - - (*sch).numtasks -= tmp; - - if ((*sch).numtasks == 0) { - tmp += (uli)(*sch).rest; - (*sch).rest = 0; - } - return tmp; -} /* SC */ - - -/************************************** -* Self Scheduling -**************************************/ -uli ss(schedtype *sch) -{ - uli tmp; - - if ((*sch).inited == 0) { - (*sch).inited ++; - } - - if ((*sch).numtasks >= 1) - tmp = 1; - else - tmp = (*sch).numtasks; - - /* correction */ - if ((tmp % (*sch).minchunk) > 0) { - tmp += (*sch).minchunk - (tmp % (*sch).minchunk); - } - - (*sch).numtasks -= tmp; - - if ((*sch).numtasks == 0) { - tmp += (uli)(*sch).rest; - (*sch).rest = 0; - } - - return tmp; -} /* SS */ - - -/************************************** -* fixed-size chunking -**************************************/ -int fsc() -{ - static int R ; - static int delta ; - static int overhead; - - int tmp; - - if (fscinit == 0) { - R = n; - overhead = n % p; - delta = (n - overhead) / p; - fscinit ++; - } - - if (!overhead) { - if (R >= delta) - tmp = delta; - else - tmp = 0; - } else { - if (R >= (delta + 1)) { - tmp = delta + 1; - overhead--; - } else - tmp = 0; - } - - R -= tmp; - return tmp; -} /* FSC */ - - -/************************************** -* Guided Self Scheduling -**************************************/ -uli gss(schedtype *sch) -{ - uli tmp; - - if ((*sch).inited == 0) { - (*sch).inited ++; - } - - if 
((*sch).numtasks >= 1) { - tmp = (uli)ceil((*sch).numtasks / (*sch).numprocs); - if (tmp == 0) tmp = 1; - } else - tmp = 0; - - /* correction */ - if ((tmp % (*sch).minchunk) > 0) { - tmp += (*sch).minchunk - (tmp % (*sch).minchunk); - } - - (*sch).numtasks -= tmp; - - if ((*sch).numtasks == 0) { - tmp += (uli)(*sch).rest; - (*sch).rest = 0; - } - return tmp; -} /* GSS */ - -/************************************** -* Smooth Guided Self Scheduling -**************************************/ -uli sgss(schedtype *sch) -{ - uli tmp; - - if ((*sch).inited == 0) { - (*sch).inited ++; - } - - if ((*sch).numtasks >= 1) { - tmp = (uli)ceil(((*sch).numtasks / (*sch).numprocs) / 2); - if (tmp == 0) tmp = 1; - } else - tmp = 0; - - /* correction */ - if ((tmp % (*sch).minchunk) > 0) { - tmp += (*sch).minchunk - (tmp % (*sch).minchunk); - } - - (*sch).numtasks -= tmp; - - if ((*sch).numtasks == 0) { - tmp += (uli)(*sch).rest; - (*sch).rest = 0; - } - return tmp; -} /* SGSS */ - - -/************************************** -* Trapezoid Self Scheduling -**************************************/ -uli tss(schedtype *sch) -{ - uli tmp; - - if ((*sch).inited == 0) { - (*sch).fconst = ceil((*sch).numtasks / (2*(*sch).numprocs)); - if ((*sch).fconst == 0) (*sch).fconst = 1; - (*sch).lconst = 1; - (*sch).nconst = ceil( (2*n) / ((*sch).fconst + (*sch).lconst) ); - (*sch).ddelta = (((*sch).fconst - (*sch).lconst) / ((*sch).nconst - 1)); - (*sch).kconst = (*sch).fconst; - FPRINTF(STDOUTFILE "f = n/2p = %.2f ; l = %.2f\n", (*sch).fconst, (*sch).lconst); - FPRINTF(STDOUTFILE "N = 2n/(f+l) = %d ; delta = (f-l)/(N-1) = %.2f\n", (*sch).nconst, (*sch).ddelta); - (*sch).inited ++; - } - - if ((*sch).kconst <= (double) (*sch).numtasks) { - tmp = (uli)ceil((*sch).kconst); - (*sch).kconst -= (*sch).ddelta; - } else { - tmp = (uli)(*sch).numtasks; - (*sch).kconst = 0.0; - } - - /* correction */ - if ((tmp % (*sch).minchunk) > 0) { - tmp += (*sch).minchunk - (tmp % (*sch).minchunk); - } - - (*sch).numtasks 
-= tmp; - - if ((*sch).numtasks == 0) { - tmp += (uli)(*sch).rest; - (*sch).rest = 0; - } - return tmp; - -} /* TSS */ - - -/******************/ - - -#ifdef SCHEDTEST - uli numquarts(int maxspc) - { - uli tmp; - int a, b, c, d; - - if (maxspc < 4) - return (uli)0; - else { - maxspc--; - a = maxspc-3; - b = maxspc-2; - c = maxspc-1; - d = maxspc; - - tmp = (uli) 1 + a + - (uli) b * (b-1) / 2 + - (uli) c * (c-1) * (c-2) / 6 + - (uli) d * (d-1) * (d-2) * (d-3) / 24; - return (tmp); - } - } /* numquarts */ -#endif - - - - -/************************************** -* main -**************************************/ -#ifdef SCHEDTEST -int main(int argc, char *argv[]) -{ - int tcount, - count, - lastsize, - size; - if ((argc > 4) || (argc < 3)) { - FPRINTF(STDOUTFILE "\n\n Usage: %s <# species> <# processors> []\n\n", argv[0]); - exit(1); - } - - chunksize = 1; - - switch(argc) { - case 4: - chunksize = atoi(argv[3]); - case 3: - n = numquarts(atoi(argv[1])); - p = atoi(argv[2]); - } - - FPRINTF(STDOUTFILE "proc=%6d\n", p); - FPRINTF(STDOUTFILE "task=%6d\n", n); - - initsched(&testsched, n, p, chunksize); - printsched(testsched); - - count=1; tcount = 0; - FPRINTF(STDOUTFILE "\n\n---------------------------\n"); - FPRINTF(STDOUTFILE "SC(sched) - Static Chunking\n"); - FPRINTF(STDOUTFILE "---------------------------\n\n"); - do { size = sc(&testsched); - if (size > 0) {FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, size , (size%chunksize) ? '!' : ' '); - tcount+=size;} - else FPRINTF(STDOUTFILE "%d tasks in %d chunks\n", tcount, (count-1)); - } while (size > 0); - - - initsched(&testsched, n, p, chunksize); - printsched(testsched); - - count=1; tcount = 0; - FPRINTF(STDOUTFILE "\n\n---------------------------\n"); - FPRINTF(STDOUTFILE "SS(sched) - Self Scheduling\n"); - FPRINTF(STDOUTFILE "---------------------------\n\n"); - do { size = ss(&testsched); - if (size > 0) {if (count==1) FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, size , (size%chunksize) ? '!' 
: ' '); - count++; - tcount+=size; - lastsize = size;} - else {FPRINTF(STDOUTFILE " ...\n"); - FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, lastsize , (lastsize%chunksize) ? '!' : ' '); - FPRINTF(STDOUTFILE "%d tasks in %d chunks\n", tcount, (count-1));} - } while (size > 0); - - -/**/ - count=1; tcount = 0; - FPRINTF(STDOUTFILE "\n\n---------------------------\n"); - FPRINTF(STDOUTFILE "FSC() - Fixed-Size Chunking\n"); - FPRINTF(STDOUTFILE "---------------------------\n\n"); - do { size = fsc(); - if (size > 0) {FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, size , (size%chunksize) ? '!' : ' '); - tcount+=size;} - else FPRINTF(STDOUTFILE "%d tasks in %d chunks\n", tcount, (count-1)); - } while (size > 0); -/**/ - - initsched(&testsched, n, p, chunksize); - printsched(testsched); - - count=1; tcount = 0; - FPRINTF(STDOUTFILE "\n\n-----------------------------------\n"); - FPRINTF(STDOUTFILE "GSS(sched) - Guided Self Scheduling\n"); - FPRINTF(STDOUTFILE "-----------------------------------\n\n"); - do { size = gss(&testsched); - if (size > 0) {FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, size , (size%chunksize) ? '!' : ' '); - tcount+=size;} - else FPRINTF(STDOUTFILE "%d tasks in %d chunks\n", tcount, (count-1)); - } while (size > 0); - - initsched(&testsched, n, p, chunksize); - printsched(testsched); - - count=1; tcount = 0; - FPRINTF(STDOUTFILE "\n\n--------------------------------------\n"); - FPRINTF(STDOUTFILE "TSS(sched) - Trapezoid Self Scheduling\n"); - FPRINTF(STDOUTFILE "--------------------------------------\n\n"); - do { size = tss(&testsched); - if (size > 0) {FPRINTF(STDOUTFILE "%6d. chunk = %6d %c\n", count++, size , (size%chunksize) ? '!' 
: ' '); - tcount+=size;} - else FPRINTF(STDOUTFILE "%d tasks in %d chunks\n", tcount, (count-1)); - } while (size > 0); - return (0); -} -#endif diff --git a/forester/archive/RIO/others/puzzle_mod/src/sched.h b/forester/archive/RIO/others/puzzle_mod/src/sched.h deleted file mode 100644 index e75bdd2..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/sched.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * sched.h - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -#ifndef SCHED_H -#define SCHED_H -#ifndef SCHEDTEST -# include "util.h" -#else - typedef unsigned long int uli; -#endif - - -typedef struct sched_t{ - uli truetasks; - uli alltasks; - uli numtasks; - uli minchunk; - int numprocs; - int delta; - double ddelta; - int overhead; - int rest; - int nconst; - double fconst; - double lconst; - double kconst; - int inited; -} schedtype; - -void num2quart(uli qnum, int *a, int *b, int *c, int *d); -uli numquarts(int maxspc); -uli quart2num (int a, int b, int c, int d); - -void printsched(schedtype sch); -void initsched(schedtype *sch, uli tasks, int procs, uli minchunk); -uli sc(schedtype *sch); -uli gss(schedtype *sch); -uli sgss(schedtype *sch); -uli tss(schedtype *sch); - -#endif /* SCHED_H */ diff --git a/forester/archive/RIO/others/puzzle_mod/src/test b/forester/archive/RIO/others/puzzle_mod/src/test deleted file mode 100644 index a680df2..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/test +++ /dev/null @@ -1,19 +0,0 @@ -CC gcc -LIBS -lm -CFLAGS -g -O2 -DEFS -DPACKAGE=\"tree-puzzle\" -DVERSION=\"5.0\" -DHAVE_LIBM=1 -DSTDC_HEADERS=1 -DHAVE_LIMITS_H=1 -SET_MAKE - -HCC @HCC@ -MPICC -MPCC @MPCC@ - -MPICC -MPILIBS -MPIDEFS -MPICFLAGS - -PCC 
@PCC@ -PLIBS @PLIBS@ -PDEFS @PDEFS@ -PCFLAGS @PCFLAGS@ diff --git a/forester/archive/RIO/others/puzzle_mod/src/test.in b/forester/archive/RIO/others/puzzle_mod/src/test.in deleted file mode 100644 index 0dc7ddc..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/test.in +++ /dev/null @@ -1,19 +0,0 @@ -CC @CC@ -LIBS @LIBS@ -CFLAGS @CFLAGS@ -DEFS @DEFS@ -SET_MAKE @SET_MAKE@ - -HCC @HCC@ -MPICC @MPICC@ -MPCC @MPCC@ - -MPICC @MPICC@ -MPILIBS @MPILIBS@ -MPIDEFS @MPIDEFS@ -MPICFLAGS @MPICFLAGS@ - -PCC @PCC@ -PLIBS @PLIBS@ -PDEFS @PDEFS@ -PCFLAGS @PCFLAGS@ diff --git a/forester/archive/RIO/others/puzzle_mod/src/util.c b/forester/archive/RIO/others/puzzle_mod/src/util.c deleted file mode 100644 index 667758b..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/util.c +++ /dev/null @@ -1,748 +0,0 @@ -/* - * util.c - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. 
- */ - - -#include "util.h" - -#define STDOUT stdout -#ifndef PARALLEL /* because printf() runs significantly faster */ - /* than fprintf(stdout) on an Apple McIntosh */ - /* (HS) */ -# define FPRINTF printf -# define STDOUTFILE -#else -# define FPRINTF fprintf -# define STDOUTFILE STDOUT, - extern int PP_NumProcs; - extern int PP_Myid; - long int PP_randn; - long int PP_rand; -#endif - - -/* - * memory allocation error handler - */ - -void maerror(char *message) -{ - FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (lack of memory: %s)\n\n", message); - FPRINTF(STDOUTFILE "Hint for Macintosh users:\n"); - FPRINTF(STDOUTFILE "Use the command of the Finder to increase the memory partition!\n\n"); - exit(1); -} - - -/* - * memory allocate double vectors, matrices, and cubes - */ - -dvector new_dvector(int n) -{ - dvector v; - - v = (dvector) malloc((unsigned) (n * sizeof(double))); - if (v == NULL) maerror("step 1 in new_dvector"); - - return v; -} - -dmatrix new_dmatrix(int nrow, int ncol) -{ - int i; - dmatrix m; - - m = (dmatrix) malloc((unsigned) (nrow * sizeof(dvector))); - if (m == NULL) maerror("step 1 in in new_dmatrix"); - - *m = (dvector) malloc((unsigned) (nrow * ncol * sizeof(double))); - if (*m == NULL) maerror("step 2 in in new_dmatrix"); - - for (i = 1; i < nrow; i++) m[i] = m[i-1] + ncol; - - return m; -} - -dcube new_dcube(int ntri, int nrow, int ncol) -{ - int i, j; - dcube c; - - c = (dcube) malloc((unsigned) (ntri * sizeof(dmatrix))); - if (c == NULL) maerror("step 1 in in new_dcube"); - - *c = (dmatrix) malloc((unsigned) (ntri * nrow * sizeof(dvector))); - if (*c == NULL) maerror("step 2 in in new_dcube"); - - **c = (dvector) malloc((unsigned) (ntri * nrow * ncol * sizeof(double))); - if (**c == NULL) maerror("step 3 in in new_dcube"); - - for (j = 1; j < nrow; j++) c[0][j] = c[0][j-1] + ncol; - - for (i = 1; i < ntri; i++) { - c[i] = c[i-1] + nrow; - c[i][0] = c[i-1][0] + nrow * ncol; - for (j = 1; j < nrow; j++) c[i][j] = c[i][j-1] + ncol; - } - - 
return c; -} - -void free_dvector(dvector v) -{ - free((double *) v); -} - -void free_dmatrix(dmatrix m) -{ - free((double *) *m); - free((double *) m); -} - -void free_dcube(dcube c) -{ - free((double *) **c); - free((double *) *c); - free((double *) c); -} - - -/* - * memory allocate char vectors, matrices, and cubes - */ - -cvector new_cvector(int n) -{ - cvector v; - - v = (cvector) malloc((unsigned)n * sizeof(char)); - if (v == NULL) maerror("step1 in new_cvector"); - - return v; -} - -cmatrix new_cmatrix(int nrow, int ncol) -{ - int i; - cmatrix m; - - m = (cmatrix) malloc((unsigned) (nrow * sizeof(cvector))); - if (m == NULL) maerror("step 1 in new_cmatrix"); - - *m = (cvector) malloc((unsigned) (nrow * ncol * sizeof(char))); - if (*m == NULL) maerror("step 2 in new_cmatrix"); - - for (i = 1; i < nrow; i++) m[i] = m[i-1] + ncol; - - return m; -} - -ccube new_ccube(int ntri, int nrow, int ncol) -{ - int i, j; - ccube c; - - c = (ccube) malloc((unsigned) (ntri * sizeof(cmatrix))); - if (c == NULL) maerror("step 1 in new_ccube"); - - *c = (cmatrix) malloc((unsigned) (ntri * nrow * sizeof(cvector))); - if (*c == NULL) maerror("step 2 in new_ccube"); - - **c = (cvector) malloc((unsigned) (ntri * nrow * ncol * sizeof(char))); - if (**c == NULL) maerror("step 3 in new_ccube"); - - for (j = 1; j < nrow; j++) c[0][j] = c[0][j-1] + ncol; - - for (i = 1; i < ntri; i++) { - c[i] = c[i-1] + nrow; - c[i][0] = c[i-1][0] + nrow * ncol; - for (j = 1; j < nrow; j++) c[i][j] = c[i][j-1] + ncol; - } - - return c; -} - -void free_cvector(cvector v) -{ - free((char *) v); -} - -void free_cmatrix(cmatrix m) -{ - free((char *) *m); - free((char *) m); -} - -void free_ccube(ccube c) -{ - free((char *) **c); - free((char *) *c); - free((char *) c); -} - - -/* - * memory allocate int vectors, matrices, and cubes - */ - -ivector new_ivector(int n) -{ - ivector v; - - v = (ivector) malloc((unsigned) (n * sizeof(int))); - if (v == NULL) maerror("step 1 in new_ivector"); - - return v; -} 
- -imatrix new_imatrix(int nrow, int ncol) -{ - int i; - imatrix m; - - m = (imatrix) malloc((unsigned) (nrow * sizeof(ivector))); - if (m == NULL) maerror("step 1 in new_imatrix"); - - *m = (ivector) malloc((unsigned) (nrow * ncol * sizeof(int))); - if (*m == NULL) maerror("step 2 in new_imatrix"); - - for (i = 1; i < nrow; i++) m[i] = m[i-1] + ncol; - - return m; -} - -icube new_icube(int ntri, int nrow, int ncol) -{ - int i, j; - icube c; - - c = (icube) malloc((unsigned) (ntri * sizeof(imatrix))); - if (c == NULL) maerror("step 1 in new_icube"); - - *c = (imatrix) malloc((unsigned) (ntri * nrow * sizeof(ivector))); - if (*c == NULL) maerror("step 2 in new_icube"); - - **c = (ivector) malloc((unsigned) (ntri * nrow * ncol * sizeof(int))); - if (**c == NULL) maerror("step 3 in new_icube"); - - for (j = 1; j < nrow; j++) c[0][j] = c[0][j-1] + ncol; - - for (i = 1; i < ntri; i++) { - c[i] = c[i-1] + nrow; - c[i][0] = c[i-1][0] + nrow * ncol; - for (j = 1; j < nrow; j++) c[i][j] = c[i][j-1] + ncol; - } - - return c; -} - -void free_ivector(ivector v) -{ - free((int *) v); -} - -void free_imatrix(imatrix m) -{ - free((int *) *m); - free((int *) m); -} - -void free_icube(icube c) -{ - free((int *) **c); - free((int *) *c); - free((int *) c); -} - - -/* - * memory allocate uli vectors, matrices, and cubes - */ - -ulivector new_ulivector(int n) -{ - ulivector v; - - v = (ulivector) malloc((unsigned) (n * sizeof(uli))); - if (v == NULL) maerror("step 1 in new_ulivector"); - - return v; -} - -ulimatrix new_ulimatrix(int nrow, int ncol) -{ - int i; - ulimatrix m; - - m = (ulimatrix) malloc((unsigned) (nrow * sizeof(ulivector))); - if (m == NULL) maerror("step 1 in new_ulimatrix"); - - *m = (ulivector) malloc((unsigned) (nrow * ncol * sizeof(uli))); - if (*m == NULL) maerror("step 2 in new_ulimatrix"); - - for (i = 1; i < nrow; i++) m[i] = m[i-1] + ncol; - - return m; -} - -ulicube new_ulicube(int ntri, int nrow, int ncol) -{ - int i, j; - ulicube c; - - c = (ulicube) 
malloc((unsigned) (ntri * sizeof(ulimatrix))); - if (c == NULL) maerror("step 1 in new_ulicube"); - - *c = (ulimatrix) malloc((unsigned) (ntri * nrow * sizeof(ulivector))); - if (*c == NULL) maerror("step 2 in new_ulicube"); - - **c = (ulivector) malloc((unsigned) (ntri * nrow * ncol * sizeof(uli))); - if (**c == NULL) maerror("step 3 in new_ulicube"); - - for (j = 1; j < nrow; j++) c[0][j] = c[0][j-1] + ncol; - - for (i = 1; i < ntri; i++) { - c[i] = c[i-1] + nrow; - c[i][0] = c[i-1][0] + nrow * ncol; - for (j = 1; j < nrow; j++) c[i][j] = c[i][j-1] + ncol; - } - - return c; -} - -void free_ulivector(ulivector v) -{ - free((uli *) v); -} - -void free_ulimatrix(ulimatrix m) -{ - free((uli *) *m); - free((uli *) m); -} - -void free_ulicube(ulicube c) -{ - free((uli *) **c); - free((uli *) *c); - free((uli *) c); -} - - -/******************************************************************************/ -/* random numbers generator (Numerical recipes) */ -/******************************************************************************/ - -/* definitions */ -#define IM1 2147483563 -#define IM2 2147483399 -#define AM (1.0/IM1) -#define IMM1 (IM1-1) -#define IA1 40014 -#define IA2 40692 -#define IQ1 53668 -#define IQ2 52774 -#define IR1 12211 -#define IR2 3791 -#define NTAB 32 -#define NDIV (1+IMM1/NTAB) -#define EPS 1.2e-7 -#define RNMX (1.0-EPS) - -/* variable */ -long idum; - -double randomunitintervall() -/* Long period (> 2e18) random number generator. Returns a uniform random - deviate between 0.0 and 1.0 (exclusive of endpoint values). 
- - Source: - Press et al., "Numerical recipes in C", Cambridge University Press, 1992 - (chapter 7 "Random numbers", ran2 random number generator) */ -{ - int j; - long k; - static long idum2=123456789; - static long iy=0; - static long iv[NTAB]; - double temp; - - if (idum <= 0) { - if (-(idum) < 1) - idum=1; - else - idum=-(idum); - idum2=(idum); - for (j=NTAB+7;j>=0;j--) { - k=(idum)/IQ1; - idum=IA1*(idum-k*IQ1)-k*IR1; - if (idum < 0) - idum += IM1; - if (j < NTAB) - iv[j] = idum; - } - iy=iv[0]; - } - k=(idum)/IQ1; - idum=IA1*(idum-k*IQ1)-k*IR1; - if (idum < 0) - idum += IM1; - k=idum2/IQ2; - idum2=IA2*(idum2-k*IQ2)-k*IR2; - if (idum2 < 0) - idum2 += IM2; - j=iy/NDIV; - iy=iv[j]-idum2; - iv[j] = idum; - if (iy < 1) - iy += IMM1; - if ((temp=AM*iy) > RNMX) - return RNMX; - else - return temp; -} - -#undef IM1 -#undef IM2 -#undef AM -#undef IMM1 -#undef IA1 -#undef IA2 -#undef IQ1 -#undef IQ2 -#undef IR1 -#undef IR2 -#undef NTAB -#undef NDIV -#undef EPS -#undef RNMX - -int initrandom(int seed) -{ - srand((unsigned) time(NULL)); - if (seed < 0) - seed = rand(); - idum=-(long) seed; -# ifdef PARALLEL - { - int n; - for (n=0; n= 0.0 ? fabs(a) : -fabs(a)) - -/* Brents method in one dimension */ -double brent(double ax, double bx, double cx, double (*f)(double), double tol, - double *foptx, double *f2optx, double fax, double fbx, double fcx) -{ - int iter; - double a,b,d=0,etemp,fu,fv,fw,fx,p,q,r,tol1,tol2,u,v,w,x,xm; - double xw,wv,vx; - double e=0.0; - - a=(ax < cx ? ax : cx); - b=(ax > cx ? 
ax : cx); - x=bx; - fx=fbx; - if (fax < fcx) { - w=ax; - fw=fax; - v=cx; - fv=fcx; - } else { - w=cx; - fw=fcx; - v=ax; - fv=fax; - } - for (iter=1;iter<=ITMAX;iter++) { - xm=0.5*(a+b); - tol2=2.0*(tol1=tol*fabs(x)+ZEPS); - if (fabs(x-xm) <= (tol2-0.5*(b-a))) { - *foptx = fx; - xw = x-w; - wv = w-v; - vx = v-x; - *f2optx = 2.0*(fv*xw + fx*wv + fw*vx)/ - (v*v*xw + x*x*wv + w*w*vx); - return x; - } - if (fabs(e) > tol1) { - r=(x-w)*(fx-fv); - q=(x-v)*(fx-fw); - p=(x-v)*q-(x-w)*r; - q=2.0*(q-r); - if (q > 0.0) p = -p; - q=fabs(q); - etemp=e; - e=d; - if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x)) - d=CGOLD*(e=(x >= xm ? a-x : b-x)); - else { - d=p/q; - u=x+d; - if (u-a < tol2 || b-u < tol2) - d=SIGN(tol1,xm-x); - } - } else { - d=CGOLD*(e=(x >= xm ? a-x : b-x)); - } - u=(fabs(d) >= tol1 ? x+d : x+SIGN(tol1,d)); - fu=(*f)(u); - if (fu <= fx) { - if (u >= x) a=x; else b=x; - SHFT(v,w,x,u) - SHFT(fv,fw,fx,fu) - } else { - if (u < x) a=u; else b=u; - if (fu <= fw || w == x) { - v=w; - w=u; - fv=fw; - fw=fu; - } else if (fu <= fv || v == x || v == w) { - v=u; - fv=fu; - } - } - } - *foptx = fx; - xw = x-w; - wv = w-v; - vx = v-x; - *f2optx = 2.0*(fv*xw + fx*wv + fw*vx)/ - (v*v*xw + x*x*wv + w*w*vx); - return x; -} -#undef ITMAX -#undef CGOLD -#undef ZEPS -#undef SHFT -#undef SIGN -#undef GOLD -#undef GLIMIT -#undef TINY - -/* one-dimensional minimization - as input a lower and an upper limit and a trial - value for the minimum is needed: xmin < xguess < xmax - the function and a fractional tolerance has to be specified - onedimenmin returns the optimal x value and the value of the function - and its second derivative at this point - */ -double onedimenmin(double xmin, double xguess, double xmax, double (*f)(double), - double tol, double *fx, double *f2x) -{ - double eps, optx, ax, bx, cx, fa, fb, fc; - - /* first attempt to bracketize minimum */ - eps = xguess*tol*50.0; - ax = xguess - eps; - if (ax < xmin) ax = xmin; - bx = xguess; - cx = xguess + eps; 
- if (cx > xmax) cx = xmax; - - /* check if this works */ - fa = (*f)(ax); - fb = (*f)(bx); - fc = (*f)(cx); - - /* if it works use these borders else be conservative */ - if ((fa < fb) || (fc < fb)) { - if (ax != xmin) fa = (*f)(xmin); - if (cx != xmax) fc = (*f)(xmax); - optx = brent(xmin, xguess, xmax, f, tol, fx, f2x, fa, fb, fc); - } else - optx = brent(ax, bx, cx, f, tol, fx, f2x, fa, fb, fc); - - return optx; /* return optimal x */ -} - -/* two-dimensional minimization with borders and calculations of standard errors */ -/* we optimize along basis vectors - not very optimal but it seems to work well */ -void twodimenmin(double tol, - int active1, double min1, double *x1, double max1, double (*func1)(double), double *err1, - int active2, double min2, double *x2, double max2, double (*func2)(double), double *err2) -{ - int it, nump, change; - double x1old, x2old; - double fx, f2x; - - it = 0; - nump = 0; - - /* count number of parameters */ - if (active1) nump++; - if (active2) nump++; - - do { /* repeat until nothing changes any more */ - it++; - change = FALSE; - - /* optimize first variable */ - if (active1) { - - if ((*x1) <= min1) (*x1) = min1 + 0.2*(max1-min1); - if ((*x1) >= max1) (*x1) = max1 - 0.2*(max1-min1); - x1old = (*x1); - (*x1) = onedimenmin(min1, (*x1), max1, func1, tol, &fx, &f2x); - if ((*x1) < min1) (*x1) = min1; - if ((*x1) > max1) (*x1) = max1; - /* same tolerance as 1D minimization */ - if (fabs((*x1) - x1old) > 3.3*tol) change = TRUE; - - /* standard error */ - f2x = fabs(f2x); - if (1.0/(max1*max1) < f2x) (*err1) = sqrt(1.0/f2x); - else (*err1) = max1; - - } - - /* optimize second variable */ - if (active2) { - - if ((*x2) <= min2) (*x2) = min2 + 0.2*(max2-min2); - if ((*x2) >= max2) (*x2) = max2 - 0.2*(max2-min2); - x2old = (*x2); - (*x2) = onedimenmin(min2, (*x2), max2, func2, tol, &fx, &f2x); - if ((*x2) < min2) (*x2) = min2; - if ((*x2) > max2) (*x2) = max2; - /* same tolerance as 1D minimization */ - if (fabs((*x2) - x2old) > 
3.3*tol) change = TRUE; - - /* standard error */ - f2x = fabs(f2x); - if (1.0/(max2*max2) < f2x) (*err2) = sqrt(1.0/f2x); - else (*err2) = max2; - - } - - if (nump == 1) return; - - } while (it != MAXITS && change); - - return; -} - diff --git a/forester/archive/RIO/others/puzzle_mod/src/util.h b/forester/archive/RIO/others/puzzle_mod/src/util.h deleted file mode 100644 index 20f37e5..0000000 --- a/forester/archive/RIO/others/puzzle_mod/src/util.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * util.h - * - * - * Part of TREE-PUZZLE 5.0 (June 2000) - * - * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, - * M. Vingron, and Arndt von Haeseler - * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler - * - * All parts of the source except where indicated are distributed under - * the GNU public licence. See http://www.opensource.org for details. - */ - - -#ifndef _UTIL_ -#define _UTIL_ - -#include -#include -#include -#include - - -/* - * general definitions - */ - -#define TRUE 1 -#define FALSE 0 - -#ifdef PARALLEL - extern long int PP_randn; - extern long int PP_rand; -#endif - -/* - * type definitions - */ - -typedef unsigned long int uli; - -typedef double *dvector, **dmatrix, ***dcube; -typedef char *cvector, **cmatrix, ***ccube; -typedef int *ivector, **imatrix, ***icube; -typedef uli *ulivector, **ulimatrix, ***ulicube; - - -/* - * prototypes of functions defined in util.c - */ - -void maerror(char *message); - -dvector new_dvector(int n); -dmatrix new_dmatrix(int nrow, int ncol); -dcube new_dcube(int ntri, int nrow, int ncol); -void free_dvector(dvector v); -void free_dmatrix(dmatrix m); -void free_dcube(dcube c); - -cvector new_cvector(int n); -cmatrix new_cmatrix(int nrow, int ncol); -ccube new_ccube(int ntri, int nrow, int ncol); -void free_cvector(cvector v); -void free_cmatrix(cmatrix m); -void free_ccube(ccube c); - -ivector new_ivector(int n); -imatrix new_imatrix(int nrow, int ncol); -icube new_icube(int ntri, int nrow, int ncol); -void 
free_ivector(ivector v); -void free_imatrix(imatrix m); -void free_icube(icube c); - -ulivector new_ulivector(int n); -ulimatrix new_ulimatrix(int nrow, int ncol); -ulicube new_ulicube(int ntri, int nrow, int ncol); -void free_ulivector(ulivector v); -void free_ulimatrix(ulimatrix m); -void free_ulicube(ulicube c); - -double randomunitintervall(void); -int initrandom(int seed); -int randominteger(int n); -void chooser(int t, int s, ivector slist); -void *myrealloc(void *, size_t); -cvector mygets(void); - -#define MAXITS 10 /* maximum number of iterations in twoedimenmin */ -double onedimenmin(double, double, double, double (*f )(double ), double, double *, double *); -void twodimenmin(double, int, double, double *, double, double (*func1 )(double ), double *, int, double, double *, double, double (*func2 )(double ), double *); - - - -#endif -- 1.7.10.2