1 # Copyright (C) 2002-2003 Washington University School of Medicine
2 # and Howard Hughes Medical Institute
5 # Author: Christian M. Zmasek
6 # zmasek@genetics.wustl.edu
7 # http://www.genetics.wustl.edu/eddy/people/zmasek/
9 # Last modified 03/13/03
18 our @ISA = qw( Exporter );
20 our @EXPORT = qw( executeConsense
23 executePuzzleDQObootstrapped
25 startsWithSWISS_PROTname
28 containsPfamNamedSequence
33 setRateHeterogeneityOptionForPuzzle
34 setParameterEstimatesOptionForPuzzle
35 executePuzzleBootstrapped
38 addDistsToQueryToPWDfile
39 testForTextFilePresence
41 dieWithUnexpectedError
42 addSlashAtEndIfNotPresent
46 $SWISSPROT_ACDEOS_FILE
48 $SPECIES_TREE_FILE_DEFAULT
49 $MULTIPLE_TREES_FILE_SUFFIX
53 $ADDITION_FOR_RIO_ANNOT_TREE
83 $TRANSFERSBRANCHLENGHTS
95 $EXPASY_SPROT_SEARCH_DE
96 $EXPASY_SPROT_SEARCH_AC
102 # =============================================================================
103 # =============================================================================
105 # THESE VARIABLES ARE ENVIRONMENT DEPENDENT, AND NEED TO BE SET ACCORDINGLY
107 # -------------------------------------------------------------------------
114 our $PATH_TO_FORESTER = "/nfs/dm3/homedir1/czmasek/RIO1.24/";
117 # Java virtual machine:
118 # ---------------------
119 our $JAVA = "/usr/local/java/jdk/bin/java";
123 # Where all the temporary files can be created:
124 # ---------------------------------------------
125 our $TEMP_DIR_DEFAULT = "/tmp/";
131 our $PFAM_FULL_DIRECTORY = "/path/to/Pfam/Full/";
132 our $PFAM_SEED_DIRECTORY = "/path/to/Pfam/Seed/";
133 our $PFAM_HMM_DB = "/path/to/Pfam/Pfam_ls"; # Need to run "hmmindex" on this
134 # to produce .ssi file.
136 # "setenv HMMERDB /home/rio/pfam-6.6/"
139 $PATH_TO_FORESTER = &addSlashAtEndIfNotPresent( $PATH_TO_FORESTER );
142 # Description lines and species from SWISS-PROT and TrEMBL:
143 # ---------------------------------------------------------
144 our $TREMBL_ACDEOS_FILE = $PATH_TO_FORESTER."data/trembl22_ACDEOS_1-6";
146 our $SWISSPROT_ACDEOS_FILE = $PATH_TO_FORESTER."data/sp40_ACDEOS_1-6";
150 # Names of species which can be analyzed, and which can be analyzed
151 # against (must also be in tree $SPECIES_TREE_FILE_DEFAULT).
152 # By using a list with fewer species, RIO analyses become faster
153 # but lose phylogenetic resolution.
154 # For many purposes, list "tree_of_life_bin_1-6_species_list"
155 # in "data/species/" might be sufficient:
156 # --------------------------------------------------------------
157 our $SPECIES_NAMES_FILE = $PATH_TO_FORESTER."data/species/tree_of_life_bin_1-6_species_list";
161 # A default species tree in NHX format.
162 # For many purposes, tree "tree_of_life_bin_1-6.nhx"
163 # in "data/species/" might be fine:
164 # --------------------------------------------------
165 our $SPECIES_TREE_FILE_DEFAULT = $PATH_TO_FORESTER."data/species/tree_of_life_bin_1-6.nhx";
169 # Data for using precalculated distances:
170 # ---------------------------------------
171 our $MATRIX_FOR_PWD = 2; # The matrix which has been used for the pwd in $RIO_PWD_DIRECTORY.
172 # 0=JTT, 1=PAM, 2=BLOSUM 62, 3=mtREV24, 5=VT, 6=WAG.
174 our $RIO_PWD_DIRECTORY = $PATH_TO_FORESTER."example_data/"; # all must end with "/"
175 our $RIO_BSP_DIRECTORY = $PATH_TO_FORESTER."example_data/";
176 our $RIO_NBD_DIRECTORY = $PATH_TO_FORESTER."example_data/";
177 our $RIO_ALN_DIRECTORY = $PATH_TO_FORESTER."example_data/";
178 our $RIO_HMM_DIRECTORY = $PATH_TO_FORESTER."example_data/";
183 # End of variables which need to be set by the user.
185 # =============================================================================
186 # =============================================================================
192 $TEMP_DIR_DEFAULT = &addSlashAtEndIfNotPresent( $TEMP_DIR_DEFAULT );
193 $PFAM_FULL_DIRECTORY = &addSlashAtEndIfNotPresent( $PFAM_FULL_DIRECTORY );
194 $PFAM_SEED_DIRECTORY = &addSlashAtEndIfNotPresent( $PFAM_SEED_DIRECTORY );
198 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
199 # These variables should normally not be changed:
202 our $PRIOR_FILE_DIR = $PATH_TO_FORESTER."data/priors_for_hmmbuild/";
203 # Directory containing Dirichlet prior
204 # files needed for certain alignments
205 # by hmmbuild (e.g. Collagen).
209 our $SEQBOOT = $PATH_TO_FORESTER."phylip_mod/exe/seqboot";
210 our $NEIGHBOR = $PATH_TO_FORESTER."phylip_mod/exe/neighbor";
211 our $PROTPARS = $PATH_TO_FORESTER."phylip_mod/exe/protpars";
212 our $CONSENSE = $PATH_TO_FORESTER."phylip_mod/exe/consense";
215 our $PUZZLE = $PATH_TO_FORESTER."puzzle_mod/src/puzzle";
216 our $PUZZLE_DQO = $PATH_TO_FORESTER."puzzle_dqo/src/puzzle";
219 our $HMMALIGN = $PATH_TO_FORESTER."hmmer/binaries/hmmalign";
220 our $HMMSEARCH = $PATH_TO_FORESTER."hmmer/binaries/hmmsearch";
221 our $HMMBUILD = $PATH_TO_FORESTER."hmmer/binaries/hmmbuild";
222 our $HMMFETCH = $PATH_TO_FORESTER."hmmer/binaries/hmmfetch";
223 our $SFE = $PATH_TO_FORESTER."hmmer/binaries/sfetch";
224 our $HMMCALIBRATE = $PATH_TO_FORESTER."hmmer/binaries/hmmcalibrate";
226 our $P7EXTRACT = $PATH_TO_FORESTER."perl/p7extract.pl";
227 our $MULTIFETCH = $PATH_TO_FORESTER."perl/multifetch.pl";
231 our $BOOTSTRAP_CZ = $PATH_TO_FORESTER."C/bootstrap_cz";
232 our $BOOTSTRAP_CZ_PL = $PATH_TO_FORESTER."perl/bootstrap_cz.pl";
233 our $TRANSFERSBRANCHLENGHTS = $JAVA." -cp $PATH_TO_FORESTER"."java forester.tools.transfersBranchLenghts";
234 our $MAKETREE = $PATH_TO_FORESTER."perl/makeTree.pl";
235 our $RIO_PL = $PATH_TO_FORESTER."perl/rio.pl";
236 our $DORIO = $JAVA." -cp $PATH_TO_FORESTER"."java forester.tools.DoRIO";
238 our $RIO_SLAVE_DRIVER = $PATH_TO_FORESTER."perl/rio_slave_driver.pl";
239 our $RIO_SLAVE = $PATH_TO_FORESTER."perl/rio_slave.pl";
240 our $NODE_LIST = $PATH_TO_FORESTER."data/node_list.dat";
243 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
246 our $BOOTSTRAPS = 100;
247 our $MIN_NUMBER_OF_AA = 20; # After removal of gaps, if less, gaps are not removed.
248 our $LENGTH_OF_NAME = 26;
253 our $MULTIPLE_TREES_FILE_SUFFIX = ".mlt";
254 our $LOG_FILE_SUFFIX = ".log";
255 our $ALIGN_FILE_SUFFIX = ".aln";
256 our $TREE_FILE_SUFFIX = ".nhx";
257 our $ADDITION_FOR_RIO_ANNOT_TREE = ".rio";
258 our $SUFFIX_PWD = ".pwd";
259 our $SUFFIX_BOOT_STRP_POS = ".bsp";
260 our $SUFFIX_PWD_NOT_BOOTS = ".nbd";
261 our $SUFFIX_HMM = ".hmm";
263 our $EXPASY_SPROT_SEARCH_DE = "http://www.expasy.org/cgi-bin/sprot-search-de?";
264 our $EXPASY_SPROT_SEARCH_AC = "http://www.expasy.org/cgi-bin/sprot-search-ac?";
268 # One argument: input multiple trees file
269 # Last modified: 07/05/01
270 sub executeConsense {
273 &testForTextFilePresence( "$in" );
275 system( "$CONSENSE >/dev/null 2>&1 << !
279 && &dieWithUnexpectedError( "Could not execute \"$CONSENSE \"" );
287 # 1. options ("-" is not necessary)
288 # 2. alignment or pwd file
291 # Last modified: 07/05/01
292 sub executeMakeTree {
299 &testForTextFilePresence( $B );
303 system( "$MAKETREE $opts $B $C $D" )
304 && &dieWithUnexpectedError( "Could not execute \"$MAKETREE $opts $B $C $D\"" );
312 # 1. Name of inputfile
313 # 2. matrix option: 0 = JTT; 2 = BLOSUM 62; 3 = mtREV24;
314 # 5 = VT; 6 = WAG; 7 = auto; PAM otherwise
315 sub executePuzzleDQO {
317 my $matrix_option = $_[ 1 ];
320 &testForTextFilePresence( $in );
322 $mat = setModelForPuzzle( $matrix_option );
324 system( "$PUZZLE_DQO $in >/dev/null 2>&1 << !$mat
327 && &dieWithUnexpectedError( "Could not execute \"$PUZZLE_DQO\"" );
331 } ## executePuzzleDQO
337 # 1. Name of inputfile
338 # 2. matrix option: 0 = JTT; 2 = BLOSUM 62; 3 = mtREV24;
339 # 5 = VT; 6 = WAG; 7 = auto; PAM otherwise
340 # Last modified: 01/28/02
341 sub executePuzzleDQObootstrapped {
343 my $matrix_option = $_[ 1 ];
353 &testForTextFilePresence( $in );
355 open( GRP, "<$in" ) || &dieWithUnexpectedError( "Cannot open file \"$in\"" );
357 if ( $_ =~ /^\s*\d+\s+\d+\s*$/ ) {
363 $l = `cat $in | wc -l`;
364 $slen = $l / $counter;
366 system( "split -$slen $in $in.splt." )
367 && &dieWithUnexpectedError( "Could not execute \"split -$slen $in $in.splt.\"" );
371 $mat = setModelForPuzzle( $matrix_option );
375 system( "$PUZZLE_DQO $a >/dev/null 2>&1 << !$mat
378 && &dieWithUnexpectedError( "Could not execute \"$PUZZLE_DQO $a\"" );
380 system( "cat $a.dist >> $in.dist" )
381 && &dieWithUnexpectedError( "Could not execute \"cat outdist >> $in.dist\"" );
383 unlink( $a, $a.".dist" );
388 } ## executePuzzleDQObootstrapped
392 # Transfers a Pfam (SELEX) alignment to a
393 # PHYLIP sequential style alignment.
394 # It only writes "match columns" as indicated by the
395 # "# RF" line ('x' means match).
400 # 3. Set to 1 to skip the check that match states contain only 'A'-'Z' or '-'
402 # Returns the number of match states (=length of output alignment),
403 # the length of the input alignment,
404 # the number of seqs in the input alignment
406 # Last modified: 07/07/01
408 sub pfam2phylipMatchOnly {
410 my $infile = $_[ 0 ];
411 my $outfile = $_[ 1 ];
415 my $return_line = "";
423 my $number_colum = 0;
427 if ( $ne && $ne == 1 ) {
431 &testForTextFilePresence( $infile );
433 open( INPP, "$infile" ) || &dieWithUnexpectedError( "Cannot open file \"$infile\"" );
435 # This reads in the first block. It reads in the seq names.
437 if ( &isPfamSequenceLine( $return_line ) ) {
438 $return_line =~ /^(\S+)\s+(\S+)/;
439 $seq_name[ $y ] = substr( $1, 0, $LENGTH_OF_NAME );
441 for ( $x = 0; $x < length( $seq ); $x++ ) {
442 $seq_array[ $x ][ $y ] = substr( $seq, $x, 1 );
446 elsif ( &isRFline( $return_line ) ) {
448 $return_line =~ /\s+(\S+)\s*$/;
450 $x_offset = length( $seq );
452 for ( $x = 0; $x < $x_offset; $x++ ) {
453 $seq_array[ $x ][ $rf_y ] = substr( $seq, $x, 1 );
458 $return_line = <INPP>;
460 if ( !$return_line ) {
461 &dieWithUnexpectedError( "Alignment not in expected format (no RF line)" );
465 if ( $saw_rf_line != 1 ) {
466 &dieWithUnexpectedError( "Alignment not in expected format (no RF line)" );
472 # This reads all blocks after the 1st one.
473 while ( $return_line = <INPP> ) {
474 if ( &isPfamSequenceLine( $return_line ) ) {
475 $return_line =~ /^\S+\s+(\S+)/;
477 for ( $x = 0; $x < length( $seq ); $x++ ) {
478 $seq_array[ $x + $x_offset ][ $y % $rf_y ] = substr( $seq, $x, 1 );
482 elsif ( &isRFline( $return_line ) ) {
484 &dieWithUnexpectedError( "Alignment not in expected format" );
487 $return_line =~ /\s+(\S+)\s*$/;
489 $max_x = length( $seq );
491 for ( $x = 0; $x < length( $seq ); $x++ ) {
492 $seq_array[ $x + $x_offset ][ $rf_y ] = substr( $seq, $x, 1 );
496 $x_offset = $x_offset + $max_x;
503 # Counts the match states, and hence the number of aa in the alignment:
504 for ( $x = 0; $x < $x_offset; $x++ ) {
505 if ( !$seq_array[ $x ][ $rf_y ] ) {
506 &dieWithUnexpectedError( "Alignment not in expected format" );
508 if ( $seq_array[ $x ][ $rf_y ] eq 'x' ) {
515 open( OUTPP, ">$outfile" ) || &dieWithUnexpectedError( "Cannot create file \"$outfile\"" );
516 print OUTPP "$rf_y $number_colum\n";
517 for ( $y = 0; $y < $rf_y; $y++ ) {
518 print OUTPP "$seq_name[ $y ]";
519 for ( $i = 0; $i < ( $LENGTH_OF_NAME - length( $seq_name[ $y ] ) ); $i++ ) {
522 for ( $x = 0; $x < $x_offset; $x++ ) {
523 if ( $seq_array[ $x ][ $rf_y ] eq 'x' ) {
524 if ( !$seq_array[ $x ][ $y ] ) {
525 &dieWithUnexpectedError( "Alignment not in expected format" );
527 if ( $not_ensure != 1 && $seq_array[ $x ][ $y ] !~ /[A-Z]|-/ ) {
528 &dieWithUnexpectedError( "Alignment not in expected format (match states must only contain 'A'-'Z' or '-')" );
530 print OUTPP "$seq_array[ $x ][ $y ]";
537 return $number_colum, $x_offset, $rf_y;
539 } ## pfam2phylipMatchOnly
543 # Returns whether the argument (a String)
544 # starts with a SWISS-PROT name (SEQN_SPECI).
545 # Last modified: 06/21/01
546 sub startsWithSWISS_PROTname {
547 return ( $_[ 0 ] =~ /^[A-Z0-9]{1,4}_[A-Z0-9]{1,5}/ );
552 # Returns whether the argument starts with XXX.. XXXXX.. and the first
553 # character is not a "#".
554 # Last modified: 06/21/01
555 sub isPfamSequenceLine {
556 return( !&isPfamCommentLine( $_[ 0 ] )
557 && &containsPfamNamedSequence( $_[ 0 ] ) );
562 # Returns whether the argument starts with a "#".
563 # Last modified: 06/21/01
564 sub isPfamCommentLine {
565 return ( $_[ 0 ] =~ /^#/ );
570 # Returns whether the argument starts with XXX XXXXX.
571 # Last modified: 06/21/01
572 sub containsPfamNamedSequence {
573 return ( $_[ 0 ] =~ /^\S+\s+\S+/ );
577 # Returns whether the argument is an RF line: starts with "#" and contains "RF".
578 # Last modified: 06/21/01
580 return ( $_[ 0 ] =~ /^#.*RF/ );
587 # 1. pairwise distance file
588 # 2. number of bootstraps
589 # 3. randomize_input_order: 0: do not randomize input order; >=1 jumble
590 # 4. seed for random number generator
591 # 5. lower-triangular data matrix? 1: yes; no, otherwise
592 # Last modified: 06/08/01
593 sub executeNeighbor {
604 &testForTextFilePresence( $inpwd );
624 system( "$NEIGHBOR >/dev/null 2>&1 << !
625 $inpwd$jumble$multi$lower
630 && &dieWithUnexpectedError( "Could not execute \"$NEIGHBOR $inpwd$jumble$multi$lower\"" );
631 # 3: Do NOT print out tree
641 # 1. name of alignment file (in correct format!)
642 # 2. number of bootstraps
643 # 3. jumbles: 0: do not jumble; >=1 number of jumbles
644 # 4. seed for random number generator
645 # Last modified: 03/13/04
646 sub executeProtpars {
655 &testForTextFilePresence( $alin );
657 if ( $bs >= 2 && $rand < 1 ) {
677 system( "$PROTPARS 2>&1 << !
683 && &dieWithUnexpectedError( "Could not execute \"$PROTPARS $alin$jumble$multi\"" );
684 # 3: Do NOT print out tree
693 # "Model of substitution" order for DQO TREE-PUZZLE 5.0:
695 # m -> Dayhoff (Dayhoff et al. 1978)
696 # m -> JTT (Jones et al. 1992)
697 # m -> mtREV24 (Adachi-Hasegawa 1996)
698 # m -> BLOSUM62 (Henikoff-Henikoff 92)
699 # m -> VT (Mueller-Vingron 2000)
700 # m -> WAG (Whelan-Goldman 2000)
703 # matrix option: 0 = JTT; 2 = BLOSUM 62; 3 = mtREV24;
704 # 5 = VT; 6 = WAG; 7 = auto; PAM otherwise
705 # Last modified: 07/07/01
706 sub setModelForPuzzle {
707 my $matrix_option = $_[ 0 ];
710 if ( $matrix_option == 0 ) { # JTT
715 elsif ( $matrix_option == 2 ) { # BLOSUM 62
722 elsif ( $matrix_option == 3 ) { # mtREV24
728 elsif ( $matrix_option == 5 ) { # VT
736 elsif ( $matrix_option == 6 ) { # WAG
745 elsif ( $matrix_option == 7 ) { # auto
755 } ## setModelForPuzzle
758 # Model of rate heterogeneity:
759 # 1 for "8 Gamma distributed rates"
760 # 2 for "Two rates (1 invariable + 1 variable)"
761 # 3 for "Mixed (1 invariable + 8 Gamma rates)"
762 # otherwise: Uniform rate
763 # Last modified: 09/08/03
764 sub setRateHeterogeneityOptionForPuzzle {
765 my $rate_heterogeneity_option = $_[ 0 ];
768 if ( $rate_heterogeneity_option == 1 ) {
772 elsif ( $rate_heterogeneity_option == 2 ) {
777 elsif ( $rate_heterogeneity_option == 3 ) {
788 } ## setRateHeterogeneityOptionForPuzzle
792 # Parameter estimates: 1 for "Exact (slow)"; "Approximate (faster)" otherwise
793 # Last modified: 09/08/03
794 sub setParameterEstimatesOptionForPuzzle {
795 my $parameter_estimates_option = $_[ 0 ];
798 if ( $parameter_estimates_option == 1 ) {
807 } ## setParameterEstimatesOptionForPuzzle
811 # Two/three/four arguments:
812 # 1. Name of inputfile
813 # 2. matrix option: 0 = JTT; 2 = BLOSUM 62; 3 = mtREV24;
814 # 5 = VT; 6 = WAG; 7 = auto; PAM otherwise
815 # 3. Parameter estimates: 1 for "Exact (slow)"; "Approximate (faster)" otherwise
816 # 4. Model of rate heterogeneity:
817 # 1 for "8 Gamma distributed rates"
818 # 2 for "Two rates (1 invariable + 1 variable)"
819 # 3 for "Mixed (1 invariable + 8 Gamma rates)"
820 # otherwise: Uniform rate
821 # Last modified: 09/08/03 (added 3rd and 4th parameter)
822 sub executePuzzleBootstrapped {
824 my $matrix_option = $_[ 1 ];
825 my $parameter_estimates_option = $_[ 2 ];
826 my $rate_heterogeneity_option = $_[ 3 ];
837 &testForTextFilePresence( $in );
839 open( GRP, "<$in" ) || die "\n\n$0: Unexpected error: Cannot open file <<$in>>: $!";
841 if ( $_ =~ /^\s*\d+\s+\d+\s*$/ ) {
847 $l = `cat $in | wc -l`;
848 $slen = $l / $counter;
850 system( "split -$slen $in $in.splt." )
851 && die "\n\n$0: executePuzzleDQObootstrapped: Could not execute \"split -$slen $in $in.splt.\": $!";
855 $mat = setModelForPuzzle( $matrix_option );
856 if ( $parameter_estimates_option ) {
857 $est = &setParameterEstimatesOptionForPuzzle( $parameter_estimates_option );
859 if ( $rate_heterogeneity_option ) {
860 $rate = &setRateHeterogeneityOptionForPuzzle( $rate_heterogeneity_option );
865 system( "$PUZZLE $a << !
870 && die "$0: Could not execute \"$PUZZLE $a\"";
872 system( "cat $a.dist >> $in.dist" )
873 && die "$0: Could not execute \"cat outdist >> $in.dist\"";
875 unlink( $a, $a.".dist", $a.".tree" );
880 } ## executePuzzleBootstrapped
886 # Two/three/four arguments:
887 # 1. Name of inputfile
888 # 2. Matrix option: 0 = JTT; 2 = BLOSUM 62; 3 = mtREV24;
889 # 5 = VT; 6 = WAG; 7 = auto; PAM otherwise
890 # 3. Parameter estimates: 1 for "Exact (slow)"; "Approximate (faster)" otherwise
891 # 4. Model of rate heterogeneity:
892 # 1 for "8 Gamma distributed rates"
893 # 2 for "Two rates (1 invariable + 1 variable)"
894 # 3 for "Mixed (1 invariable + 8 Gamma rates)"
895 # otherwise: Uniform rate
896 # Last modified: 09/08/03 (added 3rd and 4th parameter)
899 my $matrix_option = $_[ 1 ];
900 my $parameter_estimates_option = $_[ 2 ];
901 my $rate_heterogeneity_option = $_[ 3 ];
906 &testForTextFilePresence( $in );
908 $mat = &setModelForPuzzle( $matrix_option );
909 if ( $parameter_estimates_option ) {
910 $est = &setParameterEstimatesOptionForPuzzle( $parameter_estimates_option );
912 if ( $rate_heterogeneity_option ) {
913 $rate = &setRateHeterogeneityOptionForPuzzle( $rate_heterogeneity_option );
917 system( "$PUZZLE $in << !
922 && die "$0: Could not execute \"$PUZZLE\"";
931 # Preparation of the pwd file
932 sub addDistsToQueryToPWDfile {
933 my $pwd_file = $_[ 0 ];
934 my $disttoquery_file = $_[ 1 ];
935 my $outfile = $_[ 2 ];
936 my $name_of_query = $_[ 3 ];
937 my $name_of_query_ = "";
938 my $return_line_pwd = "";
939 my $return_line_dq = "";
942 my $name_from_pwd = "X";
943 my $name_from_dq = "Y";
944 my @dists_to_query = ();
947 &testForTextFilePresence( $pwd_file );
948 &testForTextFilePresence( $disttoquery_file );
950 $name_of_query_ = $name_of_query;
951 for ( my $j = 0; $j <= ( $LENGTH_OF_NAME - length( $name_of_query ) - 1 ); ++$j ) {
952 $name_of_query_ .= " ";
955 open( OUT_AD, ">$outfile" ) || &dieWithUnexpectedError( "Cannot create file \"$outfile\"" );
956 open( IN_PWD, "$pwd_file" ) || &dieWithUnexpectedError( "Cannot open file \"$pwd_file\"" );
957 open( IN_DQ, "$disttoquery_file" ) || &dieWithUnexpectedError( "Cannot open file \"$disttoquery_file\"" );
959 W: while ( $return_line_pwd = <IN_PWD> ) {
962 if ( $return_line_pwd =~ /^\s*(\d+)\s*$/ ) {
966 print OUT_AD "$name_of_query_ ";
967 for ( my $j = 0; $j < $i; ++$j ) {
968 print OUT_AD "$dists_to_query[ $j ] ";
970 print OUT_AD "0.0\n";
972 print OUT_AD " $num_of_sqs\n";
974 @dists_to_query = ();
979 && $return_line_pwd =~ /^\s*(\S+)\s+\S+/ ) {
982 if ( !defined( $return_line_dq = <IN_DQ> ) ) {
983 &dieWithUnexpectedError( "\"$disttoquery_file\" seems too short" );
986 if ( $return_line_dq !~ /\S/ ) {
987 if ( !defined( $return_line_dq = <IN_DQ> ) ) {
988 &dieWithUnexpectedError( "\"$disttoquery_file\" seems too short" );
991 $return_line_dq =~ /^\s*(\S+)\s+(\S+)/;
993 $dists_to_query[ $i++ ] = $2;
996 if ( $name_from_pwd ne $name_from_dq ) {
997 &dieWithUnexpectedError( "Order of sequence names in \"$pwd_file\" and \"$disttoquery_file\" is not the same" );
999 print OUT_AD $return_line_pwd;
1003 && $return_line_pwd =~ /^\s*(\S+)\s+\S+/ ) {
1004 $name_from_pwd = $1;
1005 if ( !defined( $return_line_dq = <IN_DQ> ) ) {
1006 &dieWithUnexpectedError( "\"$disttoquery_file\" seems too short" );
1008 if ( $return_line_dq !~ /\S/ ) {
1009 if ( !defined( $return_line_dq = <IN_DQ>) ) {
1010 &dieWithUnexpectedError( "\"$disttoquery_file\" seems too short" );
1013 $return_line_dq =~ /^\s*\S+\s+(\S+)/;
1014 $dists_to_query[ $i++ ] = $1;
1015 print OUT_AD $return_line_pwd;
1018 print OUT_AD "$name_of_query_ ";
1019 for ( my $j = 0; $j < $i; ++$j ) {
1020 print OUT_AD "$dists_to_query[ $j ] ";
1022 print OUT_AD "0.0\n";
1029 } ## addDistsToQueryToPWDfile
1037 # 3. outputfile name
1038 # Last modified: 02/27/01
1039 sub executeHmmfetch {
1043 my $outfile = $_[ 2 ];
1045 system( "$HMMFETCH $db $name > $outfile" )
1046 && &dieWithUnexpectedError( "Could not execute \"$HMMFETCH $db $name > $outfile\"" );
1049 } ## executeHmmfetch
1053 # Checks whether a file is present, not empty, and a plain text file.
1054 # One argument: name of file.
1055 # Last modified: 07/07/01
1056 sub testForTextFilePresence {
1058 unless ( ( -s $file ) && ( -f $file ) && ( -T $file ) ) {
1059 dieWithUnexpectedError( "File \"$file\" does not exist, is empty, or is not a plain textfile" );
1061 } ## testForTextFilePresence
1064 # Last modified: 02/21/03
1065 sub addSlashAtEndIfNotPresent {
1066 my $filename = $_[ 0 ];
1067 $filename =~ s/\s+//g;
1068 unless ( $filename =~ /\/$/ ) {
1069 $filename = $filename."/";
1072 } ## addSlashAtEndIfNotPresent
1076 # Last modified: 02/15/02
1077 sub exitWithWarning {
1080 if ( defined( $_[ 1 ] ) && $_[ 1 ] == 1 ) {
1081 print( "<H4 class=\"error\">user error</H4>\n" );
1083 print( "<B>$text</B>\n" );
1085 print( "<P>  </P>\n" );
1088 print( "\n\n$text\n\n" );
1093 } ## exit_with_warning
1097 # Last modified: 02/15/02
1098 sub dieWithUnexpectedError {
1102 die( "\n\n$0:\nUnexpected error (should not have happened):\n$text\n$!\n\n" );
1104 } ## dieWithUnexpectedError