#! /usr/bin/perl -W # nph-riowebserver.pl # ------------------- # # Copyright (C) 2002 Washington University School of Medicine # and Howard Hughes Medical Institute # All rights reserved # # Created: 02/18/02 # Author: Christian M. Zmasek # zmasek@genetics.wustl.edu # http://www.genetics.wustl.edu/eddy/people/zmasek/ # # Last modified: 02/20/02 use strict; use CGI; use queue; my $RIOPL = "/home/rio/forester/perl/rio4P.pl"; my $JAVA = "/home/rio/j2sdk1.4.0/bin/java"; my $TEST_NHX = $JAVA." -cp /home/rio/forester/java forester.tools.testNHX"; my $TEMPDIR = "/home/rio/rws_temp"; my $SPECIESTREE = "/home/rio/forester/data/species/tree_of_life_bin_1-4.nhx"; my $SPECIESLIST = "/home/rio/forester/data/species/tree_of_life_bin_1-4_species_list"; my $hmm_search_url_A = "http://pfam.wustl.edu/cgi-bin/nph-hmmsearch?protseq="; my $hmm_search_url_B = "&search_mode=merge&cutoff_strategy=ga"; my $RIO_ALN_DIRECTORY = "/data/rio/ALNs/"; my $RIO_NBD_DIRECTORY = "/data/rio/NBDs/"; my $ALIGN_FILE_SUFFIX = ".aln"; my $ALIGN_NBD_FILE = ".nbd"; my $DIR_FOR_TREES = "/var/www/html/trees/"; # Directory for NHX files to be read by ATV applet my $URL_FOR_TREES = "http://forester.wustl.edu/trees/"; # URL base for NHX files to be read by ATV applet my $CODE_BASE_FOR_ATV_APPLET = "http://forester.wustl.edu/applets/"; # URL for ATV applet (jar file) my $TARGET_FILES_IN_DIR_FOR_TREES = 100; # If the number of nhx files in $DIR_FOR_TREES is lager then $MAX_FILES_IN_DIR_FOR_TREES my $MAX_FILES_IN_DIR_FOR_TREES = 120; # the oldest files will be deleted until the number is down to $TARGET_FILES_IN_DIR_FOR_TREES. my $O_THRESHOLD_DEFAULT = 0; my $SN_THRESHOLD_DEFAULT = 0; my $U_THRESHOLD_DEFAULT = 50; my $SEED_FOR_RANDOM_DEFAULT = 41; my $SORT_DEFAULT = 12; my $MIN_SIZE = 5; # Minimal size (in chars) for input files my $MAX_SIZE = 10000; # Maximal size (in chars) for input files my $MAX_LINES = 1000; # Maximal lines for input files my $RIO_OPTIONS = "U=60 Y=2 X=2 Z=2 I C E x +"; my $CONTACT = "zmasek\@genetics.wustl.edu"; my $VERSION = "0.3"; my $o_threshold = 0; my $sn_threshold = 0; my $u_threshold = 0; my $seed_for_random = 0; my $sort = 0; my $size_d = 0; my $size_c = 0; my $entry_time = 0; my $njobs = 0; my $njobs_thisuser = 0; my $user_defined_tree = 0; my $query = ""; my $query_seq = ""; my $query_seq_file = ""; my $tree_file = ""; my $pfam_domain = ""; my $species = ""; my $output_tree = ""; my $output_up = ""; my $remote_addr = ""; my $oneline = ""; my $aln = ""; my $speciestree = ""; my $output = ""; my $query_sequence = ""; # To be submitted to hmmsearch website, if necessary. my $link_to_hmmsearch = ""; my @lines = (); my %Species_names_hash = (); $| = 1; $query = new CGI; $query_seq = $query->param( 'query_seq' ); $query_seq_file = $query->upload( 'query_seq_file' ); $pfam_domain = $query->param( 'pfam_domain' ); $species = $query->param( 'species' ); $o_threshold = $query->param( 'o_threshold' ); $sn_threshold = $query->param( 'sn_threshold' ); $u_threshold = $query->param( 'u_threshold' ); $seed_for_random = $query->param( 'seed_for_random' ); $output_up = $query->param( 'output_up' ); $sort = $query->param( 'sort_priority' ); $tree_file = $query->upload( 'tree_file' ); $remote_addr = $ENV{ REMOTE_ADDR }; # NPH header # ---------- print $query->header( -status=>"200 OK", -server=>"$ENV{ SERVER_SOFTWARE }", -nph=>1 ); # Prints the first HTML # --------------------- print "\n"; print "\n"; print "
\n"; print " 
\n"; # Leaves the queue in an orderly fashion. &queue::RemoveFromQueue( "rioqueue", $remote_addr, $TEMPDIR, $$ ); print( $output ); &print_footer(); &removeFiles( $DIR_FOR_TREES, $TARGET_FILES_IN_DIR_FOR_TREES, $MAX_FILES_IN_DIR_FOR_TREES ); exit( 0 ); # Methods # ------- # Last modified: 02/19/02 sub run_rio { my $pfam_name = $_[ 0 ]; my $query_file = $_[ 1 ]; my $output_file = $_[ 2 ]; my $name_for_query = $_[ 3 ]; my $species_tree_file = $_[ 4 ]; my $more_options = $_[ 5 ]; my $tmp_file_rio = $_[ 6 ]; my $t_o = $_[ 7 ]; my $t_sn = $_[ 8 ]; my $t_u = $_[ 9 ]; my $seed = $_[ 10 ]; my $sort = $_[ 11 ]; my $start_time = time; my $options_for_rio = ""; $options_for_rio .= ( " A=".$pfam_name ); $options_for_rio .= ( " Q=".$query_file ); $options_for_rio .= ( " O=".$output_file ); $options_for_rio .= ( " N=".$name_for_query ); $options_for_rio .= ( " S=".$species_tree_file ); $options_for_rio .= ( " j=".$tmp_file_rio ); $options_for_rio .= ( " L=".$t_o ); $options_for_rio .= ( " B=".$t_sn ); if ( $t_u != -1 ) { $options_for_rio .= ( " v=".$t_u ); } $options_for_rio .= ( " y=".$seed ); $options_for_rio .= ( " P=".$sort ); $options_for_rio .= ( " ".$more_options ); $output = `$RIOPL 1 $options_for_rio`; if ( $? != 0 ) { nph_rio_error(); } my $finish_time = time; my $wait_time = $finish_time - $entry_time; my $cpu_time = $finish_time - $start_time; # Logs the results. my $date = `date`; chop( $date ); open ( LOGFILE, ">>$TEMPDIR/log") || &nph_fatal_error( "could not open log file" ); flock( LOGFILE, 2 ); print LOGFILE "$date queue: $njobs wait: $wait_time true_cpu: $cpu_time pid: $$ addr: $ENV{'REMOTE_ADDR'} host: $ENV{'REMOTE_HOST'} pfam: $pfam_name\n"; flock( LOGFILE, 8 ); close ( LOGFILE ); return; } ## run_rio # Reads a file into "@lines" # Last modified: 02/19/02 sub readInFile { my $file = $_[ 0 ]; my $l = 0; my $s = 0; @lines = (); $file =~ s/;\|,<>&\s//g; while( <$file> ) { $s += length( $_ ); if ( $s > $MAX_SIZE ) { &nph_user_error( "query sequence is too long (>$MAX_SIZE)" ); } $l++; if ( $l > $MAX_LINES ) { &nph_user_error( "file has too many lines (>$MAX_LINES)" ); } push( @lines, $_ ); } } ## readInFile # Reads in (SWISS-PROT) species names from a file. # Names must be separated by newlines. # Lines beginning with "#" are ignored. # A possible "=" and everything after is ignored. # One argument: species-names-file name # Last modified: 02/19/02 sub readSpeciesNamesFile { my $infile = $_[ 0 ]; my $return_line = ""; my $species = ""; open( IN_RSNF, "$infile" ) || &nph_fatal_error( "could not open species list" ); while ( $return_line =\n" ); print( "RIO $VERSION \n" ); print( "phylogenomic analysis of a protein sequence | " ); print( "help | " ); print( "forester/rio home | " ); print( "pfam\n" ); print( "
\n" ); print( "comments, questions, flames? email $CONTACT
\n" ); return; } ## print_contact # Last modified: 02/19/02 sub showATVlinks { my $domain_no = 0; if ( -s "$TEMPDIR/$$.outfile.rio.nhx" ) { $domain_no = 1; system( "mv", "$TEMPDIR/$$.outfile.rio.nhx", $DIR_FOR_TREES ) && &nph_fatal_error( "could not mv $TEMPDIR/$$.outfile.rio.nhx" ); } elsif ( -s "$TEMPDIR/$$.outfile.rio-1.nhx" ) { $domain_no = 1; while ( -s "$TEMPDIR/$$.outfile.rio-$domain_no.nhx" ) { system( "mv", "$TEMPDIR/$$.outfile.rio-$domain_no.nhx", $DIR_FOR_TREES ) && &nph_fatal_error( "could not mv $TEMPDIR/$$.outfile.rio-$domain_no.nhx.nhx" ); $domain_no++; } } if ( $domain_no == 1 ) { $output .= " 
\n"; $output .= "\n"; $output .= "download NHX file describing this tree | \n"; $output .= "
 
\n"; $output .= "\n"; $output .= "download NHX file for domain #$x | \n"; } $output .= "
[the RIO analysis appearently died]
\n" ); print( "the most likely source of this error is an invalid user defined species tree
\n" ); } else { print( "[the RIO analysis appearently died for unknown reasons]
\n" ); print( "This type of error should not happen
\n" ); print( "\n" ); print( "We may have logged it automatically, but we would appreciate it if you would also notify us at\n" ); print( "$CONTACT\n" ); print( "
\n" ); } print( " 
\n" ); &print_footer(); system( "rm -r $TEMPDIR/$$"."0" ); die; } ## nph_fatal_error # Last modified: 02/19/02 sub nph_fatal_error { my $mesg = $_[ 0 ]; &queue::RemoveFromQueue( "rioqueue", $remote_addr, $TEMPDIR, $$ ); unlink( "$TEMPDIR/$$.query", "$TEMPDIR/$$.tree" ); print( "[$mesg : $!]
\n" ); print( "This type of error should not happen
\n" ); print( "\n" ); print( "We may have logged it automatically, but we would appreciate it if you would also notify us at\n" ); print( "$CONTACT\n" ); print( "
\n" ); print( " 
\n" ); &print_footer(); die; } ## nph_fatal_error # Last modified: 02/19/02 sub nph_user_error { my $mesg = $_[ 0 ]; &queue::RemoveFromQueue( "rioqueue", $remote_addr, $TEMPDIR, $$ ); unlink( "$TEMPDIR/$$.query", "$TEMPDIR/$$.tree" ); print( "\n" ); print( "$mesg\n" ); print( "
\n" ); print( " 
\n" ); &print_footer(); die "nph-riowebserver handled: $mesg"; } ## nph_user_error