From 81aff14b756ba3fd6341875c4e4af92f0415450a Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Tue, 12 Nov 2013 00:00:05 +0000 Subject: [PATCH] inprogress --- forester/ruby/evoruby/exe/run_phylo_pipeline_x.rb | 35 +++++++++-- .../ruby/evoruby/lib/evo/tool/phylogeny_factory.rb | 62 ++++++++++++++------ 2 files changed, 74 insertions(+), 23 deletions(-) diff --git a/forester/ruby/evoruby/exe/run_phylo_pipeline_x.rb b/forester/ruby/evoruby/exe/run_phylo_pipeline_x.rb index 50de838..8d0a27d 100644 --- a/forester/ruby/evoruby/exe/run_phylo_pipeline_x.rb +++ b/forester/ruby/evoruby/exe/run_phylo_pipeline_x.rb @@ -15,12 +15,17 @@ module Evoruby class RunPhyloPipeline - PFAM = "/home/czmasek/DATA/PFAM/PFAM270X/" - HMMSCAN = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan" - HSP = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb" - D2F = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb" - DSX = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb" - TAP = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/tap.rb" + LAUNCH_ANALYSIS = true + HOME = "/home/czmasek/" + FORESTER_RUBY = "#{HOME}SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/" + PFAM = "#{HOME}DATA/PFAM/PFAM270X/" + HMMSCAN = "#{HOME}SOFTWARE/HMMER/hmmer-3.0/src/hmmscan" + HSP = "#{FORESTER_RUBY}hsp.rb" + D2F = "#{FORESTER_RUBY}d2f.rb" + DSX = "#{FORESTER_RUBY}dsx.rb" + TAP = "#{FORESTER_RUBY}tap.rb" + PF = "#{FORESTER_RUBY}phylogeny_factory.rb" + TEMPLATE_FILE = '00_phylogeny_factory.template' def run unless ARGV.length >= 2 && ARGV.length <= 4 @@ -114,8 +119,26 @@ module Evoruby end Dir.mkdir( hmm_name + "/msa" ) + Dir.mkdir( hmm_name + "/msa100" ) FileUtils.cp "#{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}.fasta", "#{hmm_name}/msa/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}" + FileUtils.cp "#{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}.fasta", "#{hmm_name}/msa100/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}" + + if File.exists?( TEMPLATE_FILE ) + FileUtils.cp TEMPLATE_FILE, "#{hmm_name}/msa/" + FileUtils.cp TEMPLATE_FILE, "#{hmm_name}/msa100/" + + if LAUNCH_ANALYSIS + puts "f. analysis:" + Dir.chdir "#{hmm_name}/msa/" + run_command "#{PF} -s" + Dir.chdir "../.." + Dir.chdir "#{hmm_name}/msa100/" + run_command "#{PF} -s" + Dir.chdir "../.." + puts + end + end end diff --git a/forester/ruby/evoruby/lib/evo/tool/phylogeny_factory.rb b/forester/ruby/evoruby/lib/evo/tool/phylogeny_factory.rb index e453c8c..93eac07 100644 --- a/forester/ruby/evoruby/lib/evo/tool/phylogeny_factory.rb +++ b/forester/ruby/evoruby/lib/evo/tool/phylogeny_factory.rb @@ -18,18 +18,19 @@ module Evoruby class PhylogenyFactory PRG_NAME = "phylogeny_factory" - PRG_DATE = "130402" + PRG_DATE = "1301111" PRG_DESC = "automated phylogeny reconstruction using queing system" - PRG_VERSION = "1.002" + PRG_VERSION = "1.100" COPYRIGHT = "2013 Christian M Zmasek" CONTACT = "phylosoft@gmail.com" WWW = "www.phylosoft.org" USE_JOB_SUBMISSION_SYSTEM_OPTION = 's' + BS_OPTION = 'b' LOG_FILE = '00_phylogeny_factory.log' TEMPLATE_FILE = '00_phylogeny_factory.template' PBS_O_WORKDIR = '$PBS_O_WORKDIR/' - MIN_LENGTH_DEFAULT = 50 + MIN_LENGTH_DEFAULT = 40 PFAM_HHMS = "/home/czmasek/DATA/PFAM/PFAM270X/PFAM_A_HMMs/" WALLTIME = '100:00:00' QUEUE = 'default' @@ -38,6 +39,7 @@ module Evoruby RSL = 'RSL' HMM = 'HMM' + PHYLO_PL = 'PHYLO_PL' OPTION_OPEN = '%[' OPTION_CLOSE = ']%' @@ -65,6 +67,7 @@ module Evoruby allowed_opts = Array.new allowed_opts.push( USE_JOB_SUBMISSION_SYSTEM_OPTION ) + allowed_opts.push( BS_OPTION ) disallowed = cla.validate_allowed_options_as_str( allowed_opts ) if ( disallowed.length > 0 ) @@ -88,12 +91,25 @@ module Evoruby use_job_submission_system = true end + bootstraps = 1 + if cla.is_option_set?( BS_OPTION ) + bootstraps = cla.get_option_value_as_int( BS_OPTION ) + end + if bootstraps < 0 + puts( '[' + PRG_NAME + '] > negative bootstrap value' ) + exit( -1 ) + end + if bootstraps == 0 + bootstraps = 1 + end + log = String.new now = DateTime.now log << "Program : " + PRG_NAME + NL log << "Version : " + PRG_VERSION + NL log << "Program date: " + PRG_DATE + NL + NL + log << "Bootstraps : " + bootstraps.to_s + NL log << "Date/time : " + now.to_s + NL log << "Directory : " + Dir.getwd + NL + NL @@ -102,7 +118,6 @@ module Evoruby paths = Hash.new # path placeholder -> full path min_lengths = Hash.new # alignment id -> minimal length options = Hash.new # option placeholder -> option - # ids = Set.new commands = Array.new @@ -123,8 +138,13 @@ module Evoruby puts( '[' + PRG_NAME + '] > min lengths: ' + $1 + ' => ' + $2 ) elsif ( line =~ /^%\s*(\S+)\s*=\s*(\S+)/ ) - options[ $1 ] = $2 - puts( '[' + PRG_NAME + '] > options : ' + $1 + ' => ' + $2 ) + key = $1 + value = $2 + if key == PHYLO_PL + value = update_phylo_pl_options( value, bootstraps ) + end + options[ key ] = value + puts( '[' + PRG_NAME + '] > options : ' + key + ' => ' + value ) elsif ( line =~ /^>\s*(.+)/ ) command = command + $1 + ";#{NL}" @@ -243,6 +263,14 @@ module Evoruby command end + def update_phylo_pl_options( phylo_pl_options, bootstraps ) + unless phylo_pl_options =~ /B\d/ + phylo_pl_options = 'B' + bootstraps.to_s + phylo_pl_options + end + phylo_pl_options = '-' + phylo_pl_options + phylo_pl_options + end + def subst_min_length( command, id, min_lengths ) min_length = nil if id != nil && id.length > 0 @@ -274,10 +302,10 @@ end # module Evoruby # are to be used: # the substring between the first two double underscores is a # unique identifier and needs to match the identifiers -# in '% =' statements +# in '% =' statements # Example: # alignment name : 'x__bcl2__e1' -# parameter statments: '% RSL bcl2=60' +# parameter statments: '% RSL bcl2=60' $ PROBCONS=/home/czmasek/SOFTWARE/PROBCONS/probcons_v1_12/probcons $ DIALIGN_TX=/home/czmasek/SOFTWARE/DIALIGNTX/DIALIGN-TX_1.0.2/source/dialign-tx $ DIALIGN_CONF=/home/czmasek/SOFTWARE/DIALIGNTX/DIALIGN-TX_1.0.2/conf @@ -291,13 +319,13 @@ $ PHYLO_PL=/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/archive/perl/ph % RSL Hormone_recep=60 -% +% % RSL Y_phosphatase=100 % RSL Y_phosphatase2=75 % RSL Y_phosphatase3=50 % RSL Y_phosphatase3C=40 -% PHYLO_OPT=-B100q@1r4j2IGS21X +% PHYLO_OPT=B100q@1r4j2IGS21X % TMP_DIR = /home/czmasek/tmp/ @@ -305,23 +333,23 @@ $ PHYLO_PL=/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/archive/perl/ph > KALIGN $ > $_kalign > MSA_PRO -o=p -n=10 -d -rr=0.5 -c -rsl=%[RSL]% $_kalign $_kalign_05_%[RSL]%.aln > PHYLO_PL %[PHYLO_OPT]% $_kalign_05_%[RSL]%.aln $_kalign_05_%[RSL]% %[TMP_DIR]% -- +- > KALIGN $ > $_kalign_ > MSA_PRO -o=p -n=10 -d -rr=0.9 -c -rsl=%[RSL]% $_kalign_ $_kalign_09_%[RSL]%.aln > PHYLO_PL %[PHYLO_OPT]% $_kalign_09_%[RSL]%.aln $_kalign_09_%[RSL]% %[TMP_DIR]% -- +- > HMMALIGN --amino --trim --outformat Pfam -o $_hmmalign %[HMM]% $ > /dev/null > MSA_PRO -o=p -n=10 -d -rr=0.5 -c -rsl=%[RSL]% $_hmmalign $_hmmalign_05_%[RSL]%.aln > PHYLO_PL %[PHYLO_OPT]% $_hmmalign_05_%[RSL]%.aln $_hmmalign_05_%[RSL]% %[TMP_DIR]% -- +- > HMMALIGN --amino --trim --outformat Pfam -o $_hmmalign_ %[HMM]% $ > /dev/null > MSA_PRO -o=p -n=10 -d -rr=0.9 -c -rsl=%[RSL]% $_hmmalign_ $_hmmalign_09_%[RSL]%.aln > PHYLO_PL %[PHYLO_OPT]% $_hmmalign_09_%[RSL]%.aln $_hmmalign_09_%[RSL]% %[TMP_DIR]% -- +- > MAFFT --maxiterate 1000 --localpair $ > $_mafft @@ -334,7 +362,7 @@ $ PHYLO_PL=/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/archive/perl/ph > PHYLO_PL %[PHYLO_OPT]% $_mafft_09_%[RSL]%.aln $_mafft_09_%[RSL]% %[TMP_DIR]% - - + > MUSCLE -maxiters 1000 -maxtrees 100 -in $ -out $_muscle > MSA_PRO -o=p -n=10 -d -rr=0.5 -c -rsl=%[RSL]% $_muscle $_muscle_05_%[RSL]%.aln > PHYLO_PL %[PHYLO_OPT]% $_muscle_05_%[RSL]%.aln $_muscle_05_%[RSL]% %[TMP_DIR]% @@ -360,12 +388,12 @@ $ PHYLO_PL=/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/archive/perl/ph > PROBCONS $ > $_probcons > MSA_PRO -o=p -n=10 -d -rem_red -rr=0.5 -c -rsl=%[RSL]% $_probcons $_probcons_05_%[RSL]%.aln > PHYLO_PL %[PHYLO_OPT]% $_probcons_05_%[RSL]%.aln $_probcons_05_%[RSL]% %[TMP_DIR]% -- +- > PROBCONS $ > $_probcons_ > MSA_PRO -o=p -n=10 -d -rem_red -rr=0.9 -c -rsl=%[RSL]% $_probcons_ $_probcons_09_%[RSL]%.aln > PHYLO_PL %[PHYLO_OPT]% $_probcons_09_%[RSL]%.aln $_probcons_09_%[RSL]% %[TMP_DIR]% -- +- > DIALIGN_TX DIALIGN_CONF $ $_dialigntx -- 1.7.10.2