inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 12 Nov 2013 00:00:05 +0000 (00:00 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 12 Nov 2013 00:00:05 +0000 (00:00 +0000)
forester/ruby/evoruby/exe/run_phylo_pipeline_x.rb
forester/ruby/evoruby/lib/evo/tool/phylogeny_factory.rb

index 50de838..8d0a27d 100644 (file)
@@ -15,12 +15,17 @@ module Evoruby
 
   class RunPhyloPipeline
 
-    PFAM      = "/home/czmasek/DATA/PFAM/PFAM270X/"
-    HMMSCAN  = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
-    HSP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
-    D2F       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
-    DSX       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
-    TAP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/tap.rb"
+    LAUNCH_ANALYSIS = true # when true (and TEMPLATE_FILE exists), PF is run with "-s" inside <hmm_name>/msa/ and <hmm_name>/msa100/
+    HOME          = "/home/czmasek/" # base directory interpolated into the path constants below
+    FORESTER_RUBY = "#{HOME}SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/" # directory prefix shared by HSP, D2F, DSX, TAP and PF
+    PFAM          = "#{HOME}DATA/PFAM/PFAM270X/"
+    HMMSCAN       = "#{HOME}SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
+    HSP           = "#{FORESTER_RUBY}hsp.rb"
+    D2F           = "#{FORESTER_RUBY}d2f.rb"
+    DSX           = "#{FORESTER_RUBY}dsx.rb"
+    TAP           = "#{FORESTER_RUBY}tap.rb"
+    PF            = "#{FORESTER_RUBY}phylogeny_factory.rb" # script launched by the analysis step
+    TEMPLATE_FILE = '00_phylogeny_factory.template' # relative path; copied into msa/ and msa100/ only if it exists in the current directory
 
     def run
       unless ARGV.length >= 2 && ARGV.length <= 4
@@ -114,8 +119,26 @@ module Evoruby
         end
 
         Dir.mkdir( hmm_name + "/msa" )
+        Dir.mkdir( hmm_name + "/msa100" )
 
         FileUtils.cp "#{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}.fasta", "#{hmm_name}/msa/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+        FileUtils.cp "#{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}.fasta", "#{hmm_name}/msa100/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+
+        if File.exists?( TEMPLATE_FILE )
+          FileUtils.cp TEMPLATE_FILE, "#{hmm_name}/msa/"
+          FileUtils.cp TEMPLATE_FILE, "#{hmm_name}/msa100/"
+
+          if LAUNCH_ANALYSIS
+            puts "f. analysis:"
+            Dir.chdir "#{hmm_name}/msa/"
+            run_command "#{PF} -s"
+            Dir.chdir "../.."
+            Dir.chdir "#{hmm_name}/msa100/"
+            run_command "#{PF} -s"
+            Dir.chdir "../.."
+            puts
+          end
+        end
 
       end
 
index e453c8c..93eac07 100644 (file)
@@ -18,18 +18,19 @@ module Evoruby
   class PhylogenyFactory
 
     PRG_NAME       = "phylogeny_factory"
-    PRG_DATE       = "130402"
+    PRG_DATE       = "1301111" # NOTE(review): 7 digits, while the value it replaces ("130402") has 6 - possibly "131111" was intended; confirm
     PRG_DESC       = "automated phylogeny reconstruction using queing system"
-    PRG_VERSION    = "1.002"
+    PRG_VERSION    = "1.100"
     COPYRIGHT      = "2013 Christian M Zmasek"
     CONTACT        = "phylosoft@gmail.com"
     WWW            = "www.phylosoft.org"
 
     USE_JOB_SUBMISSION_SYSTEM_OPTION  = 's'
+    BS_OPTION                         = 'b' # command-line option read as an int bootstrap count; unset or 0 falls back to 1, a negative value aborts
     LOG_FILE                          = '00_phylogeny_factory.log'
     TEMPLATE_FILE                     = '00_phylogeny_factory.template'
     PBS_O_WORKDIR                     = '$PBS_O_WORKDIR/'
-    MIN_LENGTH_DEFAULT                = 50
+    MIN_LENGTH_DEFAULT                = 40
     PFAM_HHMS                         = "/home/czmasek/DATA/PFAM/PFAM270X/PFAM_A_HMMs/"
     WALLTIME                          = '100:00:00'
     QUEUE                             = 'default'
@@ -38,6 +39,7 @@ module Evoruby
 
     RSL                 = 'RSL'
     HMM                 = 'HMM'
+    PHYLO_PL            = 'PHYLO_PL'
 
     OPTION_OPEN          = '%['
     OPTION_CLOSE          = ']%'
@@ -65,6 +67,7 @@ module Evoruby
 
       allowed_opts = Array.new
       allowed_opts.push( USE_JOB_SUBMISSION_SYSTEM_OPTION )
+      allowed_opts.push( BS_OPTION )
 
       disallowed = cla.validate_allowed_options_as_str( allowed_opts )
       if ( disallowed.length > 0 )
@@ -88,12 +91,25 @@ module Evoruby
         use_job_submission_system = true
       end
 
+      bootstraps = 1
+      if cla.is_option_set?( BS_OPTION )
+        bootstraps = cla.get_option_value_as_int( BS_OPTION )
+      end
+      if bootstraps < 0
+        puts( '[' + PRG_NAME + '] > negative bootstrap value' )
+        exit( -1 )
+      end
+      if bootstraps == 0
+        bootstraps = 1
+      end
+
       log = String.new
 
       now = DateTime.now
       log << "Program     : " + PRG_NAME + NL
       log << "Version     : " + PRG_VERSION + NL
       log << "Program date: " + PRG_DATE + NL + NL
+      log << "Bootstraps  : " + bootstraps.to_s + NL
       log << "Date/time   : " + now.to_s + NL
       log << "Directory   : " + Dir.getwd  + NL + NL
 
@@ -102,7 +118,6 @@ module Evoruby
       paths       = Hash.new  # path placeholder -> full path
       min_lengths = Hash.new  # alignment id -> minimal length
       options     = Hash.new  # option placeholder -> option
-      #  ids         = Set.new
 
       commands    = Array.new
 
@@ -123,8 +138,13 @@ module Evoruby
           puts( '[' + PRG_NAME + '] > min lengths: ' + $1 + ' => ' + $2 )
 
         elsif ( line =~ /^%\s*(\S+)\s*=\s*(\S+)/ )
-          options[ $1 ] = $2
-          puts( '[' + PRG_NAME + '] > options    : ' + $1 + ' => ' + $2 )
+          key = $1
+          value = $2
+          if key == PHYLO_PL
+            value = update_phylo_pl_options( value, bootstraps )
+          end
+          options[ key ] = value
+          puts( '[' + PRG_NAME + '] > options    : ' + key + ' => ' + value )
 
         elsif ( line =~ /^>\s*(.+)/ )
           command = command + $1 + ";#{NL}"
@@ -243,6 +263,14 @@ module Evoruby
       command
     end
 
+    def update_phylo_pl_options( phylo_pl_options, bootstraps ) # Returns the option string with 'B<bootstraps>' prepended when no 'B<digit>' is present, then prefixed with '-'.
+      unless phylo_pl_options =~ /B\d/ # no 'B' followed by a digit anywhere in the string (presumably: no bootstrap setting given - confirm)
+        phylo_pl_options = 'B' + bootstraps.to_s + phylo_pl_options # builds a new string; the caller's object is not mutated
+      end
+      phylo_pl_options = '-' + phylo_pl_options # unconditional; NOTE(review): a value that already starts with '-' becomes '--...' - confirm templates omit the dash
+      phylo_pl_options # NOTE(review): the caller applies this only to the key 'PHYLO_PL', while the template in this diff sets 'PHYLO_OPT' - verify the key
+    end
+
     def subst_min_length( command, id, min_lengths )
       min_length = nil
       if id != nil && id.length > 0
@@ -274,10 +302,10 @@ end # module Evoruby
 # are to be used:
 #  the substring between the first two double underscores is a
 #  unique identifier and needs to match the identifiers
-#  in '% <parameter-type> <unique-id>=<value>' statements 
+#  in '% <parameter-type> <unique-id>=<value>' statements
 #  Example:
 #  alignment name     : 'x__bcl2__e1'
-#  parameter statments: '% RSL bcl2=60' 
+#  parameter statments: '% RSL bcl2=60'
 $ PROBCONS=/home/czmasek/SOFTWARE/PROBCONS/probcons_v1_12/probcons
 $ DIALIGN_TX=/home/czmasek/SOFTWARE/DIALIGNTX/DIALIGN-TX_1.0.2/source/dialign-tx
 $ DIALIGN_CONF=/home/czmasek/SOFTWARE/DIALIGNTX/DIALIGN-TX_1.0.2/conf
@@ -291,13 +319,13 @@ $ PHYLO_PL=/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/archive/perl/ph
 
 
 % RSL Hormone_recep=60
-% 
+%
 % RSL Y_phosphatase=100
 % RSL Y_phosphatase2=75
 % RSL Y_phosphatase3=50
 % RSL Y_phosphatase3C=40
 
-% PHYLO_OPT=-B100q@1r4j2IGS21X
+% PHYLO_OPT=B100q@1r4j2IGS21X
 
 % TMP_DIR  = /home/czmasek/tmp/
 
@@ -305,23 +333,23 @@ $ PHYLO_PL=/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/archive/perl/ph
 > KALIGN $ > $_kalign
 > MSA_PRO -o=p -n=10 -d -rr=0.5 -c -rsl=%[RSL]% $_kalign $_kalign_05_%[RSL]%.aln
 > PHYLO_PL %[PHYLO_OPT]% $_kalign_05_%[RSL]%.aln $_kalign_05_%[RSL]% %[TMP_DIR]%
-- 
+-
 
 > KALIGN $ > $_kalign_
 > MSA_PRO -o=p -n=10 -d -rr=0.9 -c -rsl=%[RSL]% $_kalign_ $_kalign_09_%[RSL]%.aln
 > PHYLO_PL %[PHYLO_OPT]% $_kalign_09_%[RSL]%.aln $_kalign_09_%[RSL]% %[TMP_DIR]%
-- 
+-
 
 
 > HMMALIGN --amino --trim --outformat Pfam -o $_hmmalign %[HMM]% $ > /dev/null
 > MSA_PRO -o=p -n=10 -d -rr=0.5 -c -rsl=%[RSL]% $_hmmalign $_hmmalign_05_%[RSL]%.aln
 > PHYLO_PL %[PHYLO_OPT]% $_hmmalign_05_%[RSL]%.aln $_hmmalign_05_%[RSL]% %[TMP_DIR]%
-- 
+-
 
 > HMMALIGN --amino --trim --outformat Pfam -o $_hmmalign_ %[HMM]% $ > /dev/null
 > MSA_PRO -o=p -n=10 -d -rr=0.9 -c -rsl=%[RSL]% $_hmmalign_ $_hmmalign_09_%[RSL]%.aln
 > PHYLO_PL %[PHYLO_OPT]% $_hmmalign_09_%[RSL]%.aln $_hmmalign_09_%[RSL]% %[TMP_DIR]%
-- 
+-
 
 
 > MAFFT --maxiterate 1000 --localpair $ > $_mafft
@@ -334,7 +362,7 @@ $ PHYLO_PL=/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/archive/perl/ph
 > PHYLO_PL %[PHYLO_OPT]% $_mafft_09_%[RSL]%.aln $_mafft_09_%[RSL]% %[TMP_DIR]%
 -
 
-        
+
 > MUSCLE  -maxiters 1000 -maxtrees 100 -in $ -out $_muscle
 > MSA_PRO -o=p -n=10 -d -rr=0.5 -c -rsl=%[RSL]% $_muscle $_muscle_05_%[RSL]%.aln
 > PHYLO_PL %[PHYLO_OPT]% $_muscle_05_%[RSL]%.aln $_muscle_05_%[RSL]% %[TMP_DIR]%
@@ -360,12 +388,12 @@ $ PHYLO_PL=/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/archive/perl/ph
 > PROBCONS $ > $_probcons
 > MSA_PRO -o=p -n=10 -d -rem_red -rr=0.5 -c -rsl=%[RSL]% $_probcons $_probcons_05_%[RSL]%.aln
 > PHYLO_PL %[PHYLO_OPT]% $_probcons_05_%[RSL]%.aln $_probcons_05_%[RSL]% %[TMP_DIR]%
--  
+-
 
 > PROBCONS $ > $_probcons_
 > MSA_PRO -o=p -n=10 -d -rem_red -rr=0.9 -c -rsl=%[RSL]% $_probcons_ $_probcons_09_%[RSL]%.aln
 > PHYLO_PL %[PHYLO_OPT]% $_probcons_09_%[RSL]%.aln $_probcons_09_%[RSL]% %[TMP_DIR]%
--  
+-
 
 
 > DIALIGN_TX DIALIGN_CONF $ $_dialigntx