inprogress
[jalview.git] / forester / ruby / evoruby / exe / run_phylo_pipeline_x.rb
index 50de838..8745f32 100644 (file)
@@ -15,12 +15,17 @@ module Evoruby
 
   class RunPhyloPipeline
 
-    PFAM      = "/home/czmasek/DATA/PFAM/PFAM270X/"
-    HMMSCAN  = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
-    HSP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
-    D2F       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
-    DSX       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
-    TAP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/tap.rb"
+    LAUNCH_ANALYSIS = true # when true (and TEMPLATE_FILE exists), run launches PF in each newly populated msa directory
+    HOME          = "/home/czmasek/" # base directory prefix shared by the paths below
+    FORESTER_RUBY = "#{HOME}SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/" # directory prefix of the helper scripts (HSP, D2F, DSX, TAP, PF)
+    PFAM          = "#{HOME}DATA/PFAM/PFAM270X/" # directory prefix for Pfam-A.hmm, passed to hmmscan in step b
+    HMMSCAN       = "#{HOME}SOFTWARE/HMMER/hmmer-3.0/src/hmmscan" # command used for step b (hmmscan)
+    HSP           = "#{FORESTER_RUBY}hsp.rb" # command for step c: hmmscan output to simple domain table
+    D2F           = "#{FORESTER_RUBY}d2f.rb" # command for step d: domain table to forester format
+    DSX           = "#{FORESTER_RUBY}dsx.rb" # command for step e: dsx
+    TAP           = "#{FORESTER_RUBY}tap.rb" # command for step a: identifier normalization
+    PF            = "#{FORESTER_RUBY}phylogeny_factory.rb" # command for step f: analysis, run with -b=1 and -b=100
+    TEMPLATE_FILE = '00_phylogeny_factory.template' # relative path checked with File.exist?; copied into the msa directories
 
     def run
       unless ARGV.length >= 2 && ARGV.length <= 4
@@ -73,38 +78,70 @@ module Evoruby
         if input.downcase.end_with?( "_ni.fasta" )
           hmm_name = input[ 0 .. input.length - 10 ]
         elsif input.downcase.end_with?( ".fasta" )
-          id_norm = true
           hmm_name = input[ 0 .. input.length - 7 ]
-          puts
-          puts "a. identifier normalization:"
-          cmd = "#{TAP} #{input} #{hmm_name}_ni.fasta #{hmm_name}.nim"
-          run_command( cmd )
-          input = hmm_name + "_ni.fasta"
+          unless File.exist? hmm_name
+            id_norm = true
+            puts
+            puts "a. identifier normalization:"
+            cmd = "#{TAP} #{input} #{hmm_name}_ni.fasta #{hmm_name}.nim"
+            run_command( cmd )
+            input = hmm_name + "_ni.fasta"
+          else
+            input = hmm_name + "/" + hmm_name + "_ni.fasta"
+            unless File.exist? input
+              error "expected to already exist: " + input
+            end
+            puts "a. identifier normalization already done:" + input
+          end
         else
           error "illegal name: " + input
         end
 
-        Dir.mkdir( hmm_name )
+        unless File.exist? hmm_name
+          Dir.mkdir( hmm_name )
+        end
 
         puts
-        puts "b. hmmscan:"
-        cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
-        run_command( cmd )
+        hmmscan_output = hmm_name + "/" + hmm_name + "_hmmscan_" + e_for_hmmscan.to_s
+        unless File.exist? hmmscan_output
+          puts "b. hmmscan:"
+          cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{hmmscan_output} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
+          run_command( cmd )
+        else
+          puts "b. hmmscan output already exists: " + hmmscan_output
+        end
         puts
 
-        puts "c. hmmscan to simple domain table:"
-        cmd = "#{HSP} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
-        run_command( cmd )
+
+        hsp_output = hmm_name + "/" + hmm_name + "_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
+        unless File.exist? hsp_output
+          puts "c. hmmscan to simple domain table:"
+          cmd = "#{HSP} #{hmmscan_output} #{hsp_output}"
+          run_command( cmd )
+        else
+          puts "c. hmmscan to simple domain table output already exists: " + hsp_output
+        end
         puts
 
-        puts "d. domain table to forester format:"
-        cmd = "#{D2F} -e=10 #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
-        run_command( cmd )
+        d2f_output = "#{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
+        unless File.exist? d2f_output
+          puts "d. domain table to forester format:"
+          cmd = "#{D2F} -e=10 #{hsp_output} #{input} #{d2f_output}"
+          run_command( cmd )
+        else
+          puts "d. domain table to forester format output already exists: " + d2f_output
+        end
         puts
 
-        puts "e. dsx:"
-        cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm_name} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
-        run_command( cmd )
+
+        dsx_output = "#{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+        unless File.exist? dsx_output + ".fasta"
+          puts "e. dsx:"
+          cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm_name} #{hmmscan_output} #{input} #{dsx_output}"
+          run_command( cmd )
+        else
+          puts "e. dsx output already exists: " + dsx_output + ".fasta"
+        end
         puts
 
         if id_norm
@@ -113,9 +150,52 @@ module Evoruby
           FileUtils.cp orig_input, "#{hmm_name}/#{orig_input}"
         end
 
-        Dir.mkdir( hmm_name + "/msa" )
+        msa_dir = hmm_name + "/msa_ee#{e_value_exp.to_s}_#{length}"
+        msa_100_dir =hmm_name + "/msa100_ee#{e_value_exp.to_s}_#{length}"
+
+        unless File.exist? msa_dir
+          Dir.mkdir( msa_dir )
+        end
+        unless File.exist? msa_100_dir
+          Dir.mkdir( msa_100_dir )
+        end
+
+        run_1 = false
+        run_100 = false
+
+        unless File.exist? "#{msa_dir}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+          run_1 = true
+          FileUtils.cp "#{dsx_output}.fasta", "#{msa_dir}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+        end
+
+        unless File.exist? "#{msa_100_dir}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+          run_100 = true
+          FileUtils.cp "#{dsx_output}.fasta", "#{msa_100_dir}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+        end
 
-        FileUtils.cp "#{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}.fasta", "#{hmm_name}/msa/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+        if File.exist?( TEMPLATE_FILE )
+          if run_1
+            FileUtils.cp TEMPLATE_FILE, msa_dir
+          end
+          if run_100
+            FileUtils.cp TEMPLATE_FILE, msa_100_dir
+          end
+
+          if LAUNCH_ANALYSIS && ( run_1 || run_100 )
+            puts "f. analysis:"
+            if run_1
+              Dir.chdir msa_dir
+              run_command "#{PF} -b=1 -s"
+              Dir.chdir "../.."
+            end
+            if run_100
+              Dir.chdir msa_100_dir
+              run_command "#{PF} -b=100 -s"
+              Dir.chdir "../.."
+            end
+            puts
+          end
+        end
 
       end