in progress
[jalview.git] / forester / ruby / evoruby / exe / run_phylo_pipeline_x.rb
index fb351c2..8d0a27d 100644 (file)
@@ -9,24 +9,30 @@
 #
 #
 
+require 'fileutils'
 
 module Evoruby
 
   class RunPhyloPipeline
 
-    PFAM      = "/home/czmasek/DATA/PFAM/PFAM270X/"
-    HMMSCAN  = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
-    HSP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
-    D2F       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
-    DSX       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
-    TAP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/tap.rb"
+    LAUNCH_ANALYSIS = true
+    HOME          = "/home/czmasek/"
+    FORESTER_RUBY = "#{HOME}SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/"
+    PFAM          = "#{HOME}DATA/PFAM/PFAM270X/"
+    HMMSCAN       = "#{HOME}SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
+    HSP           = "#{FORESTER_RUBY}hsp.rb"
+    D2F           = "#{FORESTER_RUBY}d2f.rb"
+    DSX           = "#{FORESTER_RUBY}dsx.rb"
+    TAP           = "#{FORESTER_RUBY}tap.rb"
+    PF            = "#{FORESTER_RUBY}phylogeny_factory.rb"
+    TEMPLATE_FILE = '00_phylogeny_factory.template'
 
     def run
-      unless ARGV.length >= 4 && ARGV.length <= 6
+      unless ARGV.length >= 2 && ARGV.length <= 4
         error "arguments are:  <min-length> " +
          "<neg E-value exponent for domain extraction> [E-value for hmmscan, default is 10] [hmmscan option, default is --nobias, --max for no heuristics]"
       end
-      
+
       length      = ARGV[ 0 ].to_i
       e_value_exp = ARGV[ 1 ].to_i
 
@@ -58,16 +64,21 @@ module Evoruby
       end
       puts
 
-      counter = 0
+      counter = 1
       input_files.each do | input |
 
         puts counter.to_s + "/" +  input_files.size.to_s + " " + input + ": "
-        
+
+        counter += 1
+
         hmm_name = ""
+        id_norm = false
+        orig_input = input
 
         if input.downcase.end_with?( "_ni.fasta" )
           hmm_name = input[ 0 .. input.length - 10 ]
         elsif input.downcase.end_with?( ".fasta" )
+          id_norm = true
           hmm_name = input[ 0 .. input.length - 7 ]
           puts
           puts "a. identifier normalization:"
@@ -78,27 +89,57 @@ module Evoruby
           error "illegal name: " + input
         end
 
+        Dir.mkdir( hmm_name )
+
         puts
         puts "b. hmmscan:"
-        cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
+        cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
         run_command( cmd )
         puts
 
         puts "c. hmmscan to simple domain table:"
-        cmd = "#{HSP} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
+        cmd = "#{HSP} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
         run_command( cmd )
         puts
 
         puts "d. domain table to forester format:"
-        cmd = "#{D2F} -e=10 #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
+        cmd = "#{D2F} -e=10 #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
         run_command( cmd )
         puts
 
         puts "e. dsx:"
-        cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm_name} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+        cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm_name} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
         run_command( cmd )
         puts
 
+        if id_norm
+          FileUtils.mv "#{hmm_name}_ni.fasta", "#{hmm_name}/#{hmm_name}_ni.fasta"
+          FileUtils.mv "#{hmm_name}.nim", "#{hmm_name}/#{hmm_name}.nim"
+          FileUtils.cp orig_input, "#{hmm_name}/#{orig_input}"
+        end
+
+        Dir.mkdir( hmm_name + "/msa" )
+        Dir.mkdir( hmm_name + "/msa100" )
+
+        FileUtils.cp "#{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}.fasta", "#{hmm_name}/msa/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+        FileUtils.cp "#{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}.fasta", "#{hmm_name}/msa100/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+
+        if File.exists?( TEMPLATE_FILE )
+          FileUtils.cp TEMPLATE_FILE, "#{hmm_name}/msa/"
+          FileUtils.cp TEMPLATE_FILE, "#{hmm_name}/msa100/"
+
+          if LAUNCH_ANALYSIS
+            puts "f. analysis:"
+            Dir.chdir "#{hmm_name}/msa/"
+            run_command "#{PF} -s"
+            Dir.chdir "../.."
+            Dir.chdir "#{hmm_name}/msa100/"
+            run_command "#{PF} -s"
+            Dir.chdir "../.."
+            puts
+          end
+        end
+
       end
 
     end