new tool
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 9 Nov 2013 00:32:45 +0000 (00:32 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 9 Nov 2013 00:32:45 +0000 (00:32 +0000)
forester/ruby/evoruby/exe/run_phylo_pipeline_x.rb [new file with mode: 0644]

diff --git a/forester/ruby/evoruby/exe/run_phylo_pipeline_x.rb b/forester/ruby/evoruby/exe/run_phylo_pipeline_x.rb
new file mode 100644 (file)
index 0000000..cc8cfbb
--- /dev/null
@@ -0,0 +1,136 @@
+#!/usr/local/bin/ruby -w
+#
+# = run_phylo_pipeline_x
+#
+# Copyright::  Copyright (C) 2010 Christian M. Zmasek
+# License::    GNU Lesser General Public License (LGPL)
+#
+# $Id Exp $
+#
+#
+
+
+module Evoruby
+
+  class RunPhyloPipeline
+
+    PFAM      = "/home/czmasek/DATA/PFAM/PFAM270X/"
+    HMMSCAN  = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
+    HSP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
+    D2F       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
+    DSX       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
+    TAP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/tap.rb"
+
+    def run
+      unless ARGV.length >= 4 && ARGV.length <= 6
+        error "arguments are:  <min-length> " +
+         "<neg E-value exponent for domain extraction> [E-value for hmmscan, default is 10] [hmmscan option, default is --nobias, --max for no heuristics]"
+      end
+
+
+
+      length      = ARGV[ 0 ].to_i
+      e_value_exp = ARGV[ 1 ].to_i
+
+      e_for_hmmscan = 10
+      hmmscan_option = "--nobias"
+
+      if ARGV.length == 4
+        hmmscan_option = ARGV[ 3 ]
+      end
+      if ARGV.length == 3 || ARGV.length == 4
+        e_for_hmmscan = ARGV[ 2 ].to_i
+      end
+
+      if e_value_exp < 0
+        error "E-value exponent for domain extraction cannot be negative"
+      end
+      if length <= 1
+        error "length cannot be smaller than or equal to 1"
+      end
+      if e_for_hmmscan < 1
+        error "E-value for hmmscan cannot be smaller than 1"
+      end
+
+      input_files = Dir.entries(".").select { |f| !File.directory?( f ) && f.downcase.end_with?( ".fasta" ) }
+
+      puts "Input files:"
+      input_files.each do | input |
+        puts input
+      end
+      puts
+
+      input_files.each do | input |
+
+        hmm_name = ""
+
+        if input.downcase.end_with?( "_ni.fasta" )
+          hmm_name = input[ 0 .. input.length - 10 ]
+        elsif input.downcase.end_with?( ".fasta" )
+          hmm_name = input[ 0 .. input.length - 7 ]
+          puts
+          puts "0. identifier normalization:"
+          cmd = "#{TAP} #{input}"
+          run_command( cmd )
+          puts
+        else
+          error "illegal name: " + input
+        end
+
+        puts
+        puts "1. hmmscan:"
+        cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
+        run_command( cmd )
+        puts
+
+        puts "2. hmmscan to simple domain table:"
+        cmd = "#{HSP} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
+        run_command( cmd )
+        puts
+
+        puts "3. domain table to forester format:"
+        cmd = "#{D2F} -e=10 #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
+        run_command( cmd )
+        puts
+
+        puts "4. dsx:"
+        cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm_name} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
+        run_command( cmd )
+        puts
+
+      end
+
+    end
+
+    def run_command cmd
+      puts cmd
+      `#{cmd}`
+    end
+
+    def get_base_name n
+      if n.downcase.end_with?( "_ni.fasta" )
+        n[ 0 .. n.length - 10 ]
+      elsif n.downcase.end_with?( ".fasta" )
+        n[ 0 .. n.length - 7 ]
+      else
+        error "illegal name: " + n
+      end
+    end
+
+    def error msg
+      puts
+      puts msg
+      puts
+      exit
+    end
+
+  end
+
+  # Script entry point: build the pipeline driver and start it as soon as
+  # this file is executed.
+  RunPhyloPipeline.new.run
+
+end
+
+
+