#!/usr/local/bin/ruby -w
#
# = run_phylo_pipeline
#
# Copyright::  Copyright (C) 2010 Christian M. Zmasek
# License::    GNU Lesser General Public License (LGPL)
#
# $Id Exp $
#
# Runs a fixed chain of external programs (tap, hmmscan, hsp, d2f, dsx) on
# every file in the current working directory whose name ends in ".fasta".
#
# Usage:
#   run_phylo_pipeline_x.rb <min-length> <neg-e-value-exponent> \
#       [E-value for hmmscan] [hmmscan option]


module Evoruby

  class RunPhyloPipeline

    # Locations of the Pfam data directory and of the external programs.
    PFAM    = "/home/czmasek/DATA/PFAM/PFAM270X/".freeze
    HMMSCAN = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan".freeze
    HSP     = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb".freeze
    D2F     = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb".freeze
    DSX     = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb".freeze
    TAP     = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/tap.rb".freeze

    # File name suffixes recognized by the pipeline. The longer suffix must
    # be tested first because it also ends in FASTA_SUFFIX.
    NORMALIZED_SUFFIX = "_ni.fasta".freeze
    FASTA_SUFFIX      = ".fasta".freeze

    # Parses and validates ARGV, then runs the tool chain once per input file.
    #
    # ARGV[0]:: minimal length, passed to dsx as -l (must be > 1)
    # ARGV[1]:: E-value exponent for domain extraction, passed to dsx as
    #           -e=1e-<exponent> (must be >= 0)
    # ARGV[2]:: optional E-value for hmmscan (must be >= 1, default 10)
    # ARGV[3]:: optional hmmscan option (default "--nobias")
    #
    # Terminates the process through #error on bad arguments.
    def run
      # Two mandatory plus two optional arguments.
      unless ARGV.length >= 2 && ARGV.length <= 4
        error "arguments are: <min-length> <neg-e-value-exponent for domain extraction>" +
          " [E-value for hmmscan, default is 10] [hmmscan option, default is --nobias, --max for no heuristics]"
      end

      length         = ARGV[ 0 ].to_i
      e_value_exp    = ARGV[ 1 ].to_i
      e_for_hmmscan  = ARGV.length >= 3 ? ARGV[ 2 ].to_i : 10
      hmmscan_option = ARGV.length == 4 ? ARGV[ 3 ] : "--nobias"

      if e_value_exp < 0
        error "E-value exponent for domain extraction cannot be negative"
      end
      if length <= 1
        error "length cannot be smaller than or equal to 1"
      end
      if e_for_hmmscan < 1
        error "E-value for hmmscan cannot be smaller than 1"
      end

      # Dir.entries returns names in OS order; sort so that runs are reproducible.
      input_files = Dir.entries( "." ).select { |f|
        !File.directory?( f ) && f.downcase.end_with?( FASTA_SUFFIX )
      }.sort

      puts "Input files:"
      input_files.each do |input|
        puts input
      end
      puts

      input_files.each do |input|
        hmm_name = get_base_name( input )

        # Files that do not carry the "_ni" marker are passed through tap first.
        # NOTE(review): the steps below still read +input+, not a file written
        # by tap -- confirm that this is intended.
        unless input.downcase.end_with?( NORMALIZED_SUFFIX )
          puts
          puts "0. identifier normalization:"
          run_command( "#{TAP} #{input}" )
          puts
        end

        # Common prefix of all files derived from the hmmscan result.
        scan_out = "#{hmm_name}_hmmscan_#{e_for_hmmscan}"

        puts
        puts "1. hmmscan:"
        run_command( "#{HMMSCAN} #{hmmscan_option} --domtblout #{scan_out} -E #{e_for_hmmscan} #{PFAM}Pfam-A.hmm #{input}" )
        puts

        puts "2. hmmscan to simple domain table:"
        run_command( "#{HSP} #{scan_out} #{scan_out}_domain_table" )
        puts

        puts "3. domain table to forester format:"
        # NOTE(review): -e=10 is fixed here and does not follow e_for_hmmscan -- confirm.
        run_command( "#{D2F} -e=10 #{scan_out}_domain_table #{input} #{scan_out}.dff" )
        puts

        puts "4. dsx:"
        run_command( "#{DSX} -d -e=1e-#{e_value_exp} -l=#{length} #{hmm_name} #{scan_out} #{input} #{hmm_name}__#{hmm_name}__ee#{e_value_exp}_#{length}" )
        puts
      end
    end

    # Echoes +cmd+ to standard output, executes it, and returns what the
    # command wrote to its standard output. A warning is written to standard
    # error when the command reports failure; execution continues either way.
    def run_command( cmd )
      puts cmd
      output = `#{cmd}`
      status = $?
      unless status.nil? || status.success?
        $stderr.puts "WARNING: command failed (exit status #{status.exitstatus.inspect}): #{cmd}"
      end
      output
    end

    # Returns +n+ without its trailing "_ni.fasta" or ".fasta" (matched
    # case-insensitively). Terminates through #error when +n+ has neither
    # suffix or consists of nothing but the suffix.
    def get_base_name( n )
      lower  = n.downcase
      suffix = [ NORMALIZED_SUFFIX, FASTA_SUFFIX ].find { |s| lower.end_with?( s ) }
      if suffix.nil? || suffix.length == n.length
        error "illegal name: " + n
      end
      n[ 0, n.length - suffix.length ]
    end

    # Prints +msg+ surrounded by blank lines and terminates the process with
    # a non-zero exit status so that callers can detect the failure.
    def error( msg )
      puts
      puts msg
      puts
      exit 1
    end

  end

  pipeline = RunPhyloPipeline.new
  pipeline.run

end