X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fevoruby%2Fexe%2Frun_phylo_pipeline.rb;h=e4889ffb7b8aec9c044629585406e61ce31f3519;hb=edbf1c9f67be0c59e4fe9b1a43c24e3db7afaaf0;hp=d09655f457d8f52bcc5c2a5b8849dadf988f923c;hpb=4efe5abda9af7f075953aa056d6530fe4f4c17db;p=jalview.git diff --git a/forester/ruby/evoruby/exe/run_phylo_pipeline.rb b/forester/ruby/evoruby/exe/run_phylo_pipeline.rb index d09655f..e4889ff 100755 --- a/forester/ruby/evoruby/exe/run_phylo_pipeline.rb +++ b/forester/ruby/evoruby/exe/run_phylo_pipeline.rb @@ -5,73 +5,103 @@ # Copyright:: Copyright (C) 2010 Christian M. Zmasek # License:: GNU Lesser General Public License (LGPL) # -# $Id: run_phylo_pipeline.rb,v 1.15 2010/10/09 02:35:42 cmzmasek Exp $ +# $Id Exp $ # # -# hmmscan --nobias --domtblout _hmmscan_250_10 -E 10 /home/czmasek/DATA/PFAM/PFAM250/Pfam-A.hmm .fasta - -# hsp _hmmscan_250_10 _hmmscan_250_10_domain_table - -# d2f -e=10 _hmmscan_250_10_domain_table .fasta _hmmscan_250_10.dff - -# hmmsearch --nobias -E 1000 --domtblout .hmmsearch_SusD <~/DATA/PFAM/PFAM250/PFAM_A_HMMs/SusD.hmm> BACTH_CHIPI.fasta - -# dsx -dd -e=<1e-2> -l=<200> .hmmsearch_SusD .fasta BACTH_CHIPI_e2_200 - - module Evoruby class RunPhyloPipeline + PFAM = "/home/czmasek/DATA/PFAM/PFAM260X/" + HMMSCAN = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan" + HSP = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb" + D2F = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb" + DSX = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb" + def run - unless ARGV.length == 4 - puts - puts "arguments are: [inputfile].fasta [hmm-name] [min-length] [neg e-value exponent]" - puts - exit + unless ARGV.length >= 4 && ARGV.length <= 6 + error "arguments are: " + + " [E-value for hmmscan, default is 20] [hmmscan option, default is --nobias, --max for no heuristics]" end - pfam = "/home/czmasek/DATA/PFAM/PFAM260X/" - hmmscan = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan" - hmmsearch = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmsearch" - hsp = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb" - d2f = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb" - dsx = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb" - - base_name = ARGV[ 0 ] + input = ARGV[ 0 ] hmm = ARGV[ 1 ] - length = ARGV[ 2 ] - e_value_exp = ARGV[ 3 ] - do_domain_combination_analysis = true + length = ARGV[ 2 ].to_i + e_value_exp = ARGV[ 3 ].to_i + + e_for_hmmscan = 20 + hmmscan_option = "--nobias" - if do_domain_combination_analysis + if ARGV.length == 6 + hmmscan_option = ARGV[ 5 ] + end + if ARGV.length == 5 || ARGV.length == 6 + e_for_hmmscan = ARGV[ 4 ].to_i + end - cmd = "#{hmmscan} --nobias --domtblout #{base_name}_hmmscan_10 -E 10 #{pfam}Pfam-A.hmm #{base_name}.fasta" - run_command( cmd ) + if e_value_exp < 0 + error "E-value exponent for domain extraction cannot be negative" + end + if length <= 1 + error "length cannot be smaller than or equal to 1" + end + if e_for_hmmscan < 1 + error "E-value for hmmscan cannot be smaller than 1" + end - cmd = "#{hsp} #{base_name}_hmmscan_10 #{base_name}_hmmscan_10_domain_table" - run_command( cmd ) + base_name = get_base_name input - cmd = "#{d2f} -e=10 #{base_name}_hmmscan_10_domain_table #{base_name}.fasta #{base_name}_hmmscan_10.dff" - run_command( cmd ) + puts + puts "1. hmmscan:" + cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{base_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}" + run_command( cmd ) + puts - end + puts "2. hmmscan to simple domain table:" + cmd = "#{HSP} #{base_name}_hmmscan_#{e_for_hmmscan.to_s} #{base_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table" + run_command( cmd ) + puts - cmd = "#{hmmsearch} --nobias -E 1000 --domtblout #{base_name}.hmmsearch_#{hmm} #{pfam}PFAM_A_HMMs/#{hmm}.hmm #{base_name}.fasta" + puts "3. domain table to forester format:" + cmd = "#{D2F} -e=10 #{base_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{base_name}_hmmscan_#{e_for_hmmscan.to_s}.dff" run_command( cmd ) + puts - cmd = "#{dsx} -dd -e=1e-#{e_value_exp.to_s} -l=#{length} #{base_name}.hmmsearch_#{hmm} #{base_name}.fasta #{base_name}_#{hmm}_e#{e_value_exp.to_s}_#{length}" + puts "4. dsx:" + cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm} #{base_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{base_name}_#{hmm}_ee#{e_value_exp.to_s}_#{length}" run_command( cmd ) + puts end - def run_command( cmd ) + def run_command cmd puts cmd `#{cmd}` end + def get_base_name n + if n.downcase.end_with?( "_ni.fasta" ) + n[ 0 .. n.length - 10 ] + elsif n.downcase.end_with?( ".fasta" ) + n[ 0 .. n.length - 7 ] + elsif n.downcase.end_with?( "_ni.fsa" ) + n[ 0 .. n.length - 8 ] + elsif n.downcase.end_with?( ".fsa" ) + n[ 0 .. n.length - 5 ] + else + n + end + end + + def error msg + puts + puts msg + puts + exit + end + end p = RunPhyloPipeline.new()