1 #!/usr/local/bin/ruby -w
5 # Copyright:: Copyright (C) 2010 Christian M. Zmasek
6 # License:: GNU Lesser General Public License (LGPL)
# Pipeline driver: the command strings assembled further down in this class
# combine the constants below with the command-line arguments.
# NOTE(review): every constant is an absolute path under /home/czmasek, so the
# script only works unchanged on a machine with that exact layout.
15 class RunPhyloPipeline
# Directory prefix of the Pfam data; "Pfam-A.hmm" is appended directly to it
# when the hmmscan command is built, hence the trailing slash.
17 PFAM = "/home/czmasek/DATA/PFAM/PFAM260X/"
# hmmscan executable used for the first command.
18 HMMSCAN = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
# hsp.rb script: used for the "hmmscan to simple domain table" step.
19 HSP = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
# d2f.rb script: used for the "domain table to forester format" step.
20 D2F = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
# dsx.rb script: used for the last command, which takes the hmm name, the
# E-value exponent and the minimal length.
21 DSX = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
# Command-line handling: between four and six arguments are accepted; anything
# else reports the usage text through `error`.
# NOTE(review): the " +" ending the first usage line looks like a string
# concatenation whose quotes were lost in this listing -- confirm against the
# real file before touching the message.
25 unless ARGV.length >= 4 && ARGV.length <= 6
26 error "arguments are: <fasta formatted inputfile> <hmm-name> <min-length> +
27 <neg E-value exponent for domain extraction> [E-value for hmmscan, default is 20] [hmmscan option, default is --nobias]"
# Third argument (<min-length>). to_i never raises: non-numeric text becomes 0.
32 length = ARGV[ 2 ].to_i
# Fourth argument (<neg E-value exponent for domain extraction>), same to_i caveat.
33 e_value_exp = ARGV[ 3 ].to_i
# Default hmmscan option, matching the default named in the usage text.
36 hmmscan_option = "--nobias"
# Sixth argument overrides the default option.
# NOTE(review): the condition guarding this assignment is not visible here.
39 hmmscan_option = ARGV[ 5 ]
# The fifth argument (E-value for hmmscan) is read whenever five or six
# arguments were given; to_i truncates it to an integer.
41 if ARGV.length == 5 || ARGV.length == 6
42 e_for_hmmscan = ARGV[ 4 ].to_i
# Validation failures for e_value_exp, length and e_for_hmmscan respectively
# (judging by the messages).
# NOTE(review): the conditions that trigger each message are not visible here.
46 error "E-value exponent for domain extraction cannot be negative"
49 error "length cannot be smaller than or equal to 1"
52 error "E-value for hmmscan cannot be smaller than 1"
# base_name is the prefix of every output file name used in the commands below;
# it is derived from `input` by get_base_name.
55 base_name = get_base_name input
# Dead code: the commented-out chain below is an inline version of the suffix
# stripping (_ni.fasta, .fasta, _ni.fsa, .fsa) that the get_base_name call
# above now performs. It uses the same index arithmetic.
58 #if input.downcase.end_with?( "_ni.fasta" )
59 # base_name = input[ 0 .. input.length - 10 ]
60 #elsif input.downcase.end_with?( ".fasta" )
61 # base_name = input[ 0 .. input.length - 7 ]
62 #elsif input.downcase.end_with?( "_ni.fsa" )
63 # base_name = input[ 0 .. input.length - 8 ]
64 #elsif input.downcase.end_with?( ".fsa" )
65 # base_name = input[ 0 .. input.length - 5 ]
# Command 1: hmmscan with the chosen option on `input` against
# <PFAM>Pfam-A.hmm; --domtblout names the table file
# <base_name>_hmmscan_<e_for_hmmscan> and -E receives the same E-value.
# NOTE(review): all values are interpolated unquoted; if these strings are
# handed to a shell, file names containing spaces or shell metacharacters will
# break the command -- confirm how cmd is executed.
71 cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{base_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
75 puts "2. hmmscan to simple domain table:"
# Command 2: HSP reads the hmmscan table and writes <...>_domain_table.
76 cmd = "#{HSP} #{base_name}_hmmscan_#{e_for_hmmscan.to_s} #{base_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
80 puts "3. domain table to forester format:"
# Command 3: D2F takes the domain table and `input` and writes the .dff file.
# NOTE(review): -e=10 is hard-coded here and does not follow e_for_hmmscan --
# confirm that this is intended.
81 cmd = "#{D2F} -e=10 #{base_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{base_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
# Command 4: DSX with -d, cutoff 1e-<e_value_exp> and -l=<length> for `hmm`.
# It is given the hmmscan table from command 1 (not the simple domain table)
# plus `input`; the output is named <base_name>_<hmm>_ee<e_value_exp>_<length>.
86 cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm} #{base_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{base_name}_#{hmm}_ee#{e_value_exp.to_s}_#{length}"
# Suffix stripping: each branch evaluates to `n` without its trailing
# "_ni.fasta", ".fasta", "_ni.fsa" or ".fsa". The suffix test is
# case-insensitive because it runs on n.downcase, while the slice is taken
# from the original `n`.
# The "_ni" variants are tested first because such names also end with the
# shorter ".fasta" / ".fsa" suffix.
# Each upper bound is n.length - (suffix length + 1), i.e. the index of the
# last character that is kept.
# NOTE(review): if `n` is nothing but the suffix, the bound is -1 and
# n[ 0 .. -1 ] is the whole string, so nothing is stripped.
# NOTE(review): the method header and the fallback branch are not visible
# here; presumably this is the body of get_base_name -- confirm.
98 if n.downcase.end_with?( "_ni.fasta" )
99 n[ 0 .. n.length - 10 ]
100 elsif n.downcase.end_with?( ".fasta" )
101 n[ 0 .. n.length - 7 ]
102 elsif n.downcase.end_with?( "_ni.fsa" )
103 n[ 0 .. n.length - 8 ]
104 elsif n.downcase.end_with?( ".fsa" )
105 n[ 0 .. n.length - 5 ]
# Script entry: create the pipeline object.
# NOTE(review): the local variable is named `p`, the same name as Kernel#p;
# the call that actually starts the pipeline is not visible here.
120 p = RunPhyloPipeline.new()