1 #!/usr/local/bin/ruby -w
5 # Copyright:: Copyright (C) 2010 Christian M. Zmasek
6 # License:: GNU Lesser General Public License (LGPL)
# NOTE(review): this listing is an excerpt. Every line still carries its
# original file line number, and the gaps in that numbering show that lines
# are missing from this view (method headers, `end` keywords, several `if`
# conditions, some assignments). The comments below describe only what the
# visible lines establish; anything depending on a missing line is hedged.
#
# Driver that reads its settings from ARGV, collects the *.fasta files of the
# current directory and, per file, builds command lines for tap.rb, hmmscan,
# hsp.rb, d2f.rb and dsx.rb. How the `cmd` strings are executed is not
# visible here.
16 class RunPhyloPipeline
# Hard-coded absolute locations of the Pfam data directory and of the external
# programs. PFAM ends with "/" because it is concatenated directly with
# "Pfam-A.hmm" when the hmmscan command is built.
18 PFAM = "/home/czmasek/DATA/PFAM/PFAM270X/"
19 HMMSCAN = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
20 HSP = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
21 D2F = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
22 DSX = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
23 TAP = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/tap.rb"
# Argument handling: between 2 and 4 command-line arguments are accepted,
# otherwise `error` is called with the usage text. `error` is defined outside
# this excerpt; presumably it prints the message and aborts -- TODO confirm.
26 unless ARGV.length >= 2 && ARGV.length <= 4
27 error "arguments are: <min-length> " +
28 "<neg E-value exponent for domain extraction> [E-value for hmmscan, default is 10] [hmmscan option, default is --nobias, --max for no heuristics]"
# ARGV[0] is the minimum length, ARGV[1] the negative E-value exponent used for
# domain extraction. NOTE(review): String#to_i returns 0 for non-numeric text
# instead of raising, so malformed input is only caught (if at all) by the
# range checks further down.
31 length = ARGV[ 0 ].to_i
32 e_value_exp = ARGV[ 1 ].to_i
# Default hmmscan option. The assignment of the default for `e_for_hmmscan`
# is not visible; the usage text above states the default is 10.
35 hmmscan_option = "--nobias"
# Override with the fourth argument. The guarding condition is not visible;
# presumably this runs only when four arguments were given -- TODO confirm.
38 hmmscan_option = ARGV[ 3 ]
# The third argument, when present, is the E-value passed to hmmscan.
40 if ARGV.length == 3 || ARGV.length == 4
41 e_for_hmmscan = ARGV[ 2 ].to_i
# Validation failures for e_value_exp, length and e_for_hmmscan. The `if`
# conditions that trigger each message are not visible in this excerpt.
45 error "E-value exponent for domain extraction cannot be negative"
48 error "length cannot be smaller than or equal to 1"
51 error "E-value for hmmscan cannot be smaller than 1"
# Input set: entries of the current directory that are not directories and
# whose name ends with ".fasta", compared case-insensitively.
54 input_files = Dir.entries(".").select { |f| !File.directory?( f ) && f.downcase.end_with?( ".fasta" ) }
# First pass over the input files; its body is not visible in this excerpt.
57 input_files.each do | input |
# Second pass: per-file processing. `counter` is maintained in lines that are
# not visible; here it is printed as "<counter>/<total> <file name>: ".
63 input_files.each do | input |
65 puts counter.to_s + "/" + input_files.size.to_s + " " + input + ": "
# Derive `hmm_name` by removing the file suffix. The range end is inclusive,
# so `length - 10` drops the 9 characters of "_ni.fasta" and `length - 7`
# drops the 6 characters of ".fasta".
# NOTE(review): when the name consists of the suffix only, the range end
# becomes -1 and the slice returns the whole name unchanged.
73 if input.downcase.end_with?( "_ni.fasta" )
74 hmm_name = input[ 0 .. input.length - 10 ]
75 elsif input.downcase.end_with?( ".fasta" )
77 hmm_name = input[ 0 .. input.length - 7 ]
# Plain ".fasta" input: build the tap.rb command that takes the input file and
# names "<hmm_name>_ni.fasta" and "<hmm_name>.nim" as its further arguments,
# then continue with the "_ni.fasta" file as the input.
79 puts "a. identifier normalization:"
80 cmd = "#{TAP} #{input} #{hmm_name}_ni.fasta #{hmm_name}.nim"
82 input = hmm_name + "_ni.fasta"
# Presumably the `else` branch of the suffix check above -- TODO confirm.
84 error "illegal name: " + input
# hmmscan command: `hmmscan_option`, `--domtblout` pointing at
# "<hmm_name>/<hmm_name>_hmmscan_<E>", `-E <E>`, the Pfam-A.hmm file under PFAM
# and the input file. The directory "<hmm_name>/" must exist by then;
# presumably it is created in a line that is not visible.
# NOTE(review): file names and ARGV[3] are interpolated without escaping; if
# `cmd` is handed to a shell, names containing spaces or shell metacharacters
# would change the command -- confirm how `cmd` is executed.
91 cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
# hsp.rb command: first argument is the hmmscan output path, second is the
# same path with "_domain_table" appended.
95 puts "c. hmmscan to simple domain table:"
96 cmd = "#{HSP} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
# d2f.rb command: arguments are the domain table, the input file and a ".dff"
# path. NOTE(review): `-e=10` is a literal here and does not follow
# `e_for_hmmscan` -- confirm this is intended.
100 puts "d. domain table to forester format:"
101 cmd = "#{D2F} -e=10 #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
# dsx.rb command: `-d`, `-e=1e-<e_value_exp>`, `-l=<length>`, then hmm_name,
# the hmmscan output path, the input file and a final path of the form
# "<hmm_name>/<hmm_name>__<hmm_name>__ee<e_value_exp>_<length>".
106 cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm_name} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
# Move the "_ni.fasta" and ".nim" files into the "<hmm_name>/" directory and
# copy `orig_input` there as well. `orig_input` is assigned in a line that is
# not visible (presumably the file name before it was replaced by the
# "_ni.fasta" name). FileUtils needs `require 'fileutils'`, which is also not
# visible in this excerpt.
111 FileUtils.mv "#{hmm_name}_ni.fasta", "#{hmm_name}/#{hmm_name}_ni.fasta"
112 FileUtils.mv "#{hmm_name}.nim", "#{hmm_name}/#{hmm_name}.nim"
113 FileUtils.cp orig_input, "#{hmm_name}/#{orig_input}"
# Same suffix-stripping rule as above, applied to `n`; the stripped name is
# the value of the branch. Presumably the body of a helper method whose `def`
# line is not visible -- TODO confirm. The same suffix-only caveat applies.
126 if n.downcase.end_with?( "_ni.fasta" )
127 n[ 0 .. n.length - 10 ]
128 elsif n.downcase.end_with?( ".fasta" )
129 n[ 0 .. n.length - 7 ]
131 error "illegal name: " + n
# Script entry point: instantiate the pipeline class. No method call on `p` is
# visible in this excerpt; presumably a following line starts the run -- TODO
# confirm.
144 p = RunPhyloPipeline.new()