1 #!/usr/local/bin/ruby -w
5 # Copyright:: Copyright (C) 2010 Christian M. Zmasek
6 # License:: GNU Lesser General Public License (LGPL)
# Pipeline driver: the visible statements run identifier normalization, hmmscan,
# domain-table conversion, domain sequence extraction and phylogeny_factory on
# the FASTA files found in the current directory.
# NOTE(review): only part of the class body is visible here; the comments below
# describe the visible statements only.
16 class RunPhyloPipeline
# When true (and at least one of run_1/run_100 is set), phylogeny_factory is
# launched after the alignment directories have been prepared.
18 LAUNCH_ANALYSIS = true
# Machine-specific base directory; every tool and data path below is built from it.
19 HOME = "/home/czmasek/"
# Directory holding the forester/evoruby executables referenced by HSP, D2F, DSX, TAP and PF.
20 FORESTER_RUBY = "#{HOME}SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/"
# Directory expected to contain "Pfam-A.hmm", which is passed to hmmscan.
21 PFAM = "#{HOME}DATA/PFAM/PFAM270X/"
# Path of the hmmscan executable (HMMER 3.0 source tree).
22 HMMSCAN = "#{HOME}SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
# Converts hmmscan output into a simple domain table (step c).
23 HSP = "#{FORESTER_RUBY}hsp.rb"
# Converts the domain table into forester format (step d).
24 D2F = "#{FORESTER_RUBY}d2f.rb"
# Called with the hmm name, the hmmscan output and the input sequences (step e).
25 DSX = "#{FORESTER_RUBY}dsx.rb"
# Identifier normalization (step a); writes "<name>_ni.fasta" and "<name>.nim".
26 TAP = "#{FORESTER_RUBY}tap.rb"
# phylogeny_factory script, run with "-b=1 -s" and "-b=100 -s".
27 PF = "#{FORESTER_RUBY}phylogeny_factory.rb"
# Template looked up in the current directory and, if present, copied into both msa directories.
28 TEMPLATE_FILE = '00_phylogeny_factory.template'
# Command-line handling: between 2 and 4 arguments are accepted; otherwise the
# usage text is passed to `error`.
# NOTE(review): `error` is defined outside the visible lines — presumably it
# prints the message and terminates; confirm.
31 unless ARGV.length >= 2 && ARGV.length <= 4
32 error "arguments are: <min-length> " +
33 "<neg E-value exponent for domain extraction> [E-value for hmmscan, default is 10] [hmmscan option, default is --nobias, --max for no heuristics]"
# String#to_i returns 0 for text that does not start with a number, so malformed
# values are not rejected by the conversion itself.
36 length = ARGV[ 0 ].to_i
37 e_value_exp = ARGV[ 1 ].to_i
# Default hmmscan option; it is replaced by the 4th argument.
# NOTE(review): the condition guarding the override is not visible — presumably
# it applies only when a 4th argument was supplied.
40 hmmscan_option = "--nobias"
43 hmmscan_option = ARGV[ 3 ]
# The optional 3rd argument is the hmmscan E-value.
# NOTE(review): the usage text states a default of 10, but the default
# assignment is not visible here — confirm.
45 if ARGV.length == 3 || ARGV.length == 4
46 e_for_hmmscan = ARGV[ 2 ].to_i
# Range checks on the parsed values.
# NOTE(review): the guarding conditions are not visible; the messages indicate
# what each check is meant to reject.
50 error "E-value exponent for domain extraction cannot be negative"
53 error "length cannot be smaller than or equal to 1"
56 error "E-value for hmmscan cannot be smaller than 1"
# Collect every non-directory entry of the current directory whose name ends in
# ".fasta" (compared case-insensitively).
59 input_files = Dir.entries(".").select { |f| !File.directory?( f ) && f.downcase.end_with?( ".fasta" ) }
# First pass over the inputs.
# NOTE(review): the body of this loop is not visible.
62 input_files.each do | input |
# Second pass: process each input file in turn.
68 input_files.each do | input |
# Progress line "<counter>/<total> <file>: ".
# NOTE(review): `counter` is maintained outside the visible lines.
70 puts counter.to_s + "/" + input_files.size.to_s + " " + input + ": "
# Derive hmm_name by removing the file suffix: "_ni.fasta" is 9 characters, so
# length - 10 is the index of the last character before it.
78 if input.downcase.end_with?( "_ni.fasta" )
79 hmm_name = input[ 0 .. input.length - 10 ]
# ".fasta" is 6 characters, hence length - 7.
80 elsif input.downcase.end_with?( ".fasta" )
81 hmm_name = input[ 0 .. input.length - 7 ]
# No entry named hmm_name exists yet: build the identifier-normalization command
# and continue with the normalized file as input.
# NOTE(review): the line that executes `cmd` is not visible — presumably run_command.
82 unless File.exist? hmm_name
85 puts "a. identifier normalization:"
86 cmd = "#{TAP} #{input} #{hmm_name}_ni.fasta #{hmm_name}.nim"
88 input = hmm_name + "_ni.fasta"
# Presumably the alternative branch (hmm_name already exists): the normalized
# file is then expected inside that directory.
90 input = hmm_name + "/" + hmm_name + "_ni.fasta"
91 unless File.exist? input
92 error "expected to already exist: " + input
94 puts "a. identifier normalization already done:" + input
# Reported for names matching neither suffix (presumably the final else branch).
97 error "illegal name: " + input
# Create the per-input working directory, named after hmm_name, if it is missing.
100 unless File.exist? hmm_name
101 Dir.mkdir( hmm_name )
# Steps b-e. Each step is skipped when its output file already exists.
# NOTE(review): the lines that execute each `cmd` are not visible — presumably
# run_command.
#
# b. hmmscan: the E-value is part of the output file name, so runs with
# different E-values produce separate files.
105 hmmscan_output = hmm_name + "/" + hmm_name + "_hmmscan_" + e_for_hmmscan.to_s
106 unless File.exist? hmmscan_output
108 cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{hmmscan_output} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
111 puts "b. hmmscan output already exists: " + hmmscan_output
# c. Convert the hmmscan output into a simple domain table.
116 hsp_output = hmm_name + "/" + hmm_name + "_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
117 unless File.exist? hsp_output
118 puts "c. hmmscan to simple domain table:"
119 cmd = "#{HSP} #{hmmscan_output} #{hsp_output}"
122 puts "c. hmmscan to simple domain table output already exists: " + hsp_output
# d. Convert the domain table into forester format.
# NOTE(review): "-e=10" is hard-coded here while the output file name embeds
# e_for_hmmscan — confirm this is intended.
126 d2f_output = "#{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
127 unless File.exist? d2f_output
128 puts "d. domain table to forester format:"
129 cmd = "#{D2F} -e=10 #{hsp_output} #{input} #{d2f_output}"
132 puts "d. domain table to forester format output already exists: " + d2f_output
# e. dsx: called with E-value cutoff 1e-<e_value_exp> and length <length>; the
# existence check uses the output base name plus ".fasta".
136 dsx_output_base = "#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
137 dsx_output = hmm_name +"/" + dsx_output_base
138 unless File.exist? dsx_output + ".fasta"
140 cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm_name} #{hmmscan_output} #{input} #{dsx_output}"
143 puts "e. dsx output already exists: " + dsx_output + ".fasta"
# Move the normalization outputs into the working directory and copy the
# original input alongside them.
# NOTE(review): the condition guarding these moves and the assignment of
# `orig_input` are not visible — presumably they apply only when normalization
# was run in this invocation.
148 FileUtils.mv "#{hmm_name}_ni.fasta", "#{hmm_name}/#{hmm_name}_ni.fasta"
149 FileUtils.mv "#{hmm_name}.nim", "#{hmm_name}/#{hmm_name}.nim"
150 FileUtils.cp orig_input, "#{hmm_name}/#{orig_input}"
# Two analysis directories per parameter set; they are later used for the
# "-b=1" and "-b=100" phylogeny_factory runs respectively.
153 msa_dir = hmm_name + "/msa_ee#{e_value_exp.to_s}_#{length}"
154 msa_100_dir =hmm_name + "/msa100_ee#{e_value_exp.to_s}_#{length}"
156 unless File.exist? msa_dir
159 unless File.exist? msa_100_dir
160 Dir.mkdir( msa_100_dir )
# Copy the dsx FASTA output into each directory unless it is already there; the
# destination name is dsx_output_base, i.e. without the ".fasta" extension.
166 unless File.exist? "#{msa_dir}/#{dsx_output_base}"
168 FileUtils.cp "#{dsx_output}.fasta", "#{msa_dir}/#{dsx_output_base}"
171 unless File.exist? "#{msa_100_dir}/#{dsx_output_base}"
173 FileUtils.cp "#{dsx_output}.fasta", "#{msa_100_dir}/#{dsx_output_base}"
# The template is only distributed when it exists in the current directory.
176 if File.exist?( TEMPLATE_FILE )
178 FileUtils.cp TEMPLATE_FILE, msa_dir
181 FileUtils.cp TEMPLATE_FILE, msa_100_dir
# Launch phylogeny_factory.
# NOTE(review): `run_1`, `run_100` and `run_command` are defined outside the
# visible lines.
184 if LAUNCH_ANALYSIS && ( run_1 || run_100 )
188 run_command "#{PF} -b=1 -s"
# Dir.chdir changes the working directory of the whole process.
# NOTE(review): every path in this pipeline is relative and no chdir back is
# visible — confirm the original directory is restored before the next input.
192 Dir.chdir msa_100_dir
193 run_command "#{PF} -b=100 -s"
# Evaluates to `n` with its "_ni.fasta" (9 characters) or ".fasta" (6 characters)
# suffix removed; the suffix test is case-insensitive. Any other name is
# reported through `error`.
# NOTE(review): the enclosing method definition is not visible — presumably
# this is a helper that returns the base name.
210 if n.downcase.end_with?( "_ni.fasta" )
211 n[ 0 .. n.length - 10 ]
212 elsif n.downcase.end_with?( ".fasta" )
213 n[ 0 .. n.length - 7 ]
215 error "illegal name: " + n
# Script entry point: instantiate the pipeline class.
# NOTE(review): no further call on `p` is visible, so the pipeline presumably
# runs from the constructor — confirm.
228 p = RunPhyloPipeline.new()