inprogress
[jalview.git] / forester / ruby / evoruby / exe / run_phylo_pipeline_x.rb
1 #!/usr/local/bin/ruby -w
2 #
3 # = run_phylo_pipeline
4 #
5 # Copyright::  Copyright (C) 2010 Christian M. Zmasek
6 # License::    GNU Lesser General Public License (LGPL)
7 #
8 # $Id Exp $
9 #
10 #
11
12 require 'fileutils'
13
14 module Evoruby
15
# = RunPhyloPipeline
#
# Driver that processes every "*.fasta" file in the current working
# directory. For each file it optionally runs identifier normalization
# (tap.rb), then hmmscan, hsp.rb, d2f.rb and dsx.rb, collects the results in
# a directory named after the input file, and, when TEMPLATE_FILE is present
# and LAUNCH_ANALYSIS is true, launches phylogeny_factory.rb in the "msa"
# and "msa100" sub-directories.
#
# All tool locations are hard-coded below relative to HOME.
class RunPhyloPipeline

  LAUNCH_ANALYSIS = true
  HOME          = "/home/czmasek/"
  FORESTER_RUBY = "#{HOME}SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/"
  PFAM          = "#{HOME}DATA/PFAM/PFAM270X/"
  HMMSCAN       = "#{HOME}SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
  HSP           = "#{FORESTER_RUBY}hsp.rb"
  D2F           = "#{FORESTER_RUBY}d2f.rb"
  DSX           = "#{FORESTER_RUBY}dsx.rb"
  TAP           = "#{FORESTER_RUBY}tap.rb"
  PF            = "#{FORESTER_RUBY}phylogeny_factory.rb"
  TEMPLATE_FILE = '00_phylogeny_factory.template'

  # File name suffixes recognized as input (matched case-insensitively).
  FASTA_SUFFIX  = ".fasta"
  # Suffix of inputs that skip the identifier normalization step.
  NI_SUFFIX     = "_ni.fasta"

  # Entry point. Reads its arguments from ARGV:
  #
  # 0:: minimal length (must be > 1)
  # 1:: negative E-value exponent for domain extraction (must be >= 0)
  # 2:: optional E-value for hmmscan, parsed as an integer (default 10,
  #     must be >= 1)
  # 3:: optional hmmscan option (default "--nobias")
  #
  # Prints a message and exits with status 1 when the arguments are invalid.
  def run
    unless ARGV.length >= 2 && ARGV.length <= 4
      error "arguments are:  <min-length> " +
       "<neg E-value exponent for domain extraction> [E-value for hmmscan, default is 10] [hmmscan option, default is --nobias, --max for no heuristics]"
    end

    length         = ARGV[ 0 ].to_i
    e_value_exp    = ARGV[ 1 ].to_i
    e_for_hmmscan  = ARGV.length >= 3 ? ARGV[ 2 ].to_i : 10
    hmmscan_option = ARGV.length == 4 ? ARGV[ 3 ] : "--nobias"

    if e_value_exp < 0
      error "E-value exponent for domain extraction cannot be negative"
    end
    if length <= 1
      error "length cannot be smaller than or equal to 1"
    end
    if e_for_hmmscan < 1
      error "E-value for hmmscan cannot be smaller than 1"
    end

    # Dir.entries returns names in filesystem order; sort them so that the
    # processing order and the "n/total" progress lines are reproducible.
    input_files = Dir.entries( "." ).select { | f |
      !File.directory?( f ) && f.downcase.end_with?( FASTA_SUFFIX )
    }.sort

    puts "Input files:"
    input_files.each do | input |
      puts input
    end
    puts

    input_files.each_with_index do | input, index |
      puts "#{index + 1}/#{input_files.size} #{input}: "
      process_input( input, length, e_value_exp, e_for_hmmscan, hmmscan_option )
    end
  end

  # Echoes +cmd+ to stdout, runs it through backticks and returns whatever
  # the command wrote to its standard output. A non-zero exit status is
  # reported on stderr; the pipeline is deliberately not aborted here.
  def run_command cmd
    puts cmd
    output = `#{cmd}`
    status = $?
    unless status.nil? || status.success?
      warn "warning: command failed (#{status}): #{cmd}"
    end
    output
  end

  # Returns +n+ without its trailing "_ni.fasta" or ".fasta" suffix (the
  # suffix is matched case-insensitively, the case of the base is kept).
  # Calls #error, and thereby exits, when +n+ has neither suffix or when
  # nothing is left in front of the suffix.
  def get_base_name n
    lower = n.downcase
    suffix_length =
      if lower.end_with?( NI_SUFFIX )
        NI_SUFFIX.length
      elsif lower.end_with?( FASTA_SUFFIX )
        FASTA_SUFFIX.length
      else
        error "illegal name: " + n
      end
    # Slice by (start, length): a range ending at "length - suffix - 1"
    # turns into 0..-1 for a bare suffix and would return the whole name.
    base = n[ 0, n.length - suffix_length ]
    error "illegal name: " + n if base.empty?
    base
  end

  # Prints +msg+ framed by blank lines and terminates the program with a
  # non-zero exit status, so that calling shells can detect the failure.
  def error msg
    puts
    puts msg
    puts
    exit 1
  end

  private

  # Runs steps a. to f. for one input file; the results end up in a newly
  # created directory named after the file's base name. Dir.mkdir raises if
  # that directory already exists.
  def process_input( orig_input, length, e_value_exp, e_for_hmmscan, hmmscan_option )
    hmm_name = get_base_name( orig_input )
    # Inputs already ending in "_ni.fasta" skip the normalization step.
    id_norm  = !orig_input.downcase.end_with?( NI_SUFFIX )
    input    = orig_input

    if id_norm
      puts
      puts "a. identifier normalization:"
      run_command( "#{TAP} #{orig_input} #{hmm_name}_ni.fasta #{hmm_name}.nim" )
      input = hmm_name + NI_SUFFIX
    end

    Dir.mkdir( hmm_name )

    hmmscan_out = "#{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan}"
    msa_name    = "#{hmm_name}__#{hmm_name}__ee#{e_value_exp}_#{length}"
    dsx_out     = "#{hmm_name}/#{msa_name}"

    puts
    puts "b. hmmscan:"
    run_command( "#{HMMSCAN} #{hmmscan_option} --domtblout #{hmmscan_out} -E #{e_for_hmmscan} #{PFAM}Pfam-A.hmm #{input}" )
    puts

    puts "c. hmmscan to simple domain table:"
    run_command( "#{HSP} #{hmmscan_out} #{hmmscan_out}_domain_table" )
    puts

    puts "d. domain table to forester format:"
    # NOTE(review): -e=10 is fixed here and does not follow the hmmscan
    # E-value argument -- confirm that this is intended.
    run_command( "#{D2F} -e=10 #{hmmscan_out}_domain_table #{input} #{hmmscan_out}.dff" )
    puts

    puts "e. dsx:"
    run_command( "#{DSX} -d -e=1e-#{e_value_exp} -l=#{length} #{hmm_name} #{hmmscan_out} #{input} #{dsx_out}" )
    puts

    if id_norm
      FileUtils.mv "#{hmm_name}_ni.fasta", "#{hmm_name}/#{hmm_name}_ni.fasta"
      FileUtils.mv "#{hmm_name}.nim", "#{hmm_name}/#{hmm_name}.nim"
      FileUtils.cp orig_input, "#{hmm_name}/#{orig_input}"
    end

    # The same dsx result file is copied (without its ".fasta" extension)
    # into both analysis directories.
    [ "msa", "msa100" ].each do | sub_dir |
      Dir.mkdir( "#{hmm_name}/#{sub_dir}" )
      FileUtils.cp "#{dsx_out}.fasta", "#{hmm_name}/#{sub_dir}/#{msa_name}"
    end

    launch_analysis( hmm_name )
  end

  # Copies TEMPLATE_FILE (if it exists in the current directory) into the
  # "msa" and "msa100" directories of +hmm_name+ and, when LAUNCH_ANALYSIS
  # is set, runs phylogeny_factory.rb inside each of them.
  def launch_analysis( hmm_name )
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    return unless File.exist?( TEMPLATE_FILE )

    FileUtils.cp TEMPLATE_FILE, "#{hmm_name}/msa/"
    FileUtils.cp TEMPLATE_FILE, "#{hmm_name}/msa100/"

    return unless LAUNCH_ANALYSIS

    puts "f. analysis:"
    # The block form of Dir.chdir restores the working directory afterwards,
    # even when the command raises.
    Dir.chdir( "#{hmm_name}/msa/" ) { run_command "#{PF} -b=1 -s" }
    Dir.chdir( "#{hmm_name}/msa100/" ) { run_command "#{PF} -b=100 -s" }
    puts
  end

end
170
# Script entry point: instantiate the pipeline driver and start it right
# away; its arguments are taken from ARGV inside RunPhyloPipeline#run.
RunPhyloPipeline.new.run
174
175 end
176
177
178