inprogress
[jalview.git] / forester / ruby / evoruby / exe / run_phylo_pipeline_x.rb
1 #!/usr/local/bin/ruby -w
2 #
3 # = run_phylo_pipeline
4 #
5 # Copyright::  Copyright (C) 2010 Christian M. Zmasek
6 # License::    GNU Lesser General Public License (LGPL)
7 #
8 # $Id Exp $
9 #
10 #
11
12 require 'fileutils'
13
14 module Evoruby
15
16   class RunPhyloPipeline
17
18     PFAM      = "/home/czmasek/DATA/PFAM/PFAM270X/"
19     HMMSCAN  = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
20     HSP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
21     D2F       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
22     DSX       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
23     TAP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/tap.rb"
24
25     def run
26       unless ARGV.length >= 2 && ARGV.length <= 4
27         error "arguments are:  <min-length> " +
28          "<neg E-value exponent for domain extraction> [E-value for hmmscan, default is 10] [hmmscan option, default is --nobias, --max for no heuristics]"
29       end
30
31       length      = ARGV[ 0 ].to_i
32       e_value_exp = ARGV[ 1 ].to_i
33
34       e_for_hmmscan = 10
35       hmmscan_option = "--nobias"
36
37       if ARGV.length == 4
38         hmmscan_option = ARGV[ 3 ]
39       end
40       if ARGV.length == 3 || ARGV.length == 4
41         e_for_hmmscan = ARGV[ 2 ].to_i
42       end
43
44       if e_value_exp < 0
45         error "E-value exponent for domain extraction cannot be negative"
46       end
47       if length <= 1
48         error "length cannot be smaller than or equal to 1"
49       end
50       if e_for_hmmscan < 1
51         error "E-value for hmmscan cannot be smaller than 1"
52       end
53
54       input_files = Dir.entries(".").select { |f| !File.directory?( f ) && f.downcase.end_with?( ".fasta" ) }
55
56       puts "Input files:"
57       input_files.each do | input |
58         puts input
59       end
60       puts
61
62       counter = 1
63       input_files.each do | input |
64
65         puts counter.to_s + "/" +  input_files.size.to_s + " " + input + ": "
66
67         counter += 1
68
69         hmm_name = ""
70         id_norm = false
71         orig_input = input
72
73         if input.downcase.end_with?( "_ni.fasta" )
74           hmm_name = input[ 0 .. input.length - 10 ]
75         elsif input.downcase.end_with?( ".fasta" )
76           id_norm = true
77           hmm_name = input[ 0 .. input.length - 7 ]
78           puts
79           puts "a. identifier normalization:"
80           cmd = "#{TAP} #{input} #{hmm_name}_ni.fasta #{hmm_name}.nim"
81           run_command( cmd )
82           input = hmm_name + "_ni.fasta"
83         else
84           error "illegal name: " + input
85         end
86
87         Dir.mkdir( hmm_name )
88
89         puts
90         puts "b. hmmscan:"
91         cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
92         run_command( cmd )
93         puts
94
95         puts "c. hmmscan to simple domain table:"
96         cmd = "#{HSP} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
97         run_command( cmd )
98         puts
99
100         puts "d. domain table to forester format:"
101         cmd = "#{D2F} -e=10 #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
102         run_command( cmd )
103         puts
104
105         puts "e. dsx:"
106         cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm_name} #{hmm_name}/#{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
107         run_command( cmd )
108         puts
109
110         if id_norm
111           FileUtils.mv "#{hmm_name}_ni.fasta", "#{hmm_name}/#{hmm_name}_ni.fasta"
112           FileUtils.mv "#{hmm_name}.nim", "#{hmm_name}/#{hmm_name}.nim"
113           FileUtils.cp orig_input, "#{hmm_name}/#{orig_input}"
114         end
115
116         Dir.mkdir( hmm_name + "/msa" )
117
118         FileUtils.cp "#{hmm_name}/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}.fasta", "#{hmm_name}/msa/#{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
119
120       end
121
122     end
123
124     def run_command cmd
125       puts cmd
126       `#{cmd}`
127     end
128
129     def get_base_name n
130       if n.downcase.end_with?( "_ni.fasta" )
131         n[ 0 .. n.length - 10 ]
132       elsif n.downcase.end_with?( ".fasta" )
133         n[ 0 .. n.length - 7 ]
134       else
135         error "illegal name: " + n
136       end
137     end
138
139     def error msg
140       puts
141       puts msg
142       puts
143       exit
144     end
145
146   end
147
148   p = RunPhyloPipeline.new()
149
150   p.run()
151
152 end
153
154
155