new tool
[jalview.git] / forester / ruby / evoruby / exe / run_phylo_pipeline_x.rb
1 #!/usr/local/bin/ruby -w
2 #
3 # = run_phylo_pipeline
4 #
5 # Copyright::  Copyright (C) 2010 Christian M. Zmasek
6 # License::    GNU Lesser General Public License (LGPL)
7 #
8 # $Id Exp $
9 #
10 #
11
12
13 module Evoruby
14
15   class RunPhyloPipeline
16
17     PFAM      = "/home/czmasek/DATA/PFAM/PFAM270X/"
18     HMMSCAN  = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
19     HSP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
20     D2F       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
21     DSX       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
22     TAP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/tap.rb"
23
24     def run
25       unless ARGV.length >= 4 && ARGV.length <= 6
26         error "arguments are:  <min-length> " +
27          "<neg E-value exponent for domain extraction> [E-value for hmmscan, default is 10] [hmmscan option, default is --nobias, --max for no heuristics]"
28       end
29       
30       length      = ARGV[ 0 ].to_i
31       e_value_exp = ARGV[ 1 ].to_i
32
33       e_for_hmmscan = 10
34       hmmscan_option = "--nobias"
35
36       if ARGV.length == 4
37         hmmscan_option = ARGV[ 3 ]
38       end
39       if ARGV.length == 3 || ARGV.length == 4
40         e_for_hmmscan = ARGV[ 2 ].to_i
41       end
42
43       if e_value_exp < 0
44         error "E-value exponent for domain extraction cannot be negative"
45       end
46       if length <= 1
47         error "length cannot be smaller than or equal to 1"
48       end
49       if e_for_hmmscan < 1
50         error "E-value for hmmscan cannot be smaller than 1"
51       end
52
53       input_files = Dir.entries(".").select { |f| !File.directory?( f ) && f.downcase.end_with?( ".fasta" ) }
54
55       puts "Input files:"
56       input_files.each do | input |
57         puts input
58       end
59       puts
60
61       counter = 0
62       input_files.each do | input |
63
64         puts counter.to_s + "/" +  input_files.size.to_s + " " + input + ": "
65         
66         hmm_name = ""
67
68         if input.downcase.end_with?( "_ni.fasta" )
69           hmm_name = input[ 0 .. input.length - 10 ]
70         elsif input.downcase.end_with?( ".fasta" )
71           hmm_name = input[ 0 .. input.length - 7 ]
72           puts
73           puts "a. identifier normalization:"
74           cmd = "#{TAP} #{input} #{hmm_name}_ni.fasta #{hmm_name}.nim"
75           run_command( cmd )
76           input = hmm_name + "_ni.fasta"
77         else
78           error "illegal name: " + input
79         end
80
81         puts
82         puts "b. hmmscan:"
83         cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
84         run_command( cmd )
85         puts
86
87         puts "c. hmmscan to simple domain table:"
88         cmd = "#{HSP} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
89         run_command( cmd )
90         puts
91
92         puts "d. domain table to forester format:"
93         cmd = "#{D2F} -e=10 #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
94         run_command( cmd )
95         puts
96
97         puts "e. dsx:"
98         cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm_name} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
99         run_command( cmd )
100         puts
101
102       end
103
104     end
105
106     def run_command cmd
107       puts cmd
108       `#{cmd}`
109     end
110
111     def get_base_name n
112       if n.downcase.end_with?( "_ni.fasta" )
113         n[ 0 .. n.length - 10 ]
114       elsif n.downcase.end_with?( ".fasta" )
115         n[ 0 .. n.length - 7 ]
116       else
117         error "illegal name: " + n
118       end
119     end
120
121     def error msg
122       puts
123       puts msg
124       puts
125       exit
126     end
127
128   end
129
130   p = RunPhyloPipeline.new()
131
132   p.run()
133
134 end
135
136
137