new tool
[jalview.git] / forester / ruby / evoruby / exe / run_phylo_pipeline_x.rb
1 #!/usr/local/bin/ruby -w
2 #
3 # = run_phylo_pipeline
4 #
5 # Copyright::  Copyright (C) 2010 Christian M. Zmasek
6 # License::    GNU Lesser General Public License (LGPL)
7 #
8 # $Id Exp $
9 #
10 #
11
12
13 module Evoruby
14
15   class RunPhyloPipeline
16
17     PFAM      = "/home/czmasek/DATA/PFAM/PFAM270X/"
18     HMMSCAN  = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
19     HSP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
20     D2F       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
21     DSX       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
22     TAP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/tap.rb"
23
24     def run
25       unless ARGV.length >= 4 && ARGV.length <= 6
26         error "arguments are:  <min-length> " +
27          "<neg E-value exponent for domain extraction> [E-value for hmmscan, default is 10] [hmmscan option, default is --nobias, --max for no heuristics]"
28       end
29
30
31
32       length      = ARGV[ 0 ].to_i
33       e_value_exp = ARGV[ 1 ].to_i
34
35       e_for_hmmscan = 10
36       hmmscan_option = "--nobias"
37
38       if ARGV.length == 4
39         hmmscan_option = ARGV[ 3 ]
40       end
41       if ARGV.length == 3 || ARGV.length == 4
42         e_for_hmmscan = ARGV[ 2 ].to_i
43       end
44
45       if e_value_exp < 0
46         error "E-value exponent for domain extraction cannot be negative"
47       end
48       if length <= 1
49         error "length cannot be smaller than or equal to 1"
50       end
51       if e_for_hmmscan < 1
52         error "E-value for hmmscan cannot be smaller than 1"
53       end
54
55       input_files = Dir.entries(".").select { |f| !File.directory?( f ) && f.downcase.end_with?( ".fasta" ) }
56
57       puts "Input files:"
58       input_files.each do | input |
59         puts input
60       end
61       puts
62
63       input_files.each do | input |
64
65         hmm_name = ""
66
67         if input.downcase.end_with?( "_ni.fasta" )
68           hmm_name = input[ 0 .. input.length - 10 ]
69         elsif input.downcase.end_with?( ".fasta" )
70           hmm_name = input[ 0 .. input.length - 7 ]
71           puts
72           puts "0. identifier normalization:"
73           cmd = "#{TAP} #{input}"
74           run_command( cmd )
75           puts
76         else
77           error "illegal name: " + input
78         end
79
80         puts
81         puts "1. hmmscan:"
82         cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
83         run_command( cmd )
84         puts
85
86         puts "2. hmmscan to simple domain table:"
87         cmd = "#{HSP} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
88         run_command( cmd )
89         puts
90
91         puts "3. domain table to forester format:"
92         cmd = "#{D2F} -e=10 #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
93         run_command( cmd )
94         puts
95
96         puts "4. dsx:"
97         cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm_name} #{hmm_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{hmm_name}__#{hmm_name}__ee#{e_value_exp.to_s}_#{length}"
98         run_command( cmd )
99         puts
100
101       end
102
103     end
104
105     def run_command cmd
106       puts cmd
107       `#{cmd}`
108     end
109
110     def get_base_name n
111       if n.downcase.end_with?( "_ni.fasta" )
112         n[ 0 .. n.length - 10 ]
113       elsif n.downcase.end_with?( ".fasta" )
114         n[ 0 .. n.length - 7 ]
115       else
116         error "illegal name: " + n
117       end
118     end
119
120     def error msg
121       puts
122       puts msg
123       puts
124       exit
125     end
126
127   end
128
129   p = RunPhyloPipeline.new()
130
131   p.run()
132
133 end
134
135
136