e1445525d9e3fd4b6928ecc4a2ab68edfb4dbf8c
[jalview.git] / forester / ruby / evoruby / exe / run_phylo_pipeline.rb
1 #!/usr/local/bin/ruby -w
2 #
3 # = run_phylo_pipeline
4 #
5 # Copyright::  Copyright (C) 2010 Christian M. Zmasek
6 # License::    GNU Lesser General Public License (LGPL)
7 #
8 # $Id Exp $
9 #
10 #
11
12
13 module Evoruby
14
15   class RunPhyloPipeline
16
17     PFAM      = "/home/czmasek/DATA/PFAM/PFAM260X/"
18
19
20
21     def run
22       unless ARGV.length == 4
23         error "arguments are: <fasta formatted inputfile> <hmm-name> <min-length> <neg e-value exponent>"
24       end
25
26       hmmscan   = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
27       hmmsearch = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmsearch"
28       hsp       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
29       d2f       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
30       dsx       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
31
32       input       = ARGV[ 0 ]
33       hmm         = ARGV[ 1 ]
34       length      = ARGV[ 2 ]
35       e_value_exp = ARGV[ 3 ]
36       do_domain_combination_analysis = true
37
38       if e_value_exp < 0
39         error "e-value exponent cannot be negative"
40       end
41       if length <= 1
42         error "length exponent cannot be smaller than or equal to 1"
43       end
44
45       base_name = nil
46       if input.downcase.end_with?( ".fasta" )
47         base_name = input[ 0 .. input.length - 7 ]
48       elsif input.downcase.end_with?( ".fsa" )
49         base_name = input[ 0 .. input.length - 5 ]
50       else
51          base_name = input
52       end
53
54       if do_domain_combination_analysis
55
56         puts "hmmscan:"
57         cmd = "#{hmmscan} --nobias --domtblout #{base_name}_hmmscan_10 -E 10 #{PFAM}Pfam-A.hmm #{input}"
58         run_command( cmd )
59         puts
60
61         puts "hmmscan to simple domain table:"
62         cmd = "#{hsp} #{base_name}_hmmscan_10 #{base_name}_hmmscan_10_domain_table"
63         run_command( cmd )
64         puts
65
66         puts "domain table to forester format:"
67         cmd = "#{d2f} -e=10 #{base_name}_hmmscan_10_domain_table #{input} #{base_name}_hmmscan_10.dff"
68         run_command( cmd )
69         puts
70
71       end
72
73       puts "hmmsearch:"
74       cmd = "#{hmmsearch} --nobias -E 1000 --domtblout #{base_name}.hmmsearch_#{hmm}  #{PFAM}PFAM_A_HMMs/#{hmm}.hmm #{input}"
75       run_command( cmd )
76       puts
77
78       puts "dsx:"
79       cmd = "#{dsx} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm} #{base_name}.hmmsearch_#{hmm} #{input} #{base_name}_#{hmm}_e#{e_value_exp.to_s}_#{length}"
80       run_command( cmd )
81       puts
82
83     end
84
85     def run_command( cmd )
86       puts cmd
87       `#{cmd}`
88     end
89
90     def error msg
91       puts
92       puts msg
93       puts
94       exit
95     end
96
97   end
98
99   p = RunPhyloPipeline.new()
100
101   p.run()
102
103 end
104
105
106