in progress
[jalview.git] / forester / ruby / evoruby / exe / run_phylo_pipeline.rb
1 #!/usr/local/bin/ruby -w
2 #
3 # = run_phylo_pipeline
4 #
5 # Copyright::  Copyright (C) 2010 Christian M. Zmasek
6 # License::    GNU Lesser General Public License (LGPL)
7 #
8 # $Id: run_phylo_pipeline.rb,v 1.15 2010/10/09 02:35:42 cmzmasek Exp $
9 #
10 #
11
12
13 #  hmmscan --nobias --domtblout <BACTH_CHIPI>_hmmscan_250_10 -E 10 /home/czmasek/DATA/PFAM/PFAM250/Pfam-A.hmm <BACTH_CHIPI>.fasta
14
15 #  hsp <BACTH_CHIPI>_hmmscan_250_10 <BACTH_CHIPI>_hmmscan_250_10_domain_table
16
17 #  d2f -e=10 <BACTH_CHIPI>_hmmscan_250_10_domain_table <BACTH_CHIPI>.fasta <BACTH_CHIPI>_hmmscan_250_10.dff
18
19 # hmmsearch --nobias -E 1000 --domtblout <BACTH_CHIPI>.hmmsearch_SusD  <~/DATA/PFAM/PFAM250/PFAM_A_HMMs/SusD.hmm> BACTH_CHIPI.fasta
20
21 # dsx -dd -e=<1e-2> -l=<200> <BACTH_CHIPI>.hmmsearch_SusD <BACTH_CHIPI>.fasta BACTH_CHIPI_e2_200
22
23
24 module Evoruby
25
26   class RunPhyloPipeline
27
28     def run
29       unless ARGV.length == 4
30         puts "arguments are: [inputfile].fasta [hmm-name] [min-length] [neg e-value exponent]"
31         exit
32       end
33
34       pfam      = "/home/czmasek/DATA/PFAM/PFAM260/"
35       hmmscan   = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
36       hmmsearch = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmsearch"
37       hsp       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
38       d2f       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
39       dsx       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
40
41       base_name   = ARGV[ 0 ]
42       hmm         = ARGV[ 1 ]
43       length      = ARGV[ 2 ]
44       e_value_exp = ARGV[ 3 ]
45       do_domain_combination_analysis = true
46
47       if do_domain_combination_analysis
48
49         cmd = "#{hmmscan} --nobias --domtblout #{base_name}_hmmscan_10 -E 10 #{pfam}Pfam-A.hmm #{base_name}.fasta"
50         run_command( cmd )
51
52         cmd = "#{hsp} #{base_name}_hmmscan_10 #{base_name}_hmmscan_10_domain_table"
53         run_command( cmd )
54
55         cmd = "#{d2f} -e=10 #{base_name}_hmmscan_10_domain_table #{base_name}.fasta #{base_name}_hmmscan_10.dff"
56         run_command( cmd )
57
58       end
59
60       cmd = "#{hmmsearch} --nobias -E 1000 --domtblout #{base_name}.hmmsearch_#{hmm}  #{pfam}PFAM_A_HMMs/#{hmm}.hmm #{base_name}.fasta"
61       run_command( cmd )
62
63       cmd = "#{dsx} -dd -e=1e-#{e_value_exp.to_s} -l=#{length} #{base_name}.hmmsearch_#{hmm} #{base_name}.fasta #{base_name}_#{hmm}_e#{e_value_exp.to_s}_#{length}"
64       run_command( cmd )
65
66     end
67
68     def run_command( cmd )
69       puts cmd
70       `#{cmd}`
71     end
72
73   end
74
75   p = RunPhyloPipeline.new()
76
77   p.run()
78
79 end
80
81
82