a5f8304003e0b4b056e716f6c08c41d7bfea5da5
[jalview.git] / forester / ruby / evoruby / exe / run_phylo_pipeline.rb
1 #!/usr/local/bin/ruby -w
2 #
3 # = run_phylo_pipeline
4 #
5 # Copyright::  Copyright (C) 2010 Christian M. Zmasek
6 # License::    GNU Lesser General Public License (LGPL)
7 #
8 # $Id Exp $
9 #
10 #
11
12
13 module Evoruby
14
15   class RunPhyloPipeline
16
17     PFAM      = "/home/czmasek/DATA/PFAM/PFAM260X/"
18     HMMSCAN  = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
19     HSP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
20     D2F       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
21     DSX       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
22
23
24     def run
25       unless ARGV.length == 4
26         error "arguments are: <fasta formatted inputfile> <hmm-name> <min-length> <neg e-value exponent>"
27       end
28
29
30
31
32       input       = ARGV[ 0 ]
33       hmm         = ARGV[ 1 ]
34       length      = ARGV[ 2 ].to_i
35       e_value_exp = ARGV[ 3 ].to_i
36
37       hmmscan_option = "--nobias"
38       e_for_hmmscan = 100
39
40       if e_value_exp < 0
41         error "e-value exponent cannot be negative"
42       end
43       if length <= 1
44         error "length exponent cannot be smaller than or equal to 1"
45       end
46
47       base_name = nil
48       if input.downcase.end_with?( ".fasta" )
49         base_name = input[ 0 .. input.length - 7 ]
50       elsif input.downcase.end_with?( ".fsa" )
51         base_name = input[ 0 .. input.length - 5 ]
52       else
53         base_name = input
54       end
55
56       puts "1. hmmscan:"
57       cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{base_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
58       run_command( cmd )
59       puts
60
61       puts "2. hmmscan to simple domain table:"
62       cmd = "#{HSP} #{base_name}_hmmscan_#{e_for_hmmscan.to_s} #{base_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
63       run_command( cmd )
64       puts
65
66       puts "3. domain table to forester format:"
67       cmd = "#{D2F} -e=10 #{base_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{base_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
68       run_command( cmd )
69       puts
70
71       puts "4. dsx:"
72       cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm} #{base_name}.hmmscan_#{e_for_hmmscan.to_s} #{input} #{base_name}_#{hmm}_e#{e_value_exp.to_s}_#{length}"
73       run_command( cmd )
74       puts
75
76     end
77
78     def run_command( cmd )
79       puts cmd
80       `#{cmd}`
81     end
82
83     def error msg
84       puts
85       puts msg
86       puts
87       exit
88     end
89
90   end
91
92   p = RunPhyloPipeline.new()
93
94   p.run()
95
96 end
97
98
99