inprogress
[jalview.git] / forester / ruby / evoruby / exe / run_phylo_pipeline.rb
1 #!/usr/local/bin/ruby -w
2 #
3 # = run_phylo_pipeline
4 #
5 # Copyright::  Copyright (C) 2010 Christian M. Zmasek
6 # License::    GNU Lesser General Public License (LGPL)
7 #
8 # $Id Exp $
9 #
10 #
11
12
13 module Evoruby
14
15   class RunPhyloPipeline
16
17     PFAM      = "/home/czmasek/DATA/PFAM/PFAM270X/"
18     HMMSCAN  = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
19     HSP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
20     D2F       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
21     DSX       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
22
23     def run
24       unless ARGV.length >= 4 && ARGV.length <= 6
25         error "arguments are: <fasta formatted inputfile> <hmm-name> <min-length> " +
26          "<neg E-value exponent for domain extraction> [E-value for hmmscan, default is 20] [hmmscan option, default is --nobias, --max for no heuristics]"
27       end
28
29       input       = ARGV[ 0 ]
30       hmm         = ARGV[ 1 ]
31       length      = ARGV[ 2 ].to_i
32       e_value_exp = ARGV[ 3 ].to_i
33
34       e_for_hmmscan = 20
35       hmmscan_option = "--nobias"
36
37       if ARGV.length == 6
38         hmmscan_option = ARGV[ 5 ]
39       end
40       if ARGV.length == 5 || ARGV.length == 6
41         e_for_hmmscan = ARGV[ 4 ].to_i
42       end
43
44       if e_value_exp < 0
45         error "E-value exponent for domain extraction cannot be negative"
46       end
47       if length <= 1
48         error "length cannot be smaller than or equal to 1"
49       end
50       if e_for_hmmscan < 1
51         error "E-value for hmmscan cannot be smaller than 1"
52       end
53
54       base_name = get_base_name input
55
56       puts
57       puts "1. hmmscan:"
58       cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{base_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
59       run_command( cmd )
60       puts
61
62       puts "2. hmmscan to simple domain table:"
63       cmd = "#{HSP} #{base_name}_hmmscan_#{e_for_hmmscan.to_s} #{base_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
64       run_command( cmd )
65       puts
66
67       puts "3. domain table to forester format:"
68       cmd = "#{D2F} -e=10 #{base_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{base_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
69       run_command( cmd )
70       puts
71
72       puts "4. dsx:"
73       cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm} #{base_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{base_name}__#{hmm}__ee#{e_value_exp.to_s}_#{length}"
74       run_command( cmd )
75       puts
76
77     end
78
79     def run_command cmd
80       puts cmd
81       `#{cmd}`
82     end
83
84     def get_base_name n
85       if n.downcase.end_with?( "_ni.fasta" )
86         n[ 0 .. n.length - 10 ]
87       elsif n.downcase.end_with?( ".fasta" )
88         n[ 0 .. n.length - 7 ]
89       elsif n.downcase.end_with?( "_ni.fsa" )
90         n[ 0 .. n.length - 8 ]
91       elsif n.downcase.end_with?( ".fsa" )
92         n[ 0 .. n.length - 5 ]
93       else
94         n
95       end
96     end
97
98     def error msg
99       puts
100       puts msg
101       puts
102       exit
103     end
104
105   end
106
107   p = RunPhyloPipeline.new()
108
109   p.run()
110
111 end
112
113
114