in progress
[jalview.git] / forester / ruby / evoruby / exe / run_phylo_pipeline.rb
1 #!/usr/local/bin/ruby -w
2 #
3 # = run_phylo_pipeline
4 #
5 # Copyright::  Copyright (C) 2010 Christian M. Zmasek
6 # License::    GNU Lesser General Public License (LGPL)
7 #
8 # $Id Exp $
9 #
10 #
11
12
13 module Evoruby
14
15   class RunPhyloPipeline
16
17     PFAM      = "/home/czmasek/DATA/PFAM/PFAM260X/"
18     HMMSCAN  = "/home/czmasek/SOFTWARE/HMMER/hmmer-3.0/src/hmmscan"
19     HSP       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/hsp.rb"
20     D2F       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/d2f.rb"
21     DSX       = "/home/czmasek/SOFTWARE/FORESTER/DEV/forester/forester/ruby/evoruby/exe/dsx.rb"
22
23
24     def run
25       unless ARGV.length >= 4 && ARGV.length <= 6
26         error "arguments are: <fasta formatted inputfile> <hmm-name> <min-length> +
27         <neg E-value exponent for domain extraction> [E-value for hmmscan, default is 20] [hmmscan option, default is --nobias]"
28       end
29
30       input       = ARGV[ 0 ]
31       hmm         = ARGV[ 1 ]
32       length      = ARGV[ 2 ].to_i
33       e_value_exp = ARGV[ 3 ].to_i
34
35       e_for_hmmscan = 20
36       hmmscan_option = "--nobias"
37
38       if ARGV.length == 6
39         hmmscan_option = ARGV[ 5 ]
40       end
41       if ARGV.length == 5 || ARGV.length == 6
42         e_for_hmmscan = ARGV[ 4 ].to_i
43       end
44
45       if e_value_exp < 0
46         error "E-value exponent for domain extraction cannot be negative"
47       end
48       if length <= 1
49         error "length cannot be smaller than or equal to 1"
50       end
51       if e_for_hmmscan < 1
52         error "E-value for hmmscan cannot be smaller than 1"
53       end
54
55       base_name = get_base_name input
56
57       #base_name = nil
58       #if input.downcase.end_with?( "_ni.fasta" )
59       #  base_name = input[ 0 .. input.length - 10 ]
60       #elsif input.downcase.end_with?( ".fasta" )
61       #  base_name = input[ 0 .. input.length - 7 ]
62       #elsif input.downcase.end_with?( "_ni.fsa" )
63       #  base_name = input[ 0 .. input.length - 8 ]
64       #elsif input.downcase.end_with?( ".fsa" )
65       #  base_name = input[ 0 .. input.length - 5 ]
66       #else
67       #  base_name = input
68       #end
69
70       puts "1. hmmscan:"
71       cmd = "#{HMMSCAN} #{hmmscan_option} --domtblout #{base_name}_hmmscan_#{e_for_hmmscan.to_s} -E #{e_for_hmmscan.to_s} #{PFAM}Pfam-A.hmm #{input}"
72       run_command( cmd )
73       puts
74
75       puts "2. hmmscan to simple domain table:"
76       cmd = "#{HSP} #{base_name}_hmmscan_#{e_for_hmmscan.to_s} #{base_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table"
77       run_command( cmd )
78       puts
79
80       puts "3. domain table to forester format:"
81       cmd = "#{D2F} -e=10 #{base_name}_hmmscan_#{e_for_hmmscan.to_s}_domain_table #{input} #{base_name}_hmmscan_#{e_for_hmmscan.to_s}.dff"
82       run_command( cmd )
83       puts
84
85       puts "4. dsx:"
86       cmd = "#{DSX} -d -e=1e-#{e_value_exp.to_s} -l=#{length} #{hmm} #{base_name}_hmmscan_#{e_for_hmmscan.to_s} #{input} #{base_name}_#{hmm}_ee#{e_value_exp.to_s}_#{length}"
87       run_command( cmd )
88       puts
89
90     end
91
92     def run_command cmd
93       puts cmd
94       `#{cmd}`
95     end
96
97     def get_base_name n
98       if n.downcase.end_with?( "_ni.fasta" )
99         n[ 0 .. n.length - 10 ]
100       elsif n.downcase.end_with?( ".fasta" )
101         n[ 0 .. n.length - 7 ]
102       elsif n.downcase.end_with?( "_ni.fsa" )
103         n[ 0 .. n.length - 8 ]
104       elsif n.downcase.end_with?( ".fsa" )
105         n[ 0 .. n.length - 5 ]
106       else
107         n
108       end
109     end
110
111     def error msg
112       puts
113       puts msg
114       puts
115       exit
116     end
117
118   end
119
120   p = RunPhyloPipeline.new()
121
122   p.run()
123
124 end
125
126
127