in progress
[jalview.git] / forester / ruby / evoruby / lib / evo / apps / phylogeny_factory.rb
1 #
2 # = lib/evo/apps/phylogeny_factory - PhylogenyFactory class
3 #
4 # Copyright::  Copyright (C) 2006-2007 Christian M. Zmasek
5 # License::    GNU Lesser General Public License (LGPL)
6 #
7 # $Id: phylogeny_factory.rb,v 1.32 2010/12/13 19:00:11 cmzmasek Exp $
8
9 require 'lib/evo/util/constants'
10 require 'lib/evo/util/util'
11 require 'lib/evo/util/command_line_arguments'
12
13 require 'set'
14 require 'date'
15
16 module Evoruby
17
18     class PhylogenyFactory
19
20         PRG_NAME       = "phylogeny_factory"
21         PRG_DATE       = "2010.05.26"
22         PRG_DESC       = "automated phylogeny reconstruction using queing system"
23         PRG_VERSION    = "1.1"
24         COPYRIGHT      = "2010 Christian M Zmasek"
25         CONTACT        = "phylosoft@gmail.com"
26         WWW            = "www.phylosoft.org"
27
28         USE_JOB_SUBMISSION_SYSTEM_OPTION  = 's'
29         LOG_FILE                          = '00_phylogeny_factory.log'
30         TEMPLATE_FILE                     = '00_phylogeny_factory.template'
31         PBS_O_WORKDIR                     = '$PBS_O_WORKDIR/'
32         MIN_LENGTH_DEFAULT                = 40
33         WALLTIME                          = '100:00:00'
34         QUEUE                             = 'default'
35
36         TMP_CMD_FILE_SUFFIX = '_QSUB'
37
38         HMM                 = 'HMM'
39         RSL                 = 'RSL'
40
41         OPTION_OPEN          = '%['
42         OPTION_CLOSE          = ']%'
43
44         WAIT                 = 1.0
45
46         NL = Constants::LINE_DELIMITER
47
48         def run
49
50             Util.print_program_information( PRG_NAME,
51                 PRG_VERSION,
52                 PRG_DESC,
53                 PRG_DATE,
54                 COPYRIGHT,
55                 CONTACT,
56                 WWW,
57                 STDOUT )
58
59             begin
60                 cla = CommandLineArguments.new( ARGV )
61             rescue ArgumentError => e
62                 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
63             end
64
65             allowed_opts = Array.new
66             allowed_opts.push( USE_JOB_SUBMISSION_SYSTEM_OPTION )
67
68             disallowed = cla.validate_allowed_options_as_str( allowed_opts )
69             if ( disallowed.length > 0 )
70                 Util.fatal_error( PRG_NAME,
71                     "unknown option(s): " + disallowed,
72                     STDOUT )
73             end
74
75             if File.exists?( LOG_FILE )
76                 puts( '[' + PRG_NAME + '] > log file [' + LOG_FILE + '] already exists' )
77                 exit( -1 )
78             end
79
80             if !File.exists?( TEMPLATE_FILE )
81                 puts( '[' + PRG_NAME + '] > template file [' + TEMPLATE_FILE + '] not found' )
82                 exit( -1 )
83             end
84
85             use_job_submission_system = false
86             if cla.is_option_set?( USE_JOB_SUBMISSION_SYSTEM_OPTION )
87                 use_job_submission_system = true
88             end
89
90             log = String.new
91
92             now = DateTime.now
93             log << "Program     : " + PRG_NAME + NL
94             log << "Version     : " + PRG_VERSION + NL
95             log << "Program date: " + PRG_DATE + NL + NL
96             log << "Date/time   : " + now.to_s + NL
97             log << "Directory   : " + Dir.getwd  + NL + NL
98
99             puts( '[' + PRG_NAME + '] > reading ' + TEMPLATE_FILE )
100
101             paths       = Hash.new  # path placeholder -> full path
102             min_lengths = Hash.new  # alignment id -> minimal length
103             hmms        = Hash.new  # alignment id -> hmm
104             options     = Hash.new  # option placeholder -> option
105             ids         = Set.new
106
107             commands    = Array.new
108
109             log <<  "////////////////////////////////////////////////////////////////// #{NL}"
110             log << "Template file [" + TEMPLATE_FILE + "]:#{NL}"
111
112             command = String.new
113
114             open( TEMPLATE_FILE ).each { | line |
115                 log << line
116                 if ( line =~ /^#/ )
117                 elsif ( line =~ /^\$\s*(\S+)\s*=\s*(\S+)/ )
118                     paths[ $1 ] = $2
119                     puts( '[' + PRG_NAME + '] > paths      : ' + $1 + ' => ' + $2 )
120
121                 elsif ( line =~ /^%\s*#{HMM}\s*(\S+)\s*=\s*(\S+)/ )
122                     hmms[ $1 ] = $2
123                     puts( '[' + PRG_NAME + '] > hmms       : ' + $1 + ' => ' + $2 )
124
125                 elsif ( line =~ /^%\s*#{RSL}\s*(\S+)\s*=\s*(\S+)/ )
126                     min_lengths[ $1 ] = $2
127                     puts( '[' + PRG_NAME + '] > min lengths: ' + $1 + ' => ' + $2 )
128
129                 elsif ( line =~ /^%\s*(\S+)\s*=\s*(\S+)/ )
130                     options[ $1 ] = $2
131                     puts( '[' + PRG_NAME + '] > options    : ' + $1 + ' => ' + $2 )
132
133                 elsif ( line =~ /^>\s*(.+)/ )
134                     command = command + $1 + ";#{NL}"
135
136                 elsif ( line =~ /^-/  )
137                     commands << prepare( command, paths )
138                     command = String.new
139                 end
140             }
141             log << "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ #{NL}#{NL}"
142
143             files = Dir.entries( "." )
144
145             files.each { | file |
146                 if ( !File.directory?( file ) &&
147                          file !~ /^\./ &&
148                          file !~ /#{TEMPLATE_FILE}/ &&
149                          file !~ /.bck$/ &&
150                          file !~ /.log$/ &&
151                          file !~ /nohup/ &&
152                          file !~ /^00/ )
153                     aln_name = file.to_str
154                     id = get_id( aln_name )
155                     if !ids.include?( id )
156                         ids.add( id )
157                     end
158                     puts( '[' + PRG_NAME + '] > file [id]  : ' + aln_name + ' [' + id + ']' )
159                     commands.each do | cmd |
160
161                         cmd = subst_hmm( cmd, aln_name, hmms )
162                         cmd = subst_min_length( cmd, aln_name, min_lengths )
163                         cmd = subst_options( cmd, options )
164                         if use_job_submission_system
165                             cmd = subst_aln_name( cmd, PBS_O_WORKDIR + aln_name )
166                         else
167                             cmd = subst_aln_name( cmd, aln_name )
168                         end
169
170                         if ( cmd =~ /%/ )
171                             cmd =~ /(%.*?%)/
172                             problem = $1
173                             puts( '[' + PRG_NAME + '] > WARNING    : [' + id + '] command still contains placeholder: ' + problem )
174                             log << "WARNING: command still contains placeholder: " + cmd + NL
175                         else
176                             tmp_cmd_file = file.to_str[ 0..4 ] + TMP_CMD_FILE_SUFFIX
177                             if ( File.exists?( tmp_cmd_file ) )
178                                 File.delete( tmp_cmd_file )
179                             end
180                             if use_job_submission_system
181                                 open( tmp_cmd_file, 'w' ) do |f|
182                                     f.write( cmd )
183                                 end
184                             end
185
186                             log << cmd + NL
187
188                             if use_job_submission_system
189                                 IO.popen( 'qsub -q ' + QUEUE  + ' -l walltime=' + WALLTIME + ' ' + tmp_cmd_file , 'r+' ) do | pipe |
190                                     pipe.close_write
191                                 end
192                             else
193                                 spawn( 'nohup ' + cmd + ' &', STDERR => "/dev/null" )
194                             end
195
196                             sleep( WAIT )
197                             if ( File.exists?( tmp_cmd_file ) )
198                                 File.delete( tmp_cmd_file )
199                             end
200                         end
201                     end
202                 end
203             }
204
205             open( LOG_FILE, 'w' ) do | f |
206                 f.write( log )
207             end
208
209             puts()
210             puts( '[' + PRG_NAME + '] > OK' )
211             puts()
212
213         end # def run
214
215         def prepare( command, paths )
216             paths.each_pair{ | name, full |
217                 command = command.gsub( name, full )
218             }
219             command
220         end
221
222         def subst_options( command, options )
223             opt_placeholders = command.scan( /%\[\S+\]%/ )
224             opt_placeholders.each { | opt_placeholder |
225                 opt_placeholder = opt_placeholder.gsub( OPTION_OPEN , '' )
226                 opt_placeholder = opt_placeholder.gsub( OPTION_CLOSE, '' )
227                 opt_value = options[ opt_placeholder ]
228                 if ( opt_value != nil && opt_value.size > 0 )
229                     command = command.gsub( OPTION_OPEN + opt_placeholder + OPTION_CLOSE, opt_value )
230                 end
231             }
232             command
233         end
234
235         def subst_aln_name( command, aln_name )
236             command = command.gsub( '$', aln_name )
237             command
238         end
239
240         def subst_hmm( command, aln_name, hmms )
241             id = get_id( aln_name )
242             hmm = hmms[ id ]
243             if ( hmm != nil && hmm.size > 0 )
244                 command = command.gsub( OPTION_OPEN + HMM + OPTION_CLOSE, hmm )
245             end
246             command
247         end
248
249         def subst_min_length( command, aln_name, min_lengths )
250             id = get_id( aln_name )
251             min_length = min_lengths[ id ]
252             if ( min_length != nil && min_length.size > 0 )
253                 command = command.gsub( OPTION_OPEN + RSL + OPTION_CLOSE, min_length )
254             else
255                 command = command.gsub( OPTION_OPEN + RSL + OPTION_CLOSE, MIN_LENGTH_DEFAULT.to_s )
256             end
257             command
258         end
259
260         def get_id( aln_name )
261             aln_name =~ /^([^_]+)/
262             $1
263         end
264
265     end # class PhylogenyFactory
266
267 end # module Evoruby