2 # = lib/evo/apps/phylogeny_factory - PhylogenyFactory class
4 # Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
5 # License:: GNU Lesser General Public License (LGPL)
7 # $Id: phylogeny_factory.rb,v 1.32 2010/12/13 19:00:11 cmzmasek Exp $
9 require 'lib/evo/util/constants'
10 require 'lib/evo/util/util'
11 require 'lib/evo/util/command_line_arguments'
18 class PhylogenyFactory
# Program identification.
# NOTE(review): "queing" in PRG_DESC is presumably a typo for "queueing"; it is
# a runtime string, so it is left unchanged here.
20 PRG_NAME = "phylogeny_factory"
21 PRG_DATE = "2010.05.26"
22 PRG_DESC = "automated phylogeny reconstruction using queing system"
24 COPYRIGHT = "2010 Christian M Zmasek"
25 CONTACT = "phylosoft@gmail.com"
26 WWW = "www.phylosoft.org"
# Command line switch that selects submission through the job submission
# system (qsub) instead of starting the commands directly.
28 USE_JOB_SUBMISSION_SYSTEM_OPTION = 's'
# Log file written at the end of a run; its presence is checked at start-up.
29 LOG_FILE = '00_phylogeny_factory.log'
# Template file the paths, options and command templates are read from.
30 TEMPLATE_FILE = '00_phylogeny_factory.template'
# Prefix put in front of the alignment file name when commands are submitted
# through the job submission system.
31 PBS_O_WORKDIR = '$PBS_O_WORKDIR/'
# Minimal length substituted when the template gives none for an alignment id.
32 MIN_LENGTH_DEFAULT = 40
# Value passed to qsub as "-l walltime=".
33 WALLTIME = '100:00:00'
# Appended to the first five characters of an input file name to build the
# name of the temporary command file.
36 TMP_CMD_FILE_SUFFIX = '_QSUB'
# Line delimiter used for log output and when joining command lines.
46 NL = Constants::LINE_DELIMITER
# Main driver: reads the template file, then builds and starts one set of
# commands per input file found in the current directory.
# NOTE(review): this listing omits lines (the embedded line numbers skip), so
# block terminators, some assignments and the enclosing method header are not
# visible here; the comments below describe only what the visible lines show.

# Print the program banner.
50 Util.print_program_information( PRG_NAME,
# Parse the command line; an ArgumentError raised by the parser is reported
# through Util.fatal_error.
60 cla = CommandLineArguments.new( ARGV )
61 rescue ArgumentError => e
62 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
# The job submission switch is the only option accepted; anything else is
# reported as unknown.
65 allowed_opts = Array.new
66 allowed_opts.push( USE_JOB_SUBMISSION_SYSTEM_OPTION )
68 disallowed = cla.validate_allowed_options_as_str( allowed_opts )
69 if ( disallowed.length > 0 )
70 Util.fatal_error( PRG_NAME,
71 "unknown option(s): " + disallowed,
# Pre-flight checks: a log file from a previous run must not exist and the
# template file must exist (presumably the program stops in both cases --
# TODO confirm, the handling after the messages is not visible here).
# NOTE(review): File.exists? is a deprecated alias that was removed in Ruby 3.2;
# File.exist? is the supported spelling.
75 if File.exists?( LOG_FILE )
76 puts( '[' + PRG_NAME + '] > log file [' + LOG_FILE + '] already exists' )
80 if !File.exists?( TEMPLATE_FILE )
81 puts( '[' + PRG_NAME + '] > template file [' + TEMPLATE_FILE + '] not found' )
# Remember whether the job submission switch was given.
85 use_job_submission_system = false
86 if cla.is_option_set?( USE_JOB_SUBMISSION_SYSTEM_OPTION )
87 use_job_submission_system = true
# Log header: program identity, time stamp and working directory.
# (log and now are set up on lines not visible here.)
93 log << "Program : " + PRG_NAME + NL
94 log << "Version : " + PRG_VERSION + NL
95 log << "Program date: " + PRG_DATE + NL + NL
96 log << "Date/time : " + now.to_s + NL
97 log << "Directory : " + Dir.getwd + NL + NL
99 puts( '[' + PRG_NAME + '] > reading ' + TEMPLATE_FILE )
# Lookup tables filled while reading the template file.
101 paths = Hash.new # path placeholder -> full path
102 min_lengths = Hash.new # alignment id -> minimal length
103 hmms = Hash.new # alignment id -> hmm
104 options = Hash.new # option placeholder -> option
109 log << "////////////////////////////////////////////////////////////////// #{NL}"
110 log << "Template file [" + TEMPLATE_FILE + "]:#{NL}"
# Template line syntax, selected by the leading character:
#   $ name = value          path placeholder
#   % <HMM> id = value      hmm for an alignment id
#   % <RSL> id = value      minimal length for an alignment id
#   % name = value          generic option
#   > text                  appended to the command being assembled
#   -                       ends the current command; it is stored after path
#                           substitution via prepare
# The first branch of this if/elsif chain is not visible here.
# NOTE(review): the handle returned by open is not closed explicitly.
114 open( TEMPLATE_FILE ).each { | line |
117 elsif ( line =~ /^\$\s*(\S+)\s*=\s*(\S+)/ )
119 puts( '[' + PRG_NAME + '] > paths : ' + $1 + ' => ' + $2 )
121 elsif ( line =~ /^%\s*#{HMM}\s*(\S+)\s*=\s*(\S+)/ )
123 puts( '[' + PRG_NAME + '] > hmms : ' + $1 + ' => ' + $2 )
125 elsif ( line =~ /^%\s*#{RSL}\s*(\S+)\s*=\s*(\S+)/ )
126 min_lengths[ $1 ] = $2
127 puts( '[' + PRG_NAME + '] > min lengths: ' + $1 + ' => ' + $2 )
129 elsif ( line =~ /^%\s*(\S+)\s*=\s*(\S+)/ )
131 puts( '[' + PRG_NAME + '] > options : ' + $1 + ' => ' + $2 )
133 elsif ( line =~ /^>\s*(.+)/ )
134 command = command + $1 + ";#{NL}"
136 elsif ( line =~ /^-/ )
137 commands << prepare( command, paths )
141 log << "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ #{NL}#{NL}"
# Walk the entries of the current directory; entries passing the filter below
# are treated as input (alignment) files. Only part of the filter is visible.
# NOTE(review): TEMPLATE_FILE is interpolated into the regex unescaped, so its
# "." matches any character.
143 files = Dir.entries( "." )
145 files.each { | file |
146 if ( !File.directory?( file ) &&
148 file !~ /#{TEMPLATE_FILE}/ &&
# The id is the part of the file name before the first underscore (see get_id);
# ids collects the ids encountered (what happens for a new id is not visible).
153 aln_name = file.to_str
154 id = get_id( aln_name )
155 if !ids.include?( id )
158 puts( '[' + PRG_NAME + '] > file [id] : ' + aln_name + ' [' + id + ']' )
# Instantiate every command template for this file: hmm, minimal length,
# generic options, and finally the alignment name placeholder.
159 commands.each do | cmd |
161 cmd = subst_hmm( cmd, aln_name, hmms )
162 cmd = subst_min_length( cmd, aln_name, min_lengths )
163 cmd = subst_options( cmd, options )
# With job submission the alignment name is prefixed with PBS_O_WORKDIR.
164 if use_job_submission_system
165 cmd = subst_aln_name( cmd, PBS_O_WORKDIR + aln_name )
167 cmd = subst_aln_name( cmd, aln_name )
# Warn (console and log) when a placeholder survived all substitutions;
# problem is computed on a line not visible here.
173 puts( '[' + PRG_NAME + '] > WARNING : [' + id + '] command still contains placeholder: ' + problem )
174 log << "WARNING: command still contains placeholder: " + cmd + NL
# Temporary command file, named after the first five characters of the input
# file name; a stale one is removed first.
# NOTE(review): input files sharing their first five characters map to the same
# temporary file name -- confirm this cannot collide in practice.
176 tmp_cmd_file = file.to_str[ 0..4 ] + TMP_CMD_FILE_SUFFIX
177 if ( File.exists?( tmp_cmd_file ) )
178 File.delete( tmp_cmd_file )
180 if use_job_submission_system
181 open( tmp_cmd_file, 'w' ) do |f|
# Start the command: hand the command file to qsub, or (apparently in the
# branch without job submission) start it in the background with nohup.
# NOTE(review): cmd is passed to the shell as a single string, so shell
# metacharacters coming from the template or from file names are interpreted.
188 if use_job_submission_system
189 IO.popen( 'qsub -q ' + QUEUE + ' -l walltime=' + WALLTIME + ' ' + tmp_cmd_file , 'r+' ) do | pipe |
193 spawn( 'nohup ' + cmd + ' &', STDERR => "/dev/null" )
# Remove the temporary command file again.
197 if ( File.exists?( tmp_cmd_file ) )
198 File.delete( tmp_cmd_file )
# Write the log to LOG_FILE and report success.
205 open( LOG_FILE, 'w' ) do | f |
210 puts( '[' + PRG_NAME + '] > OK' )
# Expands every path placeholder occurring in +command+.
#
# command:: command template text; it is not modified, a new String is returned
# paths::   Hash mapping path placeholder => full path
#
# Returns the command with each placeholder replaced by its full path
# (the unchanged command when +paths+ is empty).
def prepare( command, paths )
  paths.each_pair do | name, full |
    # Block form of gsub inserts +full+ verbatim. The two-argument form would
    # interpret backslash sequences (\0, \1, \\) contained in the path.
    command = command.gsub( name ) { full }
  end
  command
end
# Replaces option placeholders (OPTION_OPEN + name + OPTION_CLOSE) in +command+
# with the values given in +options+.
#
# command:: command text; it is not modified, a new String is returned
# options:: Hash mapping option placeholder name => option value (String)
#
# Placeholders without a value, or with an empty value, are left in place so
# that the caller can still detect and report them.
#
# Returns the command with all known option placeholders replaced.
def subst_options( command, options )
  # Built from the same delimiters used for the replacement below. The
  # non-greedy \S+? keeps directly adjacent placeholders apart, which a greedy
  # \S+ would merge into one unknown name.
  placeholder = /#{Regexp.escape( OPTION_OPEN )}(\S+?)#{Regexp.escape( OPTION_CLOSE )}/
  command.scan( placeholder ).flatten.uniq.each do | name |
    value = options[ name ]
    next if value.nil? || value.empty?
    # Block form: the value is inserted verbatim, backslashes included.
    command = command.gsub( OPTION_OPEN + name + OPTION_CLOSE ) { value }
  end
  command
end
# Replaces every '$' in +command+ with the alignment (file) name.
#
# command::  command text; it is not modified, a new String is returned
# aln_name:: text to insert for each '$'
#
# Returns the command with all '$' characters replaced.
def subst_aln_name( command, aln_name )
  # Block form of gsub inserts the name verbatim. The two-argument form would
  # interpret backslash sequences (\0, \1, \\) contained in the name.
  command.gsub( '$' ) { aln_name }
end
# Replaces the hmm placeholder (OPTION_OPEN + HMM + OPTION_CLOSE) in +command+
# with the hmm registered for the id of the given alignment.
#
# command::  command text; it is not modified
# aln_name:: alignment file name; its id is obtained through get_id
# hmms::     Hash mapping alignment id => hmm (String)
#
# Returns the command with the placeholder replaced, or the command unchanged
# when no (or an empty) hmm is registered for the id.
def subst_hmm( command, aln_name, hmms )
  hmm = hmms[ get_id( aln_name ) ]
  return command if hmm.nil? || hmm.empty?
  # Block form: the hmm is inserted verbatim, backslashes included.
  command.gsub( OPTION_OPEN + HMM + OPTION_CLOSE ) { hmm }
end
# Replaces the minimal length placeholder (OPTION_OPEN + RSL + OPTION_CLOSE)
# in +command+.
#
# command::     command text; it is not modified, a new String is returned
# aln_name::    alignment file name; its id is obtained through get_id
# min_lengths:: Hash mapping alignment id => minimal length (String)
#
# The value registered for the alignment id is used; when there is none (or it
# is empty) MIN_LENGTH_DEFAULT is used instead.
#
# Returns the command with the placeholder replaced.
def subst_min_length( command, aln_name, min_lengths )
  min_length = min_lengths[ get_id( aln_name ) ]
  min_length = MIN_LENGTH_DEFAULT.to_s if min_length.nil? || min_length.empty?
  command.gsub( OPTION_OPEN + RSL + OPTION_CLOSE ) { min_length }
end
# Extracts the id of an alignment from its file name: the leading run of
# characters up to (not including) the first underscore.
#
# aln_name:: alignment file name
#
# Returns the id as a String, or nil when the name does not start with at
# least one non-underscore character (e.g. "" or "_x").
def get_id( aln_name )
  # String#[] with a regex returns the matched text directly, so the result
  # does not depend on the global $1.
  aln_name[ /^[^_]+/ ]
end
265 end # class PhylogenyFactory