2 # = lib/evo/apps/phylogeny_factory - PhylogenyFactory class
4 # Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
5 # License:: GNU Lesser General Public License (LGPL)
7 # $Id: phylogeny_factory.rb,v 1.32 2010/12/13 19:00:11 cmzmasek Exp $
9 require 'lib/evo/util/constants'
10 require 'lib/evo/util/util'
11 require 'lib/evo/util/command_line_arguments'
# Driver that reads a command template (TEMPLATE_FILE), fills it in for the
# alignment files found in the current directory and starts the resulting
# commands, optionally through a job submission system (qsub).
18 class PhylogenyFactory
# Program name; used as the "[...] >" prefix of console messages and in the log.
20 PRG_NAME = "phylogeny_factory"
# One-line description of the program.
22 PRG_DESC = "automated phylogeny reconstruction using queing system"
# Author/contact information.
24 COPYRIGHT = "2013 Christian M Zmasek"
25 CONTACT = "phylosoft@gmail.com"
26 WWW = "www.phylosoft.org"
# Command line option that switches on submission through the job submission system.
28 USE_JOB_SUBMISSION_SYSTEM_OPTION = 's'
# Name of the log file (opened for writing at the end of the run).
29 LOG_FILE = '00_phylogeny_factory.log'
# Name of the template file that is read line by line.
30 TEMPLATE_FILE = '00_phylogeny_factory.template'
# Prefix put in front of the alignment file name when jobs are submitted.
31 PBS_O_WORKDIR = '$PBS_O_WORKDIR/'
# Minimal length used when the template gives none for an alignment id.
32 MIN_LENGTH_DEFAULT = 50
# Directory prefix for HMM files; "<id>.hmm" is appended to it.
# NOTE(review): the name reads like a typo for PFAM_HMMS - renaming would touch every user.
33 PFAM_HHMS = "/home/czmasek/DATA/PFAM/PFAM260X/PFAM_A_HMMs/"
# Value passed to qsub as "-l walltime=".
34 WALLTIME = '100:00:00'
# Suffix of the temporary command file created per alignment file.
37 TMP_CMD_FILE_SUFFIX = '_QSUB'
# Line delimiter, taken from Constants.
47 NL = Constants::LINE_DELIMITER
# Start-up phase: print the program information, parse and validate the
# command line, check the log/template files and begin the log text.
# (Several statements of this phase are on lines not contained in this listing.)
51 Util.print_program_information( PRG_NAME,
# Parse ARGV; an ArgumentError is turned into a fatal error message.
61 cla = CommandLineArguments.new( ARGV )
62 rescue ArgumentError => e
63 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
# The job-submission switch is the only option pushed onto the allowed list here.
66 allowed_opts = Array.new
67 allowed_opts.push( USE_JOB_SUBMISSION_SYSTEM_OPTION )
# Any option outside the allowed list is reported as a fatal error.
69 disallowed = cla.validate_allowed_options_as_str( allowed_opts )
70 if ( disallowed.length > 0 )
71 Util.fatal_error( PRG_NAME,
72 "unknown option(s): " + disallowed,
# NOTE(review): File.exists? is deprecated and was removed in Ruby 3.2;
# File.exist? is the replacement.
# An already existing log file is reported (the follow-up action is not visible here).
76 if File.exists?( LOG_FILE )
77 puts( '[' + PRG_NAME + '] > log file [' + LOG_FILE + '] already exists' )
# A missing template file is reported as well.
81 if !File.exists?( TEMPLATE_FILE )
82 puts( '[' + PRG_NAME + '] > template file [' + TEMPLATE_FILE + '] not found' )
# Jobs go through the job submission system only when the option was given.
86 use_job_submission_system = false
87 if cla.is_option_set?( USE_JOB_SUBMISSION_SYSTEM_OPTION )
88 use_job_submission_system = true
# Log header: program identity, time stamp and working directory.
# `log`, `now`, PRG_VERSION and PRG_DATE are defined on lines not contained in this listing.
94 log << "Program : " + PRG_NAME + NL
95 log << "Version : " + PRG_VERSION + NL
96 log << "Program date: " + PRG_DATE + NL + NL
97 log << "Date/time : " + now.to_s + NL
98 log << "Directory : " + Dir.getwd + NL + NL
100 puts( '[' + PRG_NAME + '] > reading ' + TEMPLATE_FILE )
# Settings collected while reading the template file.
102 paths = Hash.new # path placeholder -> full path
103 min_lengths = Hash.new # alignment id -> minimal length
104 options = Hash.new # option placeholder -> option
109 log << "////////////////////////////////////////////////////////////////// #{NL}"
110 log << "Template file [" + TEMPLATE_FILE + "]:#{NL}"
# Read the template line by line; the start of a line decides how it is used:
#   $ NAME = PATH          path placeholder and its full path
#   % <RSL> ID = LENGTH    minimal length for one alignment id
#   % NAME = VALUE         option placeholder and its value
#   > TEXT                 one line of the command currently being assembled
#   -                      end of the current command
# The first branch of the if/elsif chain and some of the assignments are on
# lines not contained in this listing.
# NOTE(review): the handle returned by open is not closed in the lines visible here.
114 open( TEMPLATE_FILE ).each { | line |
117 elsif ( line =~ /^\$\s*(\S+)\s*=\s*(\S+)/ )
119 puts( '[' + PRG_NAME + '] > paths : ' + $1 + ' => ' + $2 )
# Must be tested before the general "%" branch, which would match these lines too.
121 elsif ( line =~ /^%\s*#{RSL}\s*(\S+)\s*=\s*(\S+)/ )
122 min_lengths[ $1 ] = $2
123 puts( '[' + PRG_NAME + '] > min lengths: ' + $1 + ' => ' + $2 )
125 elsif ( line =~ /^%\s*(\S+)\s*=\s*(\S+)/ )
127 puts( '[' + PRG_NAME + '] > options : ' + $1 + ' => ' + $2 )
# Each command line is appended with a terminating ";" and line delimiter.
129 elsif ( line =~ /^>\s*(.+)/ )
130 command = command + $1 + ";#{NL}"
# A "-" line finishes the command: path placeholders are replaced and the
# result is added to the list of commands.
132 elsif ( line =~ /^-/ )
133 commands << prepare( command, paths )
137 log << "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ #{NL}#{NL}"
# Go through all entries of the current directory.
139 files = Dir.entries( "." )
141 files.each { | file |
# Directories and names matching the template file name are skipped
# (further conditions are on lines not contained in this listing).
142 if ( !File.directory?( file ) &&
144 file !~ /#{TEMPLATE_FILE}/ &&
149 aln_name = file.to_str
# The id is the part of the file name in front of "__" (see get_id).
150 id = get_id( aln_name )
151 #if !ids.include?( id )
154 puts( '[' + PRG_NAME + '] > file [id]: ' + aln_name + ' [' + id + ']' )
# Fill the per-alignment values into every command taken from the template.
155 commands.each do | cmd |
156 cmd = subst_hmm( cmd, id )
157 cmd = subst_min_length( cmd, id, min_lengths )
158 cmd = subst_options( cmd, options )
# With the job submission system the alignment name is prefixed with PBS_O_WORKDIR.
159 if use_job_submission_system
160 cmd = subst_aln_name( cmd, PBS_O_WORKDIR + aln_name )
162 cmd = subst_aln_name( cmd, aln_name )
# Report placeholders that are still present; `problem` is set on lines not
# contained in this listing.
168 puts( '[' + PRG_NAME + '] > WARNING : [' + id + '] command still contains placeholder: ' + problem )
169 log << "WARNING: command still contains placeholder: " + cmd + NL
# Temporary command file: first five characters of the file name plus suffix.
# NOTE(review): files sharing their first five characters map to the same temporary name.
171 tmp_cmd_file = file.to_str[ 0..4 ] + TMP_CMD_FILE_SUFFIX
# Remove a left-over temporary file before a new one is written.
172 if ( File.exists?( tmp_cmd_file ) )
173 File.delete( tmp_cmd_file )
# The temporary command file is only written when jobs are submitted
# (the statements inside the block are not contained in this listing).
175 if use_job_submission_system
176 open( tmp_cmd_file, 'w' ) do |f|
# Submit the temporary command file with qsub; QUEUE is defined on a line not
# contained in this listing.
183 if use_job_submission_system
184 IO.popen( 'qsub -q ' + QUEUE + ' -l walltime=' + WALLTIME + ' ' + tmp_cmd_file , 'r+' ) do | pipe |
# Presumably the branch without job submission: the command is started in the
# background through the shell with nohup, stderr redirected to /dev/null.
188 spawn( 'nohup ' + cmd + ' &', STDERR => "/dev/null" )
# Remove the temporary command file again.
192 if ( File.exists?( tmp_cmd_file ) )
193 File.delete( tmp_cmd_file )
# Open the log file for writing (the block body is not contained in this
# listing; presumably the collected log text is written) and report success.
200 open( LOG_FILE, 'w' ) do | f |
205 puts( '[' + PRG_NAME + '] > OK' )
# Replaces every path placeholder occurring in +command+ by its full path.
#
# command - command text taken from the template (String)
# paths   - Hash mapping path placeholder => full path
#
# Returns the command with all placeholders replaced; the String passed in
# is left unchanged.
def prepare( command, paths )
  paths.each_pair do | name, full |
    # Block form: the path is inserted verbatim. A replacement string given
    # as second argument would have sequences such as "\0" or "\1" expanded.
    command = command.gsub( name ) { full }
  end
  command
end
# Replaces option placeholders of the form %[NAME]% by their values.
#
# command - command text (String)
# options - Hash mapping option name => value (String)
#
# Placeholders whose name is unknown, or whose value is empty, stay as they
# are. Returns the resulting command; the String passed in is left unchanged.
def subst_options( command, options )
  # Non-greedy match, so that placeholders written back to back without
  # white space in between (e.g. "%[a]%,%[b]%") are found one by one.
  names = command.scan( /%\[(\S+?)\]%/ ).flatten.uniq
  names.each do | name |
    value = options[ name ]
    next if value.nil? || value.empty?
    # Block form inserts the value verbatim (no backslash expansion).
    command = command.gsub( OPTION_OPEN + name + OPTION_CLOSE ) { value }
  end
  command
end
# Replaces every "$" in +command+ by the alignment file name.
#
# command  - command text (String)
# aln_name - name (or path) of the alignment file
#
# Returns the resulting command; the String passed in is left unchanged.
def subst_aln_name( command, aln_name )
  # Block form: the name is inserted verbatim, so a backslash in a file name
  # is not interpreted as a back-reference. The replacement is done in a
  # single pass, so a "$" inside +aln_name+ itself is not replaced again.
  command.gsub( '$' ) { aln_name }
end
# Replaces the HMM placeholder (OPTION_OPEN + HMM + OPTION_CLOSE) by the
# path of the HMM file belonging to +id+, i.e. PFAM_HHMS + id + ".hmm".
#
# command - command text (String)
# id      - alignment id; nil or empty leaves the command untouched
#
# Returns the resulting command; the String passed in is left unchanged.
def subst_hmm( command, id )
  return command if id.nil? || id.empty?

  hmm = PFAM_HHMS + id + ".hmm"
  # Block form inserts the path verbatim (no backslash expansion).
  command.gsub( OPTION_OPEN + HMM + OPTION_CLOSE ) { hmm }
end
# Replaces the minimal-length placeholder (OPTION_OPEN + RSL + OPTION_CLOSE)
# by the minimal length registered for +id+, or by MIN_LENGTH_DEFAULT when
# there is none.
#
# command     - command text (String)
# id          - alignment id; may be nil or empty
# min_lengths - Hash mapping alignment id => minimal length
#
# Returns the resulting command; the String passed in is left unchanged.
def subst_min_length( command, id, min_lengths )
  min_length = ( id.nil? || id.empty? ) ? nil : min_lengths[ id ]
  # to_s lets a numeric entry be used as well; an empty entry counts as missing.
  min_length = MIN_LENGTH_DEFAULT if min_length.nil? || min_length.to_s.empty?
  command.gsub( OPTION_OPEN + RSL + OPTION_CLOSE ) { min_length.to_s }
end
# Extracts the id from an alignment file name: everything in front of the
# first "__".
#
# aln_name - alignment file name (String)
#
# Returns the id, nil when the name contains no "__", or an empty String
# when the name starts with "__". In the latter two cases a warning is
# printed.
def get_id( aln_name )
  separator_at = aln_name.index( "__" )
  id = separator_at ? aln_name[ 0, separator_at ] : nil
  if id.nil? || id.empty?
    puts( '[' + PRG_NAME + '] > WARNING: could not get id from [' + aln_name + ']' )
  end
  id
end
267 end # class PhylogenyFactory