1 #!/usr/local/bin/ruby -w
3 # = lib/evo/apps/phylogenies_decorator
5 # Copyright:: Copyright (C) 2006-2008 Christian M. Zmasek
6 # License:: GNU Lesser General Public License (LGPL)
8 # decoration of phylogenies with sequence/species names and domain architectures
10 # $Id: phylogenies_decorator.rb,v 1.34 2010/12/13 19:00:11 cmzmasek Exp $
12 # Environment variable FORESTER_HOME needs to point to the appropriate
13 # directory (e.g. setenv FORESTER_HOME $HOME/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester-atv/)
15 require 'lib/evo/util/constants'
16 require 'lib/evo/util/util'
17 require 'lib/evo/util/command_line_arguments'
# Decorates phylogeny files in the current directory by running the forester
# Java "decorator" application on them (see the command line assembled from
# JAVA_HOME and FORESTER_HOME further down in this class).
#
# NOTE(review): the embedded line numbers of this listing have gaps in the
# constants section (30 -> 34, 40 -> 42, 44 -> 47, 48 -> 51). The constants
# HELP_OPTION_2, IDS_ONLY_OPTION, PRG_VERSION and TMP_FILE are used later in
# the class but are not defined on any visible line -- confirm against the
# complete file.
23 class PhylogeniesDecorator
# Options passed to the decorator when sequence/species names are added.
25 DECORATOR_OPTIONS_SEQ_NAMES = '-r=1 -mdn'
26 # -mdn is a hidden expert option to rename e.g. "6_ORYLA3" to "6_[3]_ORYLA"
27 #DECORATOR_OPTIONS_SEQ_NAMES = '-sn -r=1'
# Options passed to the decorator when domain architectures are added.
28 DECORATOR_OPTIONS_DOMAINS = '-r=1'
# File name suffixes of the per-phylogeny map files looked up via get_file:
# ids map (sequence names) and domains map, respectively.
29 IDS_MAPFILE_SUFFIX = '.nim'
30 DOMAINS_MAPFILE_SUFFIX = '.dff'
# Log file written at the end of a run; the program refuses to start when a
# file of this name already exists.
34 LOG_FILE = '00_phylogenies_decorator.log'
# Installation directories, read from the environment once, when the class
# body is evaluated. The variable names come from Constants. Either value is
# nil when the variable is unset (checked at the start of the main routine).
35 FORESTER_HOME = ENV[Constants::FORESTER_HOME_ENV_VARIABLE]
36 JAVA_HOME = ENV[Constants::JAVA_HOME_ENV_VARIABLE]
# Program information used for the banner, the log header and as the prefix
# of messages and errors.
38 PRG_NAME = "phylogenies_decorator"
39 PRG_DATE = "2008.09.02"
40 PRG_DESC = "decoration of phylogenies with sequence/species names and domain architectures"
42 COPYRIGHT = "2008-2009 Christian M Zmasek"
43 CONTACT = "phylosoft@gmail.com"
44 WWW = "www.phylosoft.org"
# Command line option names (printed with a leading '-' in the usage text).
47 DOMAINS_ONLY_OPTION = "d"
48 HELP_OPTION_1 = "help"
# Line delimiter appended to every log line.
51 NL = Constants::LINE_DELIMITER
# Body of the main routine.
# NOTE(review): the embedded line numbers of this listing have gaps (e.g.
# 55 -> 64, 83 -> 86, 107 -> 113), so the enclosing `def`, the `begin`/`end`
# keywords, the bodies of some `if`s and several assignments (`now`, `log`,
# `counter`, `ids_only`, `domains_only`) are not visible. The comments below
# describe only what the visible lines show.

# Print the program banner (the remaining arguments of this call are not
# visible).
55 Util.print_program_information( PRG_NAME,
# Only 2 or 3 command line arguments are accepted; the reaction to any other
# count is on lines not shown.
64 if ( ARGV == nil || ARGV.length > 3 || ARGV.length < 2 )
# Both environment variables must be set and non-empty ...
69 if FORESTER_HOME == nil || FORESTER_HOME.length < 1
70 Util.fatal_error( PRG_NAME, "apparently environment variable #{Constants::FORESTER_HOME_ENV_VARIABLE} has not been set" )
72 if JAVA_HOME == nil || JAVA_HOME.length < 1
73 Util.fatal_error( PRG_NAME, "apparently environment variable #{Constants::JAVA_HOME_ENV_VARIABLE} has not been set" )
# ... and must name existing paths.
76 if !File.exist?( FORESTER_HOME )
77 Util.fatal_error( PRG_NAME, '[' + FORESTER_HOME + '] does not exist' )
79 if !File.exist?( JAVA_HOME )
80 Util.fatal_error( PRG_NAME, '[' + JAVA_HOME + '] does not exist' )
# Command prefix that starts the Java decorator from forester.jar; options
# and file names are appended per phylogeny below.
83 decorator = JAVA_HOME + '/bin/java -cp ' + FORESTER_HOME + '/java/forester.jar org.forester.application.decorator'
# Parse the command line; an ArgumentError from the parser is reported as a
# fatal error.
86 cla = CommandLineArguments.new( ARGV )
87 rescue ArgumentError => e
88 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
# Help was requested with either help option (the reaction is not visible).
91 if ( cla.is_option_set?( HELP_OPTION_1 ) ||
92 cla.is_option_set?( HELP_OPTION_2 ) )
# Never overwrite the log of an earlier run.
97 if File.exist?( LOG_FILE )
98 Util.fatal_error( PRG_NAME, 'logfile [' + LOG_FILE + '] already exists' )
# Reject every option other than the ids-only and domains-only switches.
101 allowed_opts = Array.new
102 allowed_opts.push( IDS_ONLY_OPTION )
103 allowed_opts.push( DOMAINS_ONLY_OPTION )
105 disallowed = cla.validate_allowed_options_as_str( allowed_opts )
106 if ( disallowed.length > 0 )
107 Util.fatal_error( PRG_NAME, "unknown option(s): " + disallowed )
# First file-name argument: suffix of the input trees; second: suffix given
# to the decorated output trees.
113 in_suffix = cla.get_file_name( 0 )
114 out_suffix = cla.get_file_name( 1 )
# Presumably these two `if`s set ids_only / domains_only (the assignments are
# not visible); the two modes exclude each other.
116 if cla.is_option_set?( IDS_ONLY_OPTION )
119 if cla.is_option_set?( DOMAINS_ONLY_OPTION )
123 if ( ids_only && domains_only )
124 Util.fatal_error( PRG_NAME, 'attempt to use ids only and domains only at the same time' )
# Log header: program information, decorator options, environment, time and
# working directory. `log` only needs to respond to <<.
130 log << "Program : " + PRG_NAME + NL
131 log << "Version : " + PRG_VERSION + NL
132 log << "Program date : " + PRG_DATE + NL
133 log << "Options for seq names: " + DECORATOR_OPTIONS_SEQ_NAMES + NL
134 log << "Options for domains : " + DECORATOR_OPTIONS_DOMAINS + NL
135 log << "FORESTER_HOME : " + FORESTER_HOME + NL
136 log << "JAVA_HOME : " + JAVA_HOME + NL + NL
137 log << "Date/time: " + now.to_s + NL
138 log << "Directory: " + Dir.getwd + NL + NL
# Report both suffixes on the console and in the log.
140 Util.print_message( PRG_NAME, 'input suffix : ' + in_suffix )
141 Util.print_message( PRG_NAME, 'output suffix : ' + out_suffix )
143 log << 'input suffix : ' + in_suffix + NL
144 log << 'output suffix : ' + out_suffix + NL
# Remove a temporary file left over from an earlier run.
# NOTE(review): File.exists? (here and twice below) was removed in Ruby 3.2;
# File.exist?, already used for the checks above, is the replacement.
146 if ( File.exists?( TMP_FILE ) )
147 File.delete( TMP_FILE )
# All entries of the current directory; this list is also what get_file
# searches for map files.
150 files = Dir.entries( "." )
# Select the files to decorate: not a directory, not a dot file, not starting
# with "00" (the log file does), not already carrying the output suffix, and
# ending in the input suffix.
# NOTE(review): both suffixes are interpolated into the regular expressions
# unescaped, so a '.' in a suffix matches any character.
154 files.each { | phylogeny_file |
155 if ( !File.directory?( phylogeny_file ) &&
156 phylogeny_file !~ /^\./ &&
157 phylogeny_file !~ /^00/ &&
158 phylogeny_file !~ /#{out_suffix}$/ &&
159 phylogeny_file =~ /#{in_suffix}$/ )
# The input tree must be readable.
# NOTE(review): `String + $!` adds an exception object to a String; on Ruby
# >= 1.9 Exception has no to_str, so building this message raises TypeError
# instead of reporting the problem -- `$!.to_s` is presumably intended.
161 Util.check_file_for_readability( phylogeny_file )
163 Util.fatal_error( PRG_NAME, 'can not read from: ' + phylogeny_file + ': '+ $! )
# Output name: input name with the input suffix replaced by the output suffix.
168 outfile = phylogeny_file.sub( /#{in_suffix}$/, out_suffix )
# Replace the first "_ni_" in the output name by "_" (a guarding condition,
# if there is one, is not visible).
171 outfile = outfile.sub( /_ni_/, '_' )
# Existing output files are reported (what follows the message is not visible;
# the text says the file is skipped).
174 if File.exists?( outfile )
175 msg = counter.to_s + ': ' + phylogeny_file + ' -> ' + outfile +
176 ' : already exists, skipping'
177 Util.print_message( PRG_NAME, msg )
# Announce the file pair on the console and in the log.
182 Util.print_message( PRG_NAME, counter.to_s + ': ' + phylogeny_file + ' -> ' + outfile )
183 log << counter.to_s + ': ' + phylogeny_file + ' -> ' + outfile + NL
# Id shared by the tree and its map files.
185 phylogeny_id = get_id( phylogeny_file )
187 ids_mapfile_name = nil
188 domains_mapfile_name = nil
# Look up the map files. The conditions choosing between these three variants
# are not visible; presumably ids-only, domains-only and both -- confirm.
191 ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX )
193 domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX )
195 ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX )
196 domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX )
# Every map file that will be used must be readable.
# NOTE(review): the two messages below are single-quoted, so #{...} is NOT
# interpolated and the file name never appears in the message; they also have
# the `String + $!` problem described above.
199 if domains_mapfile_name != nil
201 Util.check_file_for_readability( domains_mapfile_name )
203 Util.fatal_error( PRG_NAME, 'failed to read from [#{domains_mapfile_name}]: ' + $! )
207 if ids_mapfile_name != nil
209 Util.check_file_for_readability( ids_mapfile_name )
211 Util.fatal_error( PRG_NAME, 'failed to read from [#{ids_mapfile_name}]: ' + $! )
# Domain pass (-f=d). When an ids map is present as well, the result goes to
# the temporary file (the alternative target is not visible).
# NOTE(review): file names are concatenated into a shell command line without
# quoting; names containing spaces or shell metacharacters would change the
# command that is executed.
215 if domains_mapfile_name != nil
216 if ids_mapfile_name != nil
217 my_outfile = TMP_FILE
221 cmd = decorator + ' ' + DECORATOR_OPTIONS_DOMAINS + ' ' +
222 '-f=d ' + phylogeny_file + ' ' +
223 domains_mapfile_name + ' ' + my_outfile
224 execute_cmd( cmd, log )
# Sequence name pass (-f=s); it always writes the final output file. Which
# branch assigns the original tree as input is not fully visible.
227 if ids_mapfile_name != nil
228 if domains_mapfile_name != nil
231 my_infile = phylogeny_file
233 cmd = decorator + ' ' + DECORATOR_OPTIONS_SEQ_NAMES + ' ' +
234 '-f=s ' + my_infile + ' ' +
235 ids_mapfile_name + ' ' + outfile
236 execute_cmd( cmd, log )
# Remove the intermediate result of the two-pass case.
239 if ( File.exists?( TMP_FILE ) )
240 File.delete( TMP_FILE )
# Open the log file for writing (the block body is not visible) and report
# success.
244 open( LOG_FILE, 'w' ) do | f |
248 Util.print_message( PRG_NAME, 'OK' )
# Runs +cmd+ through IO.popen and appends to +log+ a line recording the
# command, followed by everything the command wrote to its standard output
# and two line delimiters.
#
# cmd:: complete command line (String), handed to IO.popen unchanged
# log:: collector for the log text; only << is called on it
#
# The return value is not used by the callers in this file.
def execute_cmd( cmd, log )
  # The spelling 'excuting' is kept: it is part of the existing log format.
  log << 'excuting ' + cmd + NL
  IO.popen( cmd, 'r+' ) do | pipe |
    # Nothing is ever written to the child. In 'r+' mode its standard input is
    # a pipe from this process, so the write end must be closed first;
    # otherwise a child that reads its input never sees end-of-file and the
    # read below blocks forever.
    pipe.close_write
    log << pipe.read + NL + NL
  end
end
# Extracts the phylogeny id from a file name: the leading run of characters
# up to, but not including, the first underscore.
#
# phylogeny_file_name:: name of a phylogeny file (String)
#
# Returns the id as a String (the whole name when it contains no underscore),
# or nil when the name starts with an underscore.
def get_id( phylogeny_file_name )
  # String#[] with a Regexp returns the matched text directly, so the result
  # does not depend on the global $1 being read right after a match.
  phylogeny_file_name[ /\A[^_]+/ ]
end
# Finds the single file in +files_in_dir+ that belongs to +phylogeny_id+ and
# ends in +suffix_pattern+.
#
# files_in_dir::   file names to search (Array of String)
# phylogeny_id::   id the file name has to start with; matched literally
# suffix_pattern:: regular expression source the file name has to end with
#                  (it is interpolated as a pattern, so the '.' of a suffix
#                  such as '.nim' matches any character)
#
# Reports a fatal error through Util.fatal_error when no file or more than
# one file matches. NOTE(review): presumably Util.fatal_error terminates the
# program -- confirm; if it returns, this method returns the first match, or
# nil when there is none.
#
# Returns the name of the matching file.
def get_file( files_in_dir, phylogeny_id, suffix_pattern )
  # The id is file-name data, not a pattern: escape it so that characters
  # such as '+', '.' or '(' match themselves and cannot produce an invalid
  # regular expression. Built once instead of once per file.
  name_pattern = /^#{Regexp.escape( phylogeny_id )}.*#{suffix_pattern}$/

  matching_files = files_in_dir.select { | file |
    !File.directory?( file ) && file =~ name_pattern
  }

  if matching_files.empty?
    Util.fatal_error( PRG_NAME, 'no file matching [' + phylogeny_id +
      '_] [' + suffix_pattern + '] present in current directory' )
  elsif matching_files.length > 1
    Util.fatal_error( PRG_NAME, 'more than one file matching [' + phylogeny_id +
      '_] [' + suffix_pattern + '] present in current directory' )
  end

  matching_files.first
end
# Usage text written to standard output: the invocation pattern followed by
# one line per option.
# NOTE(review): the `def` line of this method and some of its statements are
# not visible in this listing (the embedded numbering jumps from 283 to 291).
291 puts( " " + PRG_NAME + ".rb [options] <suffix of intrees to be decorated> <suffix for decorated outtrees> " )
293 puts( " options: -" + IDS_ONLY_OPTION + ": decorate with sequence/species names only" )
294 puts( " -" + DOMAINS_ONLY_OPTION + ": decorate with domain structures" )
297 end # class PhylogeniesDecorator