in progress
[jalview.git] / forester / ruby / evoruby / lib / evo / apps / phylogenies_decorator.rb
1 #!/usr/local/bin/ruby -w
2 #
3 # = lib/evo/apps/phylogenies_decorator
4 #
5 # Copyright::  Copyright (C) 2006-2008 Christian M. Zmasek
6 # License::    GNU Lesser General Public License (LGPL)
7 #
8 # decoration of phylogenies with sequence/species names and domain architectures
9 #
10 # $Id: phylogenies_decorator.rb,v 1.34 2010/12/13 19:00:11 cmzmasek Exp $
11 #
12 # Environment variable FORESTER_HOME needs to point to the appropriate
13 # directory (e.g. setenv FORESTER_HOME $HOME/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester-atv/)
14
15 require 'lib/evo/util/constants'
16 require 'lib/evo/util/util'
17 require 'lib/evo/util/command_line_arguments'
18
19 require 'date'
20
module Evoruby

    # Command line application which decorates phylogenies found in the
    # current working directory with sequence/species names and/or domain
    # architectures.
    #
    # For every file in the current directory whose name ends with the given
    # input suffix, the external Java program
    # org.forester.application.decorator (from forester.jar) is called, using
    # mapping files (suffixes IDS_MAPFILE_SUFFIX and DOMAINS_MAPFILE_SUFFIX)
    # whose names start with the same id as the phylogeny file.
    #
    # Requires the environment variables named by
    # Constants::FORESTER_HOME_ENV_VARIABLE and
    # Constants::JAVA_HOME_ENV_VARIABLE to be set.
    class PhylogeniesDecorator

        DECORATOR_OPTIONS_SEQ_NAMES = '-r=1 -mdn'
        # -mdn is a hidden expert option to rename e.g. "6_ORYLA3" to "6_[3]_ORYLA"
        #DECORATOR_OPTIONS_SEQ_NAMES = '-sn -r=1'
        DECORATOR_OPTIONS_DOMAINS = '-r=1'
        IDS_MAPFILE_SUFFIX        = '.nim'
        DOMAINS_MAPFILE_SUFFIX    = '.dff'
        SLEEP_TIME                = 0.1    # seconds to pause after each decorator call
        REMOVE_NI                 = true   # replace the first "_ni_" in output names by "_"
        TMP_FILE                  = '___PD___'
        LOG_FILE                  = '00_phylogenies_decorator.log'
        FORESTER_HOME             = ENV[Constants::FORESTER_HOME_ENV_VARIABLE]
        JAVA_HOME                 = ENV[Constants::JAVA_HOME_ENV_VARIABLE]

        PRG_NAME       = "phylogenies_decorator"
        PRG_DATE       = "2008.09.02"
        PRG_DESC       = "decoration of phylogenies with sequence/species names and domain architectures"
        PRG_VERSION    = "1.0.1"
        COPYRIGHT      = "2008-2009 Christian M Zmasek"
        CONTACT        = "phylosoft@gmail.com"
        WWW            = "www.phylosoft.org"

        IDS_ONLY_OPTION     = "n"
        DOMAINS_ONLY_OPTION = "d"
        HELP_OPTION_1       = "help"
        HELP_OPTION_2       = "h"

        NL = Constants::LINE_DELIMITER

        # Program entry point.
        #
        # Reads ARGV (optional -n / -d option, input suffix, output suffix),
        # validates the environment, decorates every matching phylogeny file
        # of the current directory and finally writes the collected log to
        # LOG_FILE.
        #
        # Errors are reported through Util.fatal_error (presumably terminating
        # the program -- confirm in Util); in that case LOG_FILE is not written.
        def run

            Util.print_program_information( PRG_NAME,
                PRG_VERSION,
                PRG_DESC,
                PRG_DATE,
                COPYRIGHT,
                CONTACT,
                WWW,
                STDOUT )

            if ( ARGV == nil || ARGV.length > 3 || ARGV.length < 2 )
                print_help
                exit( -1 )
            end

            if FORESTER_HOME == nil || FORESTER_HOME.length < 1
                Util.fatal_error( PRG_NAME, "apparently environment variable #{Constants::FORESTER_HOME_ENV_VARIABLE} has not been set" )
            end
            if JAVA_HOME == nil || JAVA_HOME.length < 1
                Util.fatal_error( PRG_NAME, "apparently environment variable #{Constants::JAVA_HOME_ENV_VARIABLE} has not been set" )
            end

            if !File.exist?( FORESTER_HOME )
                Util.fatal_error( PRG_NAME, '[' + FORESTER_HOME + '] does not exist' )
            end
            if !File.exist?( JAVA_HOME )
                Util.fatal_error( PRG_NAME, '[' + JAVA_HOME + '] does not exist' )
            end

            decorator = JAVA_HOME + '/bin/java -cp ' + FORESTER_HOME + '/java/forester.jar org.forester.application.decorator'

            begin
                cla = CommandLineArguments.new( ARGV )
            rescue ArgumentError => e
                Util.fatal_error( PRG_NAME, "error: " + e.to_s )
            end

            if ( cla.is_option_set?( HELP_OPTION_1 ) ||
                     cla.is_option_set?( HELP_OPTION_2 ) )
                print_help
                exit( 0 )
            end

            # Never overwrite the log of a previous run.
            if File.exist?( LOG_FILE )
                Util.fatal_error( PRG_NAME, 'logfile [' + LOG_FILE + '] already exists' )
            end

            allowed_opts = Array.new
            allowed_opts.push( IDS_ONLY_OPTION )
            allowed_opts.push( DOMAINS_ONLY_OPTION )

            disallowed = cla.validate_allowed_options_as_str( allowed_opts )
            if ( disallowed.length > 0 )
                Util.fatal_error( PRG_NAME, "unknown option(s): " + disallowed )
            end

            in_suffix  = cla.get_file_name( 0 )
            out_suffix = cla.get_file_name( 1 )

            ids_only     = cla.is_option_set?( IDS_ONLY_OPTION ) ? true : false
            domains_only = cla.is_option_set?( DOMAINS_ONLY_OPTION ) ? true : false

            if ( ids_only && domains_only )
                Util.fatal_error( PRG_NAME, 'attempt to use ids only and domains only at the same time' )
            end

            log = String.new

            now = DateTime.now
            log << "Program              : " + PRG_NAME + NL
            log << "Version              : " + PRG_VERSION + NL
            log << "Program date         : " + PRG_DATE + NL
            log << "Options for seq names: " + DECORATOR_OPTIONS_SEQ_NAMES + NL
            log << "Options for domains  : " + DECORATOR_OPTIONS_DOMAINS + NL
            log << "FORESTER_HOME        : " + FORESTER_HOME + NL
            log << "JAVA_HOME            : " + JAVA_HOME + NL + NL
            log << "Date/time: " + now.to_s + NL
            log << "Directory: " + Dir.getwd  + NL + NL

            Util.print_message( PRG_NAME, 'input suffix     : ' + in_suffix )
            Util.print_message( PRG_NAME, 'output suffix    : ' + out_suffix )

            log << 'input suffix     : ' + in_suffix + NL
            log << 'output suffix    : ' + out_suffix + NL

            # Remove a temporary file possibly left behind by an aborted run.
            File.delete( TMP_FILE ) if File.exist?( TMP_FILE )

            files = Dir.entries( "." )

            # The suffixes are literal text supplied by the user, not regular
            # expressions: escape them so that e.g. the "." of ".xml" only
            # matches a dot. Compiled once, outside of the loop.
            in_suffix_re  = /#{Regexp.escape( in_suffix )}$/
            out_suffix_re = /#{Regexp.escape( out_suffix )}$/

            counter = 0

            files.each { | phylogeny_file |
                # Only plain, non-hidden files which carry the input suffix,
                # are not themselves output files and do not start with "00"
                # (e.g. the log file) are decorated.
                next if File.directory?( phylogeny_file )
                next if phylogeny_file =~ /^\./ || phylogeny_file =~ /^00/
                next if phylogeny_file =~ out_suffix_re
                next unless phylogeny_file =~ in_suffix_re

                ensure_readable( phylogeny_file, 'can not read from: ' + phylogeny_file + ': ' )

                counter += 1

                # Block form of sub: the replacement is taken literally, so
                # backslashes in the output suffix are not interpreted.
                outfile = phylogeny_file.sub( in_suffix_re ) { out_suffix }
                outfile = outfile.sub( /_ni_/, '_' ) if REMOVE_NI

                if File.exist?( outfile )
                    msg = counter.to_s + ': ' + phylogeny_file + ' -> ' +  outfile +
                     ' : already exists, skipping'
                    Util.print_message( PRG_NAME, msg  )
                    log << msg + NL
                    next
                end

                Util.print_message( PRG_NAME, counter.to_s + ': ' + phylogeny_file + ' -> ' +  outfile )
                log << counter.to_s + ': ' + phylogeny_file + ' -> ' +  outfile + NL

                phylogeny_id = get_id( phylogeny_file )

                ids_mapfile_name     = nil
                domains_mapfile_name = nil

                unless domains_only
                    ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX )
                end
                unless ids_only
                    domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX )
                end

                if domains_mapfile_name != nil
                    ensure_readable( domains_mapfile_name, "failed to read from [#{domains_mapfile_name}]: " )
                end
                if ids_mapfile_name != nil
                    ensure_readable( ids_mapfile_name, "failed to read from [#{ids_mapfile_name}]: " )
                end

                # Step 1 (domains): when names are added afterwards, the
                # result goes to TMP_FILE, otherwise directly to outfile.
                if domains_mapfile_name != nil
                    my_outfile = ids_mapfile_name != nil ? TMP_FILE : outfile
                    cmd = decorator + ' ' + DECORATOR_OPTIONS_DOMAINS + ' ' +
                     '-f=d ' + phylogeny_file + ' ' +
                     domains_mapfile_name + ' ' + my_outfile
                    execute_cmd( cmd, log )
                end

                # Step 2 (sequence names): reads the result of step 1 if
                # there was one, otherwise the original phylogeny file.
                if ids_mapfile_name != nil
                    my_infile = domains_mapfile_name != nil ? TMP_FILE : phylogeny_file
                    cmd = decorator + ' ' +  DECORATOR_OPTIONS_SEQ_NAMES + ' ' +
                     '-f=s ' + my_infile + ' ' +
                     ids_mapfile_name + ' ' + outfile
                    execute_cmd( cmd, log )
                end

                File.delete( TMP_FILE ) if File.exist?( TMP_FILE )
            }
            open( LOG_FILE, 'w' ) do | f |
                f.write( log )
            end
            puts
            Util.print_message( PRG_NAME, 'OK' )
            puts
        end # def run

        # Runs +cmd+ as an external process and appends the command line
        # as well as everything the process writes to its standard output
        # to +log+. Sleeps for SLEEP_TIME seconds afterwards.
        #
        # NOTE(review): +cmd+ is a single string, so file names containing
        # whitespace or shell meta characters are presumably not handled
        # correctly -- confirm before using on such directories.
        def execute_cmd( cmd, log )
            log << 'excuting ' + cmd + NL
            IO.popen( cmd , 'r+' ) do | pipe |
                # Nothing is sent to the child process; close its stdin.
                pipe.close_write
                log << pipe.read + NL + NL
            end
            sleep( SLEEP_TIME )
        end


        # Returns the id of a phylogeny file: the leading part of its name
        # up to, but excluding, the first underscore (the whole first line
        # of the name if it contains no underscore). Returns nil if the
        # name starts with an underscore or is empty.
        def get_id( phylogeny_file_name )
            phylogeny_file_name =~ /^([^_]+)/
            $1
        end

        # Returns the name of the one file in +files_in_dir+ which starts
        # with +phylogeny_id+ and ends with +suffix_pattern+, ignoring
        # directories, hidden files and files starting with "00".
        #
        # +phylogeny_id+ is matched literally. +suffix_pattern+ is, as its
        # name says, interpolated into the regular expression as a pattern
        # (so the "." of ".nim" matches any character).
        #
        # Calls Util.fatal_error if no file or more than one file matches.
        #
        # NOTE(review): the id is not required to be followed by "_"
        # although the error messages suggest so; id "1" therefore also
        # matches "10_x.nim" -- confirm whether this is intended.
        def get_file( files_in_dir, phylogeny_id, suffix_pattern )
            name_re = /^#{Regexp.escape( phylogeny_id.to_s )}.*#{suffix_pattern}$/
            matching_files = files_in_dir.select { | file |
                !File.directory?( file ) &&
                    file !~ /^\./ &&
                    file !~ /^00/ &&
                    file =~ name_re
            }
            # Interpolation instead of "+": must not fail on a nil id.
            if matching_files.length < 1
                Util.fatal_error( PRG_NAME, "no file matching [#{phylogeny_id}" +
                     "_] [#{suffix_pattern}] present in current directory" )
            elsif matching_files.length > 1
                Util.fatal_error( PRG_NAME, "more than one file matching [#{phylogeny_id}" +
                     "_] [#{suffix_pattern}] present in current directory" )
            end
            matching_files[ 0 ]
        end

        # Prints the usage message to standard output.
        def print_help()
            puts( "Usage:" )
            puts()
            puts( "  " + PRG_NAME + ".rb [options] <suffix of intrees to be decorated> <suffix for decorated outtrees> " )
            puts()
            puts( "  options: -" + IDS_ONLY_OPTION + ": decorate with sequence/species names only" )
            puts( "           -" + DOMAINS_ONLY_OPTION + ": decorate with domain structures" )
            puts()
        end

        private

        # Checks that +file+ is readable; on failure reports
        # +message_prefix+ followed by the error text via Util.fatal_error.
        def ensure_readable( file, message_prefix )
            Util.check_file_for_readability( file )
        rescue ArgumentError => e
            # e.to_s: an Exception cannot be appended to a String with "+".
            Util.fatal_error( PRG_NAME, message_prefix + e.to_s )
        end

    end # class PhylogeniesDecorator

end # module Evoruby