#
# = lib/evo/apps/phylogenies_decorator
#
-# Copyright:: Copyright (C) 2006-2008 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
#
-# decoration of phylogenies with sequence/species names and domain architectures
+# Last modified: 2017/02/09
#
-# $Id: phylogenies_decorator.rb,v 1.34 2010/12/13 19:00:11 cmzmasek Exp $
+# decoration of phylogenies with sequence/species names and domain architectures
#
# Environment variable FORESTER_HOME needs to point to the appropriate
# directory (e.g. setenv FORESTER_HOME $HOME/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/)
require 'lib/evo/util/constants'
require 'lib/evo/util/util'
require 'lib/evo/util/command_line_arguments'
-
require 'date'
module Evoruby
-
class PhylogeniesDecorator
# Options passed to the decorator program in the final name-decoration
# ('-f=n') step of run. Earlier option sets are kept below for reference.
#DECORATOR_OPTIONS_SEQ_NAMES = '-r=1 -mdn'
#DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -sn'
- DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -tc -mp -or'
+ #DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -tc -mp -or'
+ DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -mp -or'
# -mdn is a hidden expert option to rename e.g. "6_ORYLA3" to "6_[3]_ORYLA"
#DECORATOR_OPTIONS_SEQ_NAMES = '-sn -r=1'
#DECORATOR_OPTIONS_DOMAINS = '-r=1'
# File-name suffix used by run (via get_file) to locate the domains map file
# belonging to a phylogeny.
DOMAINS_MAPFILE_SUFFIX = '_hmmscan_10.dff'
# Seconds execute_cmd sleeps after each external command.
SLEEP_TIME = 0.05
# NOTE(review): presumably controls whether '_ni_' is stripped from output
# file names (see the outfile.sub call in run) -- confirm against the full file.
REMOVE_NI = true
# When true, run skips the sequence ('-f=m') and domain ('-f=d') decoration
# steps and applies only the name-decoration step directly to the phylogeny file.
+ IDS_ONLY = false #TODO this should be a command line option
# When not nil, run uses this file as the ids map file for every phylogeny
# instead of looking one up per phylogeny with get_file.
+ FIXED_NIM_FILE = 'all.nim' #TODO this should be a command line option
# Intermediate files: TMP_FILE_1 is the output of the '-f=m' step and the input
# of the '-f=d' step; TMP_FILE_2 is the output of '-f=d' and the input of '-f=n'.
# run deletes leftovers before starting and again after each phylogeny.
TMP_FILE_1 = '___PD1___'
TMP_FILE_2 = '___PD2___'
# Name of the log file opened for writing at the end of run.
LOG_FILE = '00_phylogenies_decorator.log'
# Value of the environment variable named by Constants::JAVA_HOME_ENV_VARIABLE
# (nil if that variable is not set).
JAVA_HOME = ENV[Constants::JAVA_HOME_ENV_VARIABLE]
# Program metadata; the name, version, description, date and URL are passed
# to Util.print_program_information at the start of run.
PRG_NAME = "phylogenies_decorator"
- PRG_DATE = "2013.11.15"
+ PRG_DATE = "170209"
PRG_DESC = "decoration of phylogenies with sequence/species names and domain architectures"
PRG_VERSION = "1.02"
- COPYRIGHT = "2013 Christian M Zmasek"
- CONTACT = "phylosoft@gmail.com"
WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
-
# Command line option names that make run print the help text and exit.
HELP_OPTION_1 = "help"
HELP_OPTION_2 = "h"
# Line delimiter appended to log entries.
NL = Constants::LINE_DELIMITER
-
def run
Util.print_program_information( PRG_NAME,
- PRG_VERSION,
- PRG_DESC,
- PRG_DATE,
- COPYRIGHT,
- CONTACT,
- WWW,
- STDOUT )
+ PRG_VERSION,
+ PRG_DESC,
+ PRG_DATE,
+ WWW,
+ STDOUT )
if ( ARGV == nil || ARGV.length > 3 || ARGV.length < 2 )
print_help
end
if ( cla.is_option_set?( HELP_OPTION_1 ) ||
- cla.is_option_set?( HELP_OPTION_2 ) )
+ cla.is_option_set?( HELP_OPTION_2 ) )
print_help
exit( 0 )
end
log << 'input suffix : ' + in_suffix + NL
log << 'output suffix : ' + out_suffix + NL
- if ( File.exists?( TMP_FILE_1 ) )
+ if ( File.exist?( TMP_FILE_1 ) )
File.delete( TMP_FILE_1 )
end
- if ( File.exists?( TMP_FILE_2 ) )
+ if ( File.exist?( TMP_FILE_2 ) )
File.delete( TMP_FILE_2 )
end
files.each { | phylogeny_file |
if ( !File.directory?( phylogeny_file ) &&
- phylogeny_file !~ /^\./ &&
- phylogeny_file !~ /^00/ &&
- phylogeny_file !~ /#{out_suffix}$/ &&
- phylogeny_file =~ /#{in_suffix}$/ )
+ phylogeny_file !~ /^\./ &&
+ phylogeny_file !~ /^00/ &&
+ phylogeny_file !~ /#{out_suffix}$/ &&
+ phylogeny_file =~ /#{in_suffix}$/ )
begin
Util.check_file_for_readability( phylogeny_file )
rescue ArgumentError
outfile = outfile.sub( /_ni_/, '_' )
end
- if File.exists?( outfile )
+ if File.exist?( outfile )
msg = counter.to_s + ': ' + phylogeny_file + ' -> ' + outfile +
- ' : already exists, skipping'
+ ' : already exists, skipping'
Util.print_message( PRG_NAME, msg )
log << msg + NL
next
domains_mapfile_name = nil
seqs_file_name = nil
- ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX )
- domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX )
- seqs_file_name = get_seq_file( files, phylogeny_id )
+ if ( FIXED_NIM_FILE == nil )
+ ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX )
+ else
+ ids_mapfile_name = FIXED_NIM_FILE
+ end
+
+ unless IDS_ONLY
+ domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX )
+ seqs_file_name = get_seq_file( files, phylogeny_id )
+ end
- begin
- Util.check_file_for_readability( domains_mapfile_name )
- rescue ArgumentError
- Util.fatal_error( PRG_NAME, 'failed to read from [#{domains_mapfile_name}]: ' + $! )
+ unless IDS_ONLY
+ begin
+ Util.check_file_for_readability( domains_mapfile_name )
+ rescue ArgumentError
+ Util.fatal_error( PRG_NAME, 'failed to read from [#{domains_mapfile_name}]: ' + $! )
+ end
+ begin
+ Util.check_file_for_readability( seqs_file_name )
+ rescue ArgumentError
+ Util.fatal_error( PRG_NAME, 'failed to read from [#{seqs_file_name }]: ' + $! )
+ end
end
begin
Util.fatal_error( PRG_NAME, 'failed to read from [#{ids_mapfile_name}]: ' + $! )
end
- begin
- Util.check_file_for_readability( seqs_file_name )
- rescue ArgumentError
- Util.fatal_error( PRG_NAME, 'failed to read from [#{seqs_file_name }]: ' + $! )
+ unless IDS_ONLY
+ cmd = decorator +
+ ' -t -p -f=m ' + phylogeny_file + ' ' +
+ seqs_file_name + ' ' + TMP_FILE_1
+ puts cmd
+ begin
+ execute_cmd( cmd, log )
+ rescue Error
+ Util.fatal_error( PRG_NAME, 'error: ' + $! )
+ end
+
+ cmd = decorator + ' ' + DECORATOR_OPTIONS_DOMAINS + ' ' +
+ '-f=d ' + TMP_FILE_1 + ' ' +
+ domains_mapfile_name + ' ' + TMP_FILE_2
+ puts cmd
+ begin
+ execute_cmd( cmd, log )
+ rescue Error
+ Util.fatal_error( PRG_NAME, 'error: ' + $! )
+ end
end
- cmd = decorator +
- ' -p -f=m ' + phylogeny_file + ' ' +
- seqs_file_name + ' ' + TMP_FILE_1
- puts cmd
- execute_cmd( cmd, log )
-
- cmd = decorator + ' ' + DECORATOR_OPTIONS_DOMAINS + ' ' +
- '-f=d ' + TMP_FILE_1 + ' ' +
- domains_mapfile_name + ' ' +TMP_FILE_2
- puts cmd
- execute_cmd( cmd, log )
-
- cmd = decorator + ' ' + DECORATOR_OPTIONS_SEQ_NAMES + ' ' +
- '-f=n ' + TMP_FILE_2 + ' ' +
- ids_mapfile_name + ' ' + outfile
- puts cmd
- execute_cmd( cmd, log )
-
- File.delete( TMP_FILE_1 )
- File.delete( TMP_FILE_2 )
-
+ if IDS_ONLY
+ cmd = decorator + ' ' + DECORATOR_OPTIONS_SEQ_NAMES + ' ' +
+ '-f=n ' + phylogeny_file + ' ' +
+ ids_mapfile_name + ' ' + outfile
+ puts cmd
+ begin
+ execute_cmd( cmd, log )
+ rescue Error
+ Util.fatal_error( PRG_NAME, 'error: ' + $! )
+ end
+ else
+ cmd = decorator + ' ' + DECORATOR_OPTIONS_SEQ_NAMES + ' ' +
+ '-f=n ' + TMP_FILE_2 + ' ' +
+ ids_mapfile_name + ' ' + outfile
+ puts cmd
+ begin
+ execute_cmd( cmd, log )
+ rescue Error
+ Util.fatal_error( PRG_NAME, 'error: ' + $! )
+ end
+ File.delete( TMP_FILE_1 )
+ File.delete( TMP_FILE_2 )
+ end
end
}
open( LOG_FILE, 'w' ) do | f |
end # def run
# Runs an external command and records it, together with its output, in the log.
#
# cmd - the command line to execute (String)
# log - object the log text is appended to with <<
#
# The command is started with IO.popen in 'r+' mode. Nothing is written to
# it: the write end is closed immediately, then everything the command
# prints is read and appended to the log. Sleeps SLEEP_TIME seconds.
def execute_cmd( cmd, log )
- log << 'excuting ' + cmd + NL
+ log << 'executing ' + cmd + NL
IO.popen( cmd , 'r+' ) do | pipe |
pipe.close_write
log << pipe.read + NL + NL
sleep( SLEEP_TIME )
end
-
# Extracts the phylogeny id from the start of a phylogeny file name.
#
# New behavior: three patterns are tried in order, each with non-greedy
# matching, and the first one that matches supplies the id:
#   1. /^(.+?_.+?)_/ - two underscore-separated parts followed by an
#                      underscore, e.g. "a_b_c.xml" -> "a_b"
#   2. /^(.+?)__/    - the text before a double underscore,
#                      e.g. "abc__x" -> "abc"
#   3. /^(.+?)_/     - the text before an underscore, e.g. "abc_x" -> "abc"
# Returns nil when none of the patterns matches.
#
# Old behavior (removed lines): only pattern 2 was used.
def get_id( phylogeny_file_name )
- phylogeny_file_name =~ /^(.+?)__/
- $1
+ if phylogeny_file_name =~ /^(.+?_.+?)_/
+ return $1
+ elsif phylogeny_file_name =~ /^(.+?)__/
+ return $1
+ elsif phylogeny_file_name =~ /^(.+?)_/
+ return $1
+ end
+ nil
end
# Returns the name of the single file that belongs to the given phylogeny id
# and ends with the given suffix.
#
# files_in_dir   - the file names to search
# phylogeny_id   - id the file name has to start with (String)
# suffix_pattern - suffix the file name has to end with (String)
#
# Calls Util.fatal_error if no file or more than one file matches.
#
# The old version (removed lines) filtered the list here: it skipped
# directories and names starting with '.' or '00', and kept names matching
# /^#{phylogeny_id}.*#{suffix_pattern}$/. The new version delegates to
# Util.get_matching_files.
# NOTE(review): presumably that helper applies the same filter -- confirm in Util.
def get_file( files_in_dir, phylogeny_id, suffix_pattern )
- matching_files = Array.new
-
- files_in_dir.each { | file |
-
- if ( !File.directory?( file ) &&
- file !~ /^\./ &&
- file !~ /^00/ &&
- file =~ /^#{phylogeny_id}.*#{suffix_pattern}$/ )
- matching_files << file
- end
- }
+ matching_files = Util.get_matching_files( files_in_dir, phylogeny_id, suffix_pattern )
if matching_files.length < 1
Util.fatal_error( PRG_NAME, 'no file matching [' + phylogeny_id +
- '...' + suffix_pattern + '] present in current directory' )
+ '...' + suffix_pattern + '] present in current directory' )
end
if matching_files.length > 1
Util.fatal_error( PRG_NAME, 'more than one file matching [' +
- phylogeny_id + '...' + suffix_pattern + '] present in current directory' )
+ phylogeny_id + '...' + suffix_pattern + '] present in current directory' )
end
# Exactly one match is left at this point.
matching_files[ 0 ]
end
files_in_dir.each { | file |
if ( !File.directory?( file ) &&
- file !~ /^\./ &&
- file !~ /^00/ &&
- ( file =~ /^#{phylogeny_id}__.+\d$/ || file =~ /^#{phylogeny_id}__.*\.fasta$/ ) )
+ file !~ /^\./ &&
+ file !~ /^00/ &&
+ ( file =~ /^#{phylogeny_id}__.+\d$/ || file =~ /^#{phylogeny_id}_.*\.fasta$/ ) )
matching_files << file
end
}
if matching_files.length < 1
Util.fatal_error( PRG_NAME, 'no seq file matching [' +
- phylogeny_id + '__] present in current directory' )
+ phylogeny_id + '_] present in current directory' )
end
if matching_files.length > 1
Util.fatal_error( PRG_NAME, 'more than one seq file matching [' +
- phylogeny_id + '__] present in current directory' )
+ phylogeny_id + '_] present in current directory' )
end
matching_files[ 0 ]
end
-
def print_help()
puts( "Usage:" )
puts()