# Option string handed to the decorator for the sequence-name mapping step
# (the command built with '-f=n'). Commented-out assignments are earlier variants.
#DECORATOR_OPTIONS_SEQ_NAMES = '-r=1 -mdn'
#DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -sn'
- DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -c -tc -mp -or'
+ DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -tc -mp -or'
# -mdn is a hidden expert option to rename e.g. "6_ORYLA3" to "6_[3]_ORYLA"
#DECORATOR_OPTIONS_SEQ_NAMES = '-sn -r=1'
# Option string handed to the decorator for the domain mapping step
# (the command built with '-f=d').
#DECORATOR_OPTIONS_DOMAINS = '-r=1'
DECORATOR_OPTIONS_DOMAINS = '-p -t'
# File-name suffixes used to look up the ids map file and the domains map file
# that belong to a phylogeny id.
IDS_MAPFILE_SUFFIX = '.nim'
- DOMAINS_MAPFILE_SUFFIX = '.dff'
- SLEEP_TIME = 0.1
+ DOMAINS_MAPFILE_SUFFIX = '_hmmscan_10.dff'
+ SLEEP_TIME = 0.05
REMOVE_NI = true
# Names of the intermediate files passed from one decoration step to the next;
# they are deleted before the run and again after each phylogeny is processed.
- TMP_FILE = '___PD___'
+ TMP_FILE_1 = '___PD1___'
+ TMP_FILE_2 = '___PD2___'
# Name of the log file opened for writing at the end of the run.
LOG_FILE = '00_phylogenies_decorator.log'
# Values read from the environment variables named by the Constants module.
FORESTER_HOME = ENV[Constants::FORESTER_HOME_ENV_VARIABLE]
JAVA_HOME = ENV[Constants::JAVA_HOME_ENV_VARIABLE]
# Program identification strings; PRG_NAME is also passed to the Util message helpers.
PRG_NAME = "phylogenies_decorator"
- PRG_DATE = "2012.10.11"
+ PRG_DATE = "2013.11.15"
PRG_DESC = "decoration of phylogenies with sequence/species names and domain architectures"
PRG_VERSION = "1.02"
- COPYRIGHT = "2012 Christian M Zmasek"
+ COPYRIGHT = "2013 Christian M Zmasek"
CONTACT = "phylosoft@gmail.com"
WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
- IDS_ONLY_OPTION = "n"
- DOMAINS_ONLY_OPTION = "d"
+
# Presumably the option names that request the usage text; their use is not
# visible in this excerpt -- TODO confirm.
HELP_OPTION_1 = "help"
HELP_OPTION_2 = "h"
Util.fatal_error( PRG_NAME, 'logfile [' + LOG_FILE + '] already exists' )
end
- allowed_opts = Array.new
- allowed_opts.push( IDS_ONLY_OPTION )
- allowed_opts.push( DOMAINS_ONLY_OPTION )
-
- disallowed = cla.validate_allowed_options_as_str( allowed_opts )
- if ( disallowed.length > 0 )
- Util.fatal_error( PRG_NAME, "unknown option(s): " + disallowed )
- end
-
- ids_only = false
- domains_only = false
-
in_suffix = cla.get_file_name( 0 )
out_suffix = cla.get_file_name( 1 )
- if cla.is_option_set?( IDS_ONLY_OPTION )
- ids_only = true
- end
- if cla.is_option_set?( DOMAINS_ONLY_OPTION )
- domains_only = true
- end
-
- if ( ids_only && domains_only )
- Util.fatal_error( PRG_NAME, 'attempt to use ids only and domains only at the same time' )
- end
-
log = String.new
now = DateTime.now
log << 'input suffix : ' + in_suffix + NL
log << 'output suffix : ' + out_suffix + NL
- if ( File.exists?( TMP_FILE ) )
- File.delete( TMP_FILE )
+ if ( File.exists?( TMP_FILE_1 ) )
+ File.delete( TMP_FILE_1 )
+ end
+ if ( File.exists?( TMP_FILE_2 ) )
+ File.delete( TMP_FILE_2 )
end
files = Dir.entries( "." )
log << counter.to_s + ': ' + phylogeny_file + ' -> ' + outfile + NL
phylogeny_id = get_id( phylogeny_file )
+ if phylogeny_id == nil || phylogeny_id.size < 1
+ Util.fatal_error( PRG_NAME, 'could not get id from ' + phylogeny_file.to_s )
+ end
+ puts
+ Util.print_message( PRG_NAME, "id: " + phylogeny_id )
+ log << "id: " + phylogeny_id + NL
ids_mapfile_name = nil
domains_mapfile_name = nil
+ seqs_file_name = nil
- if ids_only
- ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX )
- elsif domains_only
- domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX )
- else
- ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX )
- domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX )
- end
+ ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX )
+ domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX )
+ seqs_file_name = get_seq_file( files, phylogeny_id )
- if domains_mapfile_name != nil
- begin
- Util.check_file_for_readability( domains_mapfile_name )
- rescue ArgumentError
- Util.fatal_error( PRG_NAME, 'failed to read from [#{domains_mapfile_name}]: ' + $! )
- end
+ begin
+ Util.check_file_for_readability( domains_mapfile_name )
+ rescue ArgumentError
+ Util.fatal_error( PRG_NAME, 'failed to read from [#{domains_mapfile_name}]: ' + $! )
end
- if ids_mapfile_name != nil
- begin
- Util.check_file_for_readability( ids_mapfile_name )
- rescue ArgumentError
- Util.fatal_error( PRG_NAME, 'failed to read from [#{ids_mapfile_name}]: ' + $! )
- end
+ begin
+ Util.check_file_for_readability( ids_mapfile_name )
+ rescue ArgumentError
+ Util.fatal_error( PRG_NAME, 'failed to read from [#{ids_mapfile_name}]: ' + $! )
end
- if domains_mapfile_name != nil
- if ids_mapfile_name != nil
- my_outfile = TMP_FILE
- else
- my_outfile = outfile
- end
- cmd = decorator + ' ' + DECORATOR_OPTIONS_DOMAINS + ' ' +
- '-f=d ' + phylogeny_file + ' ' +
- domains_mapfile_name + ' ' + my_outfile
- puts cmd
- execute_cmd( cmd, log )
+ begin
+ Util.check_file_for_readability( seqs_file_name )
+ rescue ArgumentError
+ Util.fatal_error( PRG_NAME, 'failed to read from [#{seqs_file_name }]: ' + $! )
end
- if ids_mapfile_name != nil
- if domains_mapfile_name != nil
- my_infile = TMP_FILE
- else
- my_infile = phylogeny_file
- end
- cmd = decorator + ' ' + DECORATOR_OPTIONS_SEQ_NAMES + ' ' +
- '-f=n ' + my_infile + ' ' +
- ids_mapfile_name + ' ' + outfile
- puts cmd
- execute_cmd( cmd, log )
- end
+ cmd = decorator +
+ ' -p -f=m ' + phylogeny_file + ' ' +
+ seqs_file_name + ' ' + TMP_FILE_1
+ puts cmd
+ execute_cmd( cmd, log )
+
+ cmd = decorator + ' ' + DECORATOR_OPTIONS_DOMAINS + ' ' +
+ '-f=d ' + TMP_FILE_1 + ' ' +
+ domains_mapfile_name + ' ' +TMP_FILE_2
+ puts cmd
+ execute_cmd( cmd, log )
+
+ cmd = decorator + ' ' + DECORATOR_OPTIONS_SEQ_NAMES + ' ' +
+ '-f=n ' + TMP_FILE_2 + ' ' +
+ ids_mapfile_name + ' ' + outfile
+ puts cmd
+ execute_cmd( cmd, log )
+
+ File.delete( TMP_FILE_1 )
+ File.delete( TMP_FILE_2 )
- if ( File.exists?( TMP_FILE ) )
- File.delete( TMP_FILE )
- end
end
}
open( LOG_FILE, 'w' ) do | f |
# Extracts the phylogeny id from a tree file name.
# Removed pattern: the leading run of characters up to the first underscore.
# Added pattern: the shortest leading text that is followed by a double underscore.
# Returns the first capture group of the match, or nil when the name does not match.
def get_id( phylogeny_file_name )
- phylogeny_file_name =~ /^([^_]+)/
+ phylogeny_file_name =~ /^(.+?)__/
$1
end
# Returns the one entry of files_in_dir that is not a directory and whose name
# starts with phylogeny_id and ends with suffix_pattern.
# Both values are interpolated into the regular expression as-is, so regexp
# metacharacters in them (e.g. the '.' of a suffix) keep their pattern meaning.
# In the added lines, Util.fatal_error is called when no entry or more than one
# entry matches.
def get_file( files_in_dir, phylogeny_id, suffix_pattern )
matching_files = Array.new
- matching_suffix_files = Array.new
+
files_in_dir.each { | file |
if ( !File.directory?( file ) &&
file =~ /^#{phylogeny_id}.*#{suffix_pattern}$/ )
matching_files << file
end
+ }
+ if matching_files.length < 1
+ Util.fatal_error( PRG_NAME, 'no file matching [' + phylogeny_id +
+ '...' + suffix_pattern + '] present in current directory' )
+ end
+ if matching_files.length > 1
+ Util.fatal_error( PRG_NAME, 'more than one file matching [' +
+ phylogeny_id + '...' + suffix_pattern + '] present in current directory' )
+ end
+ matching_files[ 0 ]
+ end
+
# Returns the one entry of files_in_dir that looks like the sequence file for
# phylogeny_id: not a directory, name not starting with '.' or '00', and name
# starting with the id plus a double underscore and ending either in a digit or
# in '.fasta'.
# Util.fatal_error is called when no entry or more than one entry matches.
# The removed lines in this span are the tail of the previous get_file
# implementation (suffix-only fallback matching), which this diff replaces.
+ def get_seq_file( files_in_dir, phylogeny_id )
+ matching_files = Array.new
+
+ files_in_dir.each { | file |
+
if ( !File.directory?( file ) &&
file !~ /^\./ &&
file !~ /^00/ &&
- file =~ /#{suffix_pattern}$/ )
- matching_suffix_files << file
+ ( file =~ /^#{phylogeny_id}__.+\d$/ || file =~ /^#{phylogeny_id}__.*\.fasta$/ ) )
+ matching_files << file
end
}
- if matching_files.length < 1 && matching_suffix_files.length == 1
- return matching_suffix_files[ 0 ]
- end
- if matching_files.length < 1 && matching_suffix_files.length < 1
- Util.fatal_error( PRG_NAME, 'no file matching [' + phylogeny_id +
- '_] [' + suffix_pattern + '] present in current directory' )
+ if matching_files.length < 1
+ Util.fatal_error( PRG_NAME, 'no seq file matching [' +
+ phylogeny_id + '__] present in current directory' )
end
if matching_files.length > 1
- Util.fatal_error( PRG_NAME, 'more than one file matching [' + phylogeny_id +
- '_] [' + suffix_pattern + '] present in current directory' )
+ Util.fatal_error( PRG_NAME, 'more than one seq file matching [' +
+ phylogeny_id + '__] present in current directory' )
end
matching_files[ 0 ]
end
+
# Prints the usage text to standard output.
# The added usage line takes two arguments only (input-tree suffix and
# output-tree suffix); the removed lines described the former ids-only and
# domains-only options.
def print_help()
puts( "Usage:" )
puts()
- puts( " " + PRG_NAME + ".rb [options] <suffix of intrees to be decorated> <suffix for decorated outtrees> " )
+ puts( " " + PRG_NAME + ".rb <suffix of intrees to be decorated> <suffix for decorated outtrees> " )
puts()
- puts( " options: -" + IDS_ONLY_OPTION + ": decorate with sequence/species names only" )
- puts( " -" + DOMAINS_ONLY_OPTION + ": decorate with domain structures" )
puts()
end
end # class PhylogenyiesDecorator