X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fevoruby%2Flib%2Fevo%2Ftool%2Fphylogenies_decorator.rb;h=06c103e95b548e3cf88fbf0439aa6d82e87483dd;hb=2a165043be7f54dc4445bf4332a1af5283711fdf;hp=40106ba62cac94458f642d587fe5c71d2a1ea376;hpb=d2823363d9efbad977b5029c09c27ee09a4cbece;p=jalview.git diff --git a/forester/ruby/evoruby/lib/evo/tool/phylogenies_decorator.rb b/forester/ruby/evoruby/lib/evo/tool/phylogenies_decorator.rb index 40106ba..06c103e 100644 --- a/forester/ruby/evoruby/lib/evo/tool/phylogenies_decorator.rb +++ b/forester/ruby/evoruby/lib/evo/tool/phylogenies_decorator.rb @@ -2,12 +2,12 @@ # # = lib/evo/apps/phylogenies_decorator # -# Copyright:: Copyright (C) 2006-2008 Christian M. Zmasek -# License:: GNU Lesser General Public License (LGPL) +# Copyright:: Copyright (C) 2017 Christian M. Zmasek +# License:: GNU Lesser General Public License (LGPL) # -# decoration of phylogenies with sequence/species names and domain architectures +# Last modified: 2017/02/09 # -# $Id: phylogenies_decorator.rb,v 1.34 2010/12/13 19:00:11 cmzmasek Exp $ +# decoration of phylogenies with sequence/species names and domain architectures # # Environment variable FORESTER_HOME needs to point to the appropriate # directory (e.g. setenv FORESTER_HOME $HOME/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/) @@ -15,16 +15,15 @@ require 'lib/evo/util/constants' require 'lib/evo/util/util' require 'lib/evo/util/command_line_arguments' - require 'date' module Evoruby - class PhylogeniesDecorator #DECORATOR_OPTIONS_SEQ_NAMES = '-r=1 -mdn' #DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -sn' - DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -tc -mp -or' + #DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -tc -mp -or' + DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -mp -or' # -mdn is a hidden expert option to rename e.g. "6_ORYLA3" to "6_[3]_ORYLA" #DECORATOR_OPTIONS_SEQ_NAMES = '-sn -r=1' #DECORATOR_OPTIONS_DOMAINS = '-r=1' @@ -33,6 +32,8 @@ module Evoruby DOMAINS_MAPFILE_SUFFIX = '_hmmscan_10.dff' SLEEP_TIME = 0.05 REMOVE_NI = true + IDS_ONLY = false #TODO this should be a command line option + FIXED_NIM_FILE = 'all.nim' #TODO this should be a command line option TMP_FILE_1 = '___PD1___' TMP_FILE_2 = '___PD2___' LOG_FILE = '00_phylogenies_decorator.log' @@ -40,29 +41,23 @@ module Evoruby JAVA_HOME = ENV[Constants::JAVA_HOME_ENV_VARIABLE] PRG_NAME = "phylogenies_decorator" - PRG_DATE = "2013.11.15" + PRG_DATE = "170209" PRG_DESC = "decoration of phylogenies with sequence/species names and domain architectures" PRG_VERSION = "1.02" - COPYRIGHT = "2013 Christian M Zmasek" - CONTACT = "phylosoft@gmail.com" WWW = "https://sites.google.com/site/cmzmasek/home/software/forester" - HELP_OPTION_1 = "help" HELP_OPTION_2 = "h" NL = Constants::LINE_DELIMITER - def run Util.print_program_information( PRG_NAME, - PRG_VERSION, - PRG_DESC, - PRG_DATE, - COPYRIGHT, - CONTACT, - WWW, - STDOUT ) + PRG_VERSION, + PRG_DESC, + PRG_DATE, + WWW, + STDOUT ) if ( ARGV == nil || ARGV.length > 3 || ARGV.length < 2 ) print_help @@ -92,7 +87,7 @@ module Evoruby end if ( cla.is_option_set?( HELP_OPTION_1 ) || - cla.is_option_set?( HELP_OPTION_2 ) ) + cla.is_option_set?( HELP_OPTION_2 ) ) print_help exit( 0 ) end @@ -123,10 +118,10 @@ module Evoruby log << 'input suffix : ' + in_suffix + NL log << 'output suffix : ' + out_suffix + NL - if ( File.exists?( TMP_FILE_1 ) ) + if ( File.exist?( TMP_FILE_1 ) ) File.delete( TMP_FILE_1 ) end - if ( File.exists?( TMP_FILE_2 ) ) + if ( File.exist?( TMP_FILE_2 ) ) File.delete( TMP_FILE_2 ) end @@ -136,10 +131,10 @@ module Evoruby files.each { | phylogeny_file | if ( !File.directory?( phylogeny_file ) && - phylogeny_file !~ /^\./ && - phylogeny_file !~ /^00/ && - phylogeny_file !~ /#{out_suffix}$/ && - phylogeny_file =~ /#{in_suffix}$/ ) + phylogeny_file !~ /^\./ && + phylogeny_file !~ /^00/ && + phylogeny_file !~ /#{out_suffix}$/ && + phylogeny_file =~ /#{in_suffix}$/ ) begin Util.check_file_for_readability( phylogeny_file ) rescue ArgumentError @@ -154,9 +149,9 @@ module Evoruby outfile = outfile.sub( /_ni_/, '_' ) end - if File.exists?( outfile ) + if File.exist?( outfile ) msg = counter.to_s + ': ' + phylogeny_file + ' -> ' + outfile + - ' : already exists, skipping' + ' : already exists, skipping' Util.print_message( PRG_NAME, msg ) log << msg + NL next @@ -177,14 +172,28 @@ module Evoruby domains_mapfile_name = nil seqs_file_name = nil - ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX ) - domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX ) - seqs_file_name = get_seq_file( files, phylogeny_id ) + if ( FIXED_NIM_FILE == nil ) + ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX ) + else + ids_mapfile_name = FIXED_NIM_FILE + end + + unless IDS_ONLY + domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX ) + seqs_file_name = get_seq_file( files, phylogeny_id ) + end - begin - Util.check_file_for_readability( domains_mapfile_name ) - rescue ArgumentError - Util.fatal_error( PRG_NAME, 'failed to read from [#{domains_mapfile_name}]: ' + $! ) + unless IDS_ONLY + begin + Util.check_file_for_readability( domains_mapfile_name ) + rescue ArgumentError + Util.fatal_error( PRG_NAME, 'failed to read from [#{domains_mapfile_name}]: ' + $! ) + end + begin + Util.check_file_for_readability( seqs_file_name ) + rescue ArgumentError + Util.fatal_error( PRG_NAME, 'failed to read from [#{seqs_file_name }]: ' + $! ) + end end begin @@ -193,45 +202,51 @@ module Evoruby Util.fatal_error( PRG_NAME, 'failed to read from [#{ids_mapfile_name}]: ' + $! ) end - begin - Util.check_file_for_readability( seqs_file_name ) - rescue ArgumentError - Util.fatal_error( PRG_NAME, 'failed to read from [#{seqs_file_name }]: ' + $! ) - end - - cmd = decorator + - ' -t -p -f=m ' + phylogeny_file + ' ' + - seqs_file_name + ' ' + TMP_FILE_1 - puts cmd - begin - execute_cmd( cmd, log ) - rescue Error - Util.fatal_error( PRG_NAME, 'error: ' + $! ) + unless IDS_ONLY + cmd = decorator + + ' -t -p -f=m ' + phylogeny_file + ' ' + + seqs_file_name + ' ' + TMP_FILE_1 + puts cmd + begin + execute_cmd( cmd, log ) + rescue Error + Util.fatal_error( PRG_NAME, 'error: ' + $! ) + end + + cmd = decorator + ' ' + DECORATOR_OPTIONS_DOMAINS + ' ' + + '-f=d ' + TMP_FILE_1 + ' ' + + domains_mapfile_name + ' ' + TMP_FILE_2 + puts cmd + begin + execute_cmd( cmd, log ) + rescue Error + Util.fatal_error( PRG_NAME, 'error: ' + $! ) + end end - cmd = decorator + ' ' + DECORATOR_OPTIONS_DOMAINS + ' ' + - '-f=d ' + TMP_FILE_1 + ' ' + - domains_mapfile_name + ' ' +TMP_FILE_2 - puts cmd - begin - execute_cmd( cmd, log ) - rescue Error - Util.fatal_error( PRG_NAME, 'error: ' + $! ) + if IDS_ONLY + cmd = decorator + ' ' + DECORATOR_OPTIONS_SEQ_NAMES + ' ' + + '-f=n ' + phylogeny_file + ' ' + + ids_mapfile_name + ' ' + outfile + puts cmd + begin + execute_cmd( cmd, log ) + rescue Error + Util.fatal_error( PRG_NAME, 'error: ' + $! ) + end + else + cmd = decorator + ' ' + DECORATOR_OPTIONS_SEQ_NAMES + ' ' + + '-f=n ' + TMP_FILE_2 + ' ' + + ids_mapfile_name + ' ' + outfile + puts cmd + begin + execute_cmd( cmd, log ) + rescue Error + Util.fatal_error( PRG_NAME, 'error: ' + $! ) + end + File.delete( TMP_FILE_1 ) + File.delete( TMP_FILE_2 ) end - - cmd = decorator + ' ' + DECORATOR_OPTIONS_SEQ_NAMES + ' ' + - '-f=n ' + TMP_FILE_2 + ' ' + - ids_mapfile_name + ' ' + outfile - puts cmd - begin - execute_cmd( cmd, log ) - rescue Error - Util.fatal_error( PRG_NAME, 'error: ' + $! ) - end - - File.delete( TMP_FILE_1 ) - File.delete( TMP_FILE_2 ) - end } open( LOG_FILE, 'w' ) do | f | @@ -243,7 +258,7 @@ module Evoruby end # def run def execute_cmd( cmd, log ) - log << 'excuting ' + cmd + NL + log << 'executing ' + cmd + NL IO.popen( cmd , 'r+' ) do | pipe | pipe.close_write log << pipe.read + NL + NL @@ -251,9 +266,8 @@ module Evoruby sleep( SLEEP_TIME ) end - def get_id( phylogeny_file_name ) - if phylogeny_file_name =~ /^(.+_.+)_/ + if phylogeny_file_name =~ /^(.+?_.+?)_/ return $1 elsif phylogeny_file_name =~ /^(.+?)__/ return $1 @@ -264,24 +278,14 @@ module Evoruby end def get_file( files_in_dir, phylogeny_id, suffix_pattern ) - matching_files = Array.new - - files_in_dir.each { | file | - - if ( !File.directory?( file ) && - file !~ /^\./ && - file !~ /^00/ && - file =~ /^#{phylogeny_id}.*#{suffix_pattern}$/ ) - matching_files << file - end - } + matching_files = Util.get_matching_files( files_in_dir, phylogeny_id, suffix_pattern ) if matching_files.length < 1 Util.fatal_error( PRG_NAME, 'no file matching [' + phylogeny_id + - '...' + suffix_pattern + '] present in current directory' ) + '...' + suffix_pattern + '] present in current directory' ) end if matching_files.length > 1 Util.fatal_error( PRG_NAME, 'more than one file matching [' + - phylogeny_id + '...' + suffix_pattern + '] present in current directory' ) + phylogeny_id + '...' + suffix_pattern + '] present in current directory' ) end matching_files[ 0 ] end @@ -292,25 +296,24 @@ module Evoruby files_in_dir.each { | file | if ( !File.directory?( file ) && - file !~ /^\./ && - file !~ /^00/ && - ( file =~ /^#{phylogeny_id}__.+\d$/ || file =~ /^#{phylogeny_id}_.*\.fasta$/ ) ) + file !~ /^\./ && + file !~ /^00/ && + ( file =~ /^#{phylogeny_id}__.+\d$/ || file =~ /^#{phylogeny_id}_.*\.fasta$/ ) ) matching_files << file end } if matching_files.length < 1 Util.fatal_error( PRG_NAME, 'no seq file matching [' + - phylogeny_id + '_] present in current directory' ) + phylogeny_id + '_] present in current directory' ) end if matching_files.length > 1 Util.fatal_error( PRG_NAME, 'more than one seq file matching [' + - phylogeny_id + '_] present in current directory' ) + phylogeny_id + '_] present in current directory' ) end matching_files[ 0 ] end - def print_help() puts( "Usage:" ) puts()