From aa35322a551ab7813ee845799d64830a986a3fae Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Fri, 10 Mar 2017 17:47:48 -0800 Subject: [PATCH] in progress --- .../io/parser/hmmscan_multi_domain_extractor.rb | 11 ++++---- .../evoruby/lib/evo/tool/domains_to_forester.rb | 4 +-- .../lib/evo/tool/multi_domain_seq_extractor.rb | 2 +- .../evoruby/lib/evo/tool/phylogenies_decorator.rb | 29 +++++++++++--------- .../evoruby/lib/evo/tool/taxonomy_processor.rb | 3 ++ 5 files changed, 28 insertions(+), 21 deletions(-) diff --git a/forester/ruby/evoruby/lib/evo/io/parser/hmmscan_multi_domain_extractor.rb b/forester/ruby/evoruby/lib/evo/io/parser/hmmscan_multi_domain_extractor.rb index fe62a42..f773e26 100644 --- a/forester/ruby/evoruby/lib/evo/io/parser/hmmscan_multi_domain_extractor.rb +++ b/forester/ruby/evoruby/lib/evo/io/parser/hmmscan_multi_domain_extractor.rb @@ -190,7 +190,7 @@ module Evoruby log d.to_str end log - log'Failing target domain(s):' + log 'Failing target domain(s) (in proteins sequences with target domain architecture):' @failing_domains_data = @failing_domains_data.sort{|a, b|a<=>b}.to_h @failing_domains_data.each do |n, d| log d.to_str @@ -560,13 +560,14 @@ module Evoruby target_das = target_da_str.split '--' target_das.each do |x| inds = x.split '=' - unless inds.size == 4 + unless inds.size == 1 || inds.size == 4 raise IOError, 'domain architecture is ill formatted: ' + x end + target_domain_name = inds[0] - ie_cutoff = Float(inds[1]) - abs_len_cutoff = Integer(inds[2]) - rel_len_cutoff = Float(inds[3]) + ie_cutoff = inds.size == 4 ? Float(inds[1]) : IE_CUTOFF_FOR_DA_OVERVIEW + abs_len_cutoff = inds.size == 4 ? Integer(inds[2]) : 0 + rel_len_cutoff = inds.size == 4 ? Float(inds[3]) : REL_LEN_CUTOFF_FOR_DA_OVERVIEW if target_domain_hash.has_key? target_domain_name target_domain_ary.push target_domain_hash[target_domain_name] else diff --git a/forester/ruby/evoruby/lib/evo/tool/domains_to_forester.rb b/forester/ruby/evoruby/lib/evo/tool/domains_to_forester.rb index 48459ae..a090d52 100644 --- a/forester/ruby/evoruby/lib/evo/tool/domains_to_forester.rb +++ b/forester/ruby/evoruby/lib/evo/tool/domains_to_forester.rb @@ -252,7 +252,7 @@ module Evoruby puts Util.print_message( PRG_NAME, "wrote: " + outfile ) - Util.print_message( PRG_NAME, "next step in standard analysis pipeline: dsx.rb") + Util.print_message( PRG_NAME, "next step in standard analysis pipeline: dsx.rb or mdsx.rb") Util.print_message( PRG_NAME, 'OK' ) puts @@ -269,7 +269,7 @@ module Evoruby puts( " options: -" + E_VALUE_THRESHOLD_OPTION + "=: E-value threshold, default is no threshold" ) puts( " -" + OVERWRITE_IF_SAME_FROM_TO_OPTION + " : overwrite domain with same start and end with domain with better E-value" ) puts - puts( " [next step in standard analysis pipeline: dsx.rb]") + puts( " [next step in standard analysis pipeline: dsx.rb or mdsx.rb]") puts() puts( "Examples:" ) puts diff --git a/forester/ruby/evoruby/lib/evo/tool/multi_domain_seq_extractor.rb b/forester/ruby/evoruby/lib/evo/tool/multi_domain_seq_extractor.rb index 506743a..b2f908d 100644 --- a/forester/ruby/evoruby/lib/evo/tool/multi_domain_seq_extractor.rb +++ b/forester/ruby/evoruby/lib/evo/tool/multi_domain_seq_extractor.rb @@ -13,7 +13,7 @@ module Evoruby class MultiDomainSeqExtractor PRG_NAME = "mdsx" - PRG_VERSION = "1.001" + PRG_VERSION = "1.002" PRG_DESC = "Extraction of multi domain sequences from hmmscan output" PRG_DATE = "2017/03/08" WWW = "https://sites.google.com/site/cmzmasek/home/software/forester" diff --git a/forester/ruby/evoruby/lib/evo/tool/phylogenies_decorator.rb b/forester/ruby/evoruby/lib/evo/tool/phylogenies_decorator.rb index 06c103e..5bc5bac 100644 --- a/forester/ruby/evoruby/lib/evo/tool/phylogenies_decorator.rb +++ b/forester/ruby/evoruby/lib/evo/tool/phylogenies_decorator.rb @@ -29,11 +29,11 @@ module Evoruby #DECORATOR_OPTIONS_DOMAINS = '-r=1' DECORATOR_OPTIONS_DOMAINS = '-p -t' IDS_MAPFILE_SUFFIX = '.nim' - DOMAINS_MAPFILE_SUFFIX = '_hmmscan_10.dff' + DOMAINS_MAPFILE_SUFFIX = '.dff' SLEEP_TIME = 0.05 REMOVE_NI = true IDS_ONLY = false #TODO this should be a command line option - FIXED_NIM_FILE = 'all.nim' #TODO this should be a command line option + FIXED_NIM_FILE = nil #'all.nim' #TODO this should be a command line option TMP_FILE_1 = '___PD1___' TMP_FILE_2 = '___PD2___' LOG_FILE = '00_phylogenies_decorator.log' @@ -165,8 +165,8 @@ module Evoruby Util.fatal_error( PRG_NAME, 'could not get id from ' + phylogeny_file.to_s ) end puts - Util.print_message( PRG_NAME, "id: " + phylogeny_id ) - log << "id: " + phylogeny_id + NL + Util.print_message( PRG_NAME, "Id: " + phylogeny_id ) + log << "Id: " + phylogeny_id + NL ids_mapfile_name = nil domains_mapfile_name = nil @@ -177,28 +177,35 @@ module Evoruby else ids_mapfile_name = FIXED_NIM_FILE end - + + Util.print_message( PRG_NAME, "Ids mapfile: " + ids_mapfile_name ) + log << "Ids mapfile: " + ids_mapfile_name + NL + unless IDS_ONLY domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX ) seqs_file_name = get_seq_file( files, phylogeny_id ) + Util.print_message( PRG_NAME, "Domains file: " + domains_mapfile_name ) + log << "Domains file: " + domains_mapfile_name + NL + Util.print_message( PRG_NAME, "Seq file: " + seqs_file_name ) + log << "Seq file: " + seqs_file_name + NL end unless IDS_ONLY begin Util.check_file_for_readability( domains_mapfile_name ) - rescue ArgumentError + rescue IOError Util.fatal_error( PRG_NAME, 'failed to read from [#{domains_mapfile_name}]: ' + $! ) end begin Util.check_file_for_readability( seqs_file_name ) - rescue ArgumentError + rescue IOError Util.fatal_error( PRG_NAME, 'failed to read from [#{seqs_file_name }]: ' + $! ) end end begin Util.check_file_for_readability( ids_mapfile_name ) - rescue ArgumentError + rescue IOError Util.fatal_error( PRG_NAME, 'failed to read from [#{ids_mapfile_name}]: ' + $! ) end @@ -267,11 +274,7 @@ module Evoruby end def get_id( phylogeny_file_name ) - if phylogeny_file_name =~ /^(.+?_.+?)_/ - return $1 - elsif phylogeny_file_name =~ /^(.+?)__/ - return $1 - elsif phylogeny_file_name =~ /^(.+?)_/ + if phylogeny_file_name =~ /^(.+?)_/ return $1 end nil diff --git a/forester/ruby/evoruby/lib/evo/tool/taxonomy_processor.rb b/forester/ruby/evoruby/lib/evo/tool/taxonomy_processor.rb index 1d8a25b..b8f2cad 100644 --- a/forester/ruby/evoruby/lib/evo/tool/taxonomy_processor.rb +++ b/forester/ruby/evoruby/lib/evo/tool/taxonomy_processor.rb @@ -170,6 +170,8 @@ module Evoruby Util.print_message( PRG_NAME, "wrote: " + list_file ) Util.print_message( PRG_NAME, "wrote: " + output ) Util.print_message( PRG_NAME, "next steps in standard analysis pipeline: hmmscan followed by hsp.rb") + Util.print_message( PRG_NAME, "hmmscan example: hmmscan --max --domtblout P53_hmmscan_#{Constants::PFAM_V_FOR_EX}_10 -E 10 Pfam-A.hmm P53_ni.fasta") + Util.print_message( PRG_NAME, "OK" ) end @@ -209,6 +211,7 @@ module Evoruby puts( " -" + ANNOTATION_OPTION + "=: to add an annotation to all entries" ) puts() puts( " [next steps in standard analysis pipeline: hmmscan followed by hsp.rb]") + puts( " [hmmscan example: hmmscan --max --domtblout P53_hmmscan_#{Constants::PFAM_V_FOR_EX}_10 -E 10 Pfam-A.hmm P53_ni.fasta]") puts() puts( "Example:" ) puts() -- 1.7.10.2