log d.to_str
end
log
- log'Failing target domain(s):'
+ log 'Failing target domain(s) (in proteins sequences with target domain architecture):'
@failing_domains_data = @failing_domains_data.sort{|a, b|a<=>b}.to_h
@failing_domains_data.each do |n, d|
log d.to_str
target_das = target_da_str.split '--'
target_das.each do |x|
inds = x.split '='
- unless inds.size == 4
+ unless inds.size == 1 || inds.size == 4
raise IOError, 'domain architecture is ill formatted: ' + x
end
+
target_domain_name = inds[0]
- ie_cutoff = Float(inds[1])
- abs_len_cutoff = Integer(inds[2])
- rel_len_cutoff = Float(inds[3])
+ ie_cutoff = inds.size == 4 ? Float(inds[1]) : IE_CUTOFF_FOR_DA_OVERVIEW
+ abs_len_cutoff = inds.size == 4 ? Integer(inds[2]) : 0
+ rel_len_cutoff = inds.size == 4 ? Float(inds[3]) : REL_LEN_CUTOFF_FOR_DA_OVERVIEW
if target_domain_hash.has_key? target_domain_name
target_domain_ary.push target_domain_hash[target_domain_name]
else
puts
Util.print_message( PRG_NAME, "wrote: " + outfile )
- Util.print_message( PRG_NAME, "next step in standard analysis pipeline: dsx.rb")
+ Util.print_message( PRG_NAME, "next step in standard analysis pipeline: dsx.rb or mdsx.rb")
Util.print_message( PRG_NAME, 'OK' )
puts
puts( " options: -" + E_VALUE_THRESHOLD_OPTION + "=<f>: E-value threshold, default is no threshold" )
puts( " -" + OVERWRITE_IF_SAME_FROM_TO_OPTION + " : overwrite domain with same start and end with domain with better E-value" )
puts
- puts( " [next step in standard analysis pipeline: dsx.rb]")
+ puts( " [next step in standard analysis pipeline: dsx.rb or mdsx.rb]")
puts()
puts( "Examples:" )
puts
#DECORATOR_OPTIONS_DOMAINS = '-r=1'
DECORATOR_OPTIONS_DOMAINS = '-p -t'
IDS_MAPFILE_SUFFIX = '.nim'
- DOMAINS_MAPFILE_SUFFIX = '_hmmscan_10.dff'
+ DOMAINS_MAPFILE_SUFFIX = '.dff'
SLEEP_TIME = 0.05
REMOVE_NI = true
IDS_ONLY = false #TODO this should be a command line option
- FIXED_NIM_FILE = 'all.nim' #TODO this should be a command line option
+ FIXED_NIM_FILE = nil #'all.nim' #TODO this should be a command line option
TMP_FILE_1 = '___PD1___'
TMP_FILE_2 = '___PD2___'
LOG_FILE = '00_phylogenies_decorator.log'
Util.fatal_error( PRG_NAME, 'could not get id from ' + phylogeny_file.to_s )
end
puts
- Util.print_message( PRG_NAME, "id: " + phylogeny_id )
- log << "id: " + phylogeny_id + NL
+ Util.print_message( PRG_NAME, "Id: " + phylogeny_id )
+ log << "Id: " + phylogeny_id + NL
ids_mapfile_name = nil
domains_mapfile_name = nil
else
ids_mapfile_name = FIXED_NIM_FILE
end
-
+
+ Util.print_message( PRG_NAME, "Ids mapfile: " + ids_mapfile_name )
+ log << "Ids mapfile: " + ids_mapfile_name + NL
+
unless IDS_ONLY
domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX )
seqs_file_name = get_seq_file( files, phylogeny_id )
+ Util.print_message( PRG_NAME, "Domains file: " + domains_mapfile_name )
+ log << "Domains file: " + domains_mapfile_name + NL
+ Util.print_message( PRG_NAME, "Seq file: " + seqs_file_name )
+ log << "Seq file: " + seqs_file_name + NL
end
unless IDS_ONLY
begin
Util.check_file_for_readability( domains_mapfile_name )
- rescue ArgumentError
+ rescue IOError
Util.fatal_error( PRG_NAME, 'failed to read from [#{domains_mapfile_name}]: ' + $! )
end
begin
Util.check_file_for_readability( seqs_file_name )
- rescue ArgumentError
+ rescue IOError
Util.fatal_error( PRG_NAME, 'failed to read from [#{seqs_file_name }]: ' + $! )
end
end
begin
Util.check_file_for_readability( ids_mapfile_name )
- rescue ArgumentError
+ rescue IOError
Util.fatal_error( PRG_NAME, 'failed to read from [#{ids_mapfile_name}]: ' + $! )
end
end
def get_id( phylogeny_file_name )
- if phylogeny_file_name =~ /^(.+?_.+?)_/
- return $1
- elsif phylogeny_file_name =~ /^(.+?)__/
- return $1
- elsif phylogeny_file_name =~ /^(.+?)_/
+ if phylogeny_file_name =~ /^(.+?)_/
return $1
end
nil
Util.print_message( PRG_NAME, "wrote: " + list_file )
Util.print_message( PRG_NAME, "wrote: " + output )
Util.print_message( PRG_NAME, "next steps in standard analysis pipeline: hmmscan followed by hsp.rb")
+ Util.print_message( PRG_NAME, "hmmscan example: hmmscan --max --domtblout P53_hmmscan_#{Constants::PFAM_V_FOR_EX}_10 -E 10 Pfam-A.hmm P53_ni.fasta")
+
Util.print_message( PRG_NAME, "OK" )
end
puts( " -" + ANNOTATION_OPTION + "=<s>: to add an annotation to all entries" )
puts()
puts( " [next steps in standard analysis pipeline: hmmscan followed by hsp.rb]")
+ puts( " [hmmscan example: hmmscan --max --domtblout P53_hmmscan_#{Constants::PFAM_V_FOR_EX}_10 -E 10 Pfam-A.hmm P53_ni.fasta]")
puts()
puts( "Example:" )
puts()