# Sanity check: abort with an IOError when total_sequences (reported below as
# the hmmscan output count) exceeds the number of sequences held by in_msa.
if total_sequences > in_msa.get_number_of_seqs
  raise IOError,
        "hmmscan output contains more protein sequences than fasta sequence file"
end

# Log an overview of the encountered domain architectures, ordered by hash
# value with the largest value first. The instance variable is replaced by
# its sorted copy.
log
log "Domain architecture overview using default iE-cutoff #{IE_CUTOFF_FOR_DA_OVERVIEW}" \
    " and relative length cutoff #{REL_LEN_CUTOFF_FOR_DA_OVERVIEW}:"

# Ascending sort followed by reverse (rather than sorting on a negated value)
# so that entries with equal values keep the same relative order as before.
@encountered_domain_architectures =
  @encountered_domain_architectures.sort_by(&:last).reverse.to_h

# Only the first 40 entries of the sorted overview are listed, numbered from 1.
@encountered_domain_architectures.first(40).each.with_index(1) do |(architecture, occurrences), rank|
  log "#{rank.to_s.rjust(2)}) #{occurrences.to_s.rjust(5)}: #{architecture}"
end

# Log the passing domain arrangements, ordered by the default sort of the
# hash's [key, value] pairs, and total their values into passing_da_sum
# (compared against passing_sequences.size further down).
log
log 'Passing domain arrangements of target domain(s):'
# NOTE(review): the instance variable name is spelled with a triple "s";
# presumably it matches the spelling used where the hash is filled - confirm
# before renaming.
@passsing_domain_architectures = @passsing_domain_architectures.sort.to_h
passing_da_sum = @passsing_domain_architectures.values.reduce(0, :+)
@passsing_domain_architectures.each_pair do |arrangement, occurrences|
  log "#{occurrences.to_s.rjust(4)}: #{arrangement}"
end
# Log the failing domain arrangements, ordered by the default sort of the
# hash's [key, value] pairs, and total their values into failing_da_sum
# (checked against @failing_proteins_bc_missing_cutoffs further down).
log
log 'Failing domain arrangements of target domain(s):'
@failing_domain_architectures = @failing_domain_architectures.sort.to_h
failing_da_sum = @failing_domain_architectures.values.reduce(0, :+)
@failing_domain_architectures.each_pair do |arrangement, occurrences|
  log "#{occurrences.to_s.rjust(4)}: #{arrangement}"
end
# List every passing target domain, one log line per hash value, after
# re-ordering the hash by the default sort of its [key, value] pairs.
log
log 'Passing target domain(s):'
@passing_domains_data = @passing_domains_data.sort.to_h
# Each value is rendered through its own to_str (defined outside this view).
@passing_domains_data.each_value { |domain| log domain.to_str }
# List every failing target domain, one log line per hash value, after
# re-ordering the hash by the default sort of its [key, value] pairs.
log
log 'Failing target domain(s) (in proteins sequences with target domain architecture):'
@failing_domains_data = @failing_domains_data.sort.to_h
# Each value is rendered through its own to_str (defined outside this view).
@failing_domains_data.each_value { |domain| log domain.to_str }

# Internal consistency checks on the tallies computed above. Each check
# raises StandardError with a "this should not have happened" message when
# the numbers do not add up.

# Total must equal passing plus failing sequences.
if total_sequences != passing_sequences.size + failing_sequences.size
  raise StandardError,
        "this should not have happened: total seqs not equal to passing plus failing seqs"
end

# The two failure counters must together account for all failing sequences.
failing_reasons_total = @failing_proteins_bc_not_all_target_doms_present +
                        @failing_proteins_bc_missing_cutoffs
if failing_sequences.size != failing_reasons_total
  raise StandardError,
        "this should not have happened: failing seqs sums not consistent"
end

# The failing arrangement total may not exceed the number of proteins that
# failed on cutoffs. The message now states the condition that actually
# triggers the error (it previously described the opposite relation).
# NOTE(review): confirm that ">=" (rather than "==") is the intended invariant.
if failing_da_sum > @failing_proteins_bc_missing_cutoffs
  raise StandardError,
        "this should not have happened: failing da sum larger than failing seqs"
end

# The passing arrangement total must equal the number of passing sequences.
if passing_sequences.size != passing_da_sum
  raise StandardError,
        "this should not have happened: passing seqs not equal to passing da sum"
end

# Final summary: one log line per tally, each value right-aligned to a width
# of 5 characters, framed by an empty log call before and after.
log
[
  ["Protein sequences in sequence (fasta) file: ", in_msa.get_number_of_seqs],
  ["Protein sequences in hmmscan output file : ", total_sequences],
  [" Passing protein sequences : ", passing_sequences.size],
  [" Failing protein sequences : ", failing_sequences.size],
  [" Not all target domain present : ", @failing_proteins_bc_not_all_target_doms_present],
  [" Target domain(s) failing cutoffs : ", @failing_proteins_bc_missing_cutoffs]
].each { |label, value| log label + value.to_s.rjust(5) }
log
