v 1.001
[jalview.git] / forester / ruby / evoruby / lib / evo / io / parser / hmmscan_multi_domain_extractor.rb
1 #
2 # = lib/evo/io/parser/hmmscan_domain_extractor.rb - HmmscanMultiDomainExtractor class
3 #
4 # Copyright::    Copyright (C) 2017 Christian M. Zmasek
5 # License::      GNU Lesser General Public License (LGPL)
6 #
7 # Last modified: 2017/03/08
8
9 ####
10 # Import: if multiple copies of same domain, thresholds need to be same!
11 ####
12
13 require 'lib/evo/util/constants'
14 require 'lib/evo/msa/msa_factory'
15 require 'lib/evo/io/msa_io'
16 require 'lib/evo/io/writer/fasta_writer'
17 require 'lib/evo/io/parser/fasta_parser'
18 require 'lib/evo/io/parser/hmmscan_parser'
19
20 module Evoruby
21   class HmmscanMultiDomainExtractor
22
23     TESTING = false
24     OUTPUT_ID = 'mdsx'
25     DOMAIN_DELIMITER = ' -- '
26
27     IE_CUTOFF_FOR_DA_OVERVIEW      = 1E-6
28     REL_LEN_CUTOFF_FOR_DA_OVERVIEW = 0.5
29
30     PASSING_FL_SEQS_SUFFIX    = "_#{OUTPUT_ID}_passing_full_length_seqs.fasta"
31     FAILING_FL_SEQS_SUFFIX    = "_#{OUTPUT_ID}_failing_full_length_seqs.fasta"
32     TARGET_DA_SUFFIX          = "_#{OUTPUT_ID}_target_da.fasta"
33     CONCAT_TARGET_DOM_SUFFIX  = "_#{OUTPUT_ID}_concat_target_doms.fasta"
34     TARGET_DOM_OUTPUT_MIDPART = "_#{OUTPUT_ID}_target_dom_"
35     LOG_FILE_SUFFIX           = "_#{OUTPUT_ID}.log"
36     def initialize
37       @passing_domains_data = nil
38       @failing_domains_data = nil
39       @failing_domain_architectures = nil
40       @passsing_domain_architectures = nil
41       @failing_proteins_bc_not_all_target_doms_present = nil
42       @failing_proteins_bc_missing_cutoffs = nil
43       @encountered_domain_architectures = nil
44       @log = nil
45     end
46
47     # raises ArgumentError, IOError, StandardError
48     def parse( target_da,
49       hmmscan_output,
50       fasta_sequence_file,
51       outfile_base,
52       log_str )
53
54       passing_fl_seqs_outfile   = outfile_base + PASSING_FL_SEQS_SUFFIX
55       failing_fl_seqs_outfile   = outfile_base + FAILING_FL_SEQS_SUFFIX
56       target_da_outfile         = outfile_base + TARGET_DA_SUFFIX
57       concat_target_dom_outfile = outfile_base + CONCAT_TARGET_DOM_SUFFIX
58       logfile                   = outfile_base + LOG_FILE_SUFFIX
59
60       Util.check_file_for_readability( hmmscan_output )
61       Util.check_file_for_readability( fasta_sequence_file )
62       Util.check_file_for_writability( passing_fl_seqs_outfile )
63       Util.check_file_for_writability( failing_fl_seqs_outfile )
64       Util.check_file_for_writability( target_da_outfile )
65       Util.check_file_for_writability( concat_target_dom_outfile )
66       Util.check_file_for_writability( logfile )
67
68       @log = log_str
69
70       in_msa = nil
71       factory = MsaFactory.new()
72       in_msa = factory.create_msa_from_file( fasta_sequence_file, FastaParser.new() )
73
74       if ( in_msa == nil || in_msa.get_number_of_seqs() < 1 )
75         error_msg = "could not find fasta sequences in " + fasta_sequence_file
76         raise IOError, error_msg
77       end
78
79       failed_seqs_msa = Msa.new
80       passed_seqs_msa = Msa.new
81
82       hmmscan_parser = HmmscanParser.new( hmmscan_output )
83       results = hmmscan_parser.parse
84
85       target_domain_ary = parse_da target_da
86
87       target_domains = Hash.new
88
89       target_domain_architecure = ''
90
91       target_domain_ary.each do |target_domain|
92         target_domains[target_domain.name] = target_domain
93         if target_domain_architecure.length > 0
94           target_domain_architecure << DOMAIN_DELIMITER
95         end
96         target_domain_architecure << target_domain.name
97       end
98
99       target_domain_architecure.freeze
100
101       log 'Hmmscan outputfile             : ' + hmmscan_output
102       log 'Full length fasta sequence file: ' + fasta_sequence_file
103       log 'Target domain architecture     : ' + target_domain_architecure
104       target_domain_ary.each do |x|
105         log x.to_str
106       end
107
108       target_domain_names = Set.new
109
110       target_domains.each_key {|key| target_domain_names.add( key ) }
111
112       prev_query_seq_name = nil
113       domains_in_query_seq = Array.new
114       passing_sequences = Array.new # This will be an Array of Array of HmmscanResult for the same query seq.
115       failing_sequences = Array.new # This will be an Array of Array of HmmscanResult for the same query seq.
116       total_sequences = 0
117
118       @failing_domain_architectures = Hash.new
119       @passsing_domain_architectures = Hash.new
120       @passing_domains_data = Hash.new
121       @failing_domains_data = Hash.new
122       @encountered_domain_architectures= Hash.new
123       @failing_proteins_bc_not_all_target_doms_present = 0
124       @failing_proteins_bc_missing_cutoffs = 0
125       out_domain_msas = Hash.new
126       out_domain_architecture_msa = Msa.new
127       out_concatenated_domains_msa = Msa.new
128       results.each do |hmmscan_result|
129         if ( prev_query_seq_name != nil ) && ( hmmscan_result.query != prev_query_seq_name )
130           if compare(domains_in_query_seq, target_domain_names, target_domains, in_msa, out_domain_msas, out_domain_architecture_msa, out_concatenated_domains_msa, target_domain_architecure)
131             passing_sequences.push(domains_in_query_seq)
132           else
133             failing_sequences.push(domains_in_query_seq)
134           end
135           domains_in_query_seq = Array.new
136           total_sequences += 1
137         end
138         prev_query_seq_name = hmmscan_result.query
139         domains_in_query_seq.push(hmmscan_result)
140       end # each result
141
142       if prev_query_seq_name != nil
143         if compare(domains_in_query_seq, target_domain_names, target_domains, in_msa, out_domain_msas, out_domain_architecture_msa, out_concatenated_domains_msa, target_domain_architecure)
144           passing_sequences.push(domains_in_query_seq)
145         else
146           failing_sequences.push(domains_in_query_seq)
147         end
148         total_sequences += 1
149       end
150
151       if in_msa.get_number_of_seqs < total_sequences
152         error_msg = "hmmscan output contains more protein sequences than fasta sequence file"
153         raise IOError, error_msg
154       end
155
156       log
157       log 'Domain architecture overview using default iE-cutoff ' +
158       IE_CUTOFF_FOR_DA_OVERVIEW.to_s + ' and relative length cutoff ' + REL_LEN_CUTOFF_FOR_DA_OVERVIEW.to_s + ':'
159
160       @encountered_domain_architectures = @encountered_domain_architectures.sort_by {|k, v| v}.reverse.to_h
161       counter = 1;
162       @encountered_domain_architectures.each do |k, v|
163         log counter.to_s.rjust(2) + ') ' +  v.to_s.rjust(5) + ': ' + k
164         counter += 1
165         if counter > 40
166           break
167         end
168       end
169
170       log
171       log 'Passing domain arrangements of target domain(s):'
172       @passsing_domain_architectures = @passsing_domain_architectures.sort{|a, b|a<=>b}.to_h
173       passing_da_sum = 0
174       @passsing_domain_architectures.each do |da, count|
175         passing_da_sum += count
176         log count.to_s.rjust(4) + ': ' + da
177       end
178       log
179       log 'Failing domain arrangements of target domain(s):'
180       @failing_domain_architectures = @failing_domain_architectures.sort{|a, b|a<=>b}.to_h
181       failing_da_sum = 0
182       @failing_domain_architectures .each do |da, count|
183         failing_da_sum += count
184         log count.to_s.rjust(4) + ': ' + da
185       end
186       log
187       log 'Passing target domain(s):'
188       @passing_domains_data = @passing_domains_data.sort{|a, b|a<=>b}.to_h
189       @passing_domains_data.each do |n, d|
190         log d.to_str
191       end
192       log
193       log'Failing target domain(s):'
194       @failing_domains_data = @failing_domains_data.sort{|a, b|a<=>b}.to_h
195       @failing_domains_data.each do |n, d|
196         log d.to_str
197       end
198
199       unless total_sequences == (passing_sequences.size + failing_sequences.size)
200         error_msg = "this should not have happened: total seqs not equal to passing plus failing seqs"
201         raise StandardError, error_msg
202       end
203
204       unless failing_sequences.size == (@failing_proteins_bc_not_all_target_doms_present + @failing_proteins_bc_missing_cutoffs)
205         error_msg = "this should not have happened: failing seqs sums not consistent"
206         raise StandardError, error_msg
207       end
208
209       unless @failing_proteins_bc_missing_cutoffs >= failing_da_sum
210         error_msg = "this should not have happened: failing seqs larger than failing da sum"
211         raise StandardError, error_msg
212       end
213
214       unless passing_sequences.size == passing_da_sum
215         error_msg = "this should not have happened: passing seqs not equal to passing da sum"
216         raise StandardError, error_msg
217       end
218
219       log
220       log "Protein sequences in sequence (fasta) file: " + in_msa.get_number_of_seqs.to_s.rjust(5)
221       log "Protein sequences in hmmscan output file  : " + total_sequences.to_s.rjust(5)
222       log "  Passing protein sequences               : " + passing_sequences.size.to_s.rjust(5)
223       log "  Failing protein sequences               : " + failing_sequences.size.to_s.rjust(5)
224       log "    Not all target domain present         : " + @failing_proteins_bc_not_all_target_doms_present.to_s.rjust(5)
225       log "    Target domain(s) failing cutoffs      : " + @failing_proteins_bc_missing_cutoffs.to_s.rjust(5)
226       log
227
228       out_domain_msas.keys.sort.each do |domain_name|
229         file_name = outfile_base + TARGET_DOM_OUTPUT_MIDPART + domain_name + '.fasta'
230         write_msa out_domain_msas[domain_name], file_name
231         log "Wrote passing target domain sequence for " +  domain_name.ljust(16) + ': ' + file_name
232       end
233
234       write_msa out_domain_architecture_msa, target_da_outfile
235       log 'Wrote target domain architecture                         : ' + target_da_outfile
236
237       write_msa out_concatenated_domains_msa, concat_target_dom_outfile
238       log 'Wrote concatenated target domain(s)                      : ' + concat_target_dom_outfile
239
240       passing_sequences.each do | domains |
241         query_name = domains[0].query
242         if (!TESTING) || (passed_seqs_msa.find_by_name_start( query_name, true ).length < 1)
243           add_sequence( query_name, in_msa, passed_seqs_msa )
244         else
245           error_msg = 'this should not have happened'
246           raise StandardError, error_msg
247         end
248       end
249
250       failing_sequences.each do | domains |
251         query_name = domains[0].query
252         if (!TESTING) || (failed_seqs_msa.find_by_name_start( query_name, true ).length < 1)
253           add_sequence( query_name, in_msa, failed_seqs_msa )
254         else
255           error_msg = 'this should not have happened'
256           raise StandardError, error_msg
257         end
258       end
259
260       write_msa passed_seqs_msa, passing_fl_seqs_outfile
261       log 'Wrote passing full length protein sequences              : ' + passing_fl_seqs_outfile
262       write_msa failed_seqs_msa, failing_fl_seqs_outfile
263       log 'Wrote failing full length protein sequences              : ' + failing_fl_seqs_outfile
264
265       begin
266         f = File.open( logfile, 'w' )
267         f.print( @log )
268         f.close
269       rescue Exception => e
270         Util.fatal_error( PRG_NAME, "error: " + e.to_s )
271       end
272       log 'Wrote log file                                           : ' + logfile
273
274     end # parse
275
276     private
277
278     # domains: Array of HmmscanResult
279     def collect(domains, ie_cutoff, rel_len_cutoff)
280       passed = Array.new
281       domains.each do |domain|
282         length = 1 + domain.env_to - domain.env_from
283         if (domain.i_e_value <= ie_cutoff) && (length >= (rel_len_cutoff * domain.tlen))
284           passed.push domain
285         end
286       end
287       passed.sort! { |a,b| a.ali_from <=> b.ali_from }
288       s = ''
289       passed.each do |domain|
290         if s.length > 0
291           s << DOMAIN_DELIMITER
292         end
293         s << domain.model
294       end
295       if s.length > 0
296         if @encountered_domain_architectures.has_key? s
297           @encountered_domain_architectures[s] = 1 + @encountered_domain_architectures[s]
298         else
299           @encountered_domain_architectures[s] = 1
300         end
301       end
302     end
303
304     # domains_in_query_seq: Array of HmmscanResult
305     # target_domain_names: Set of String
306     # target_domains: Hash String->TargetDomain
307     # target_domain_architecture: String
308     def compare(domains_in_query_seq,
309       target_domain_names,
310       target_domains,
311       in_msa,
312       out_domain_msas,
313       out_domain_architecture_msa,
314       out_contactenated_domains_msa,
315       target_domain_architecture)
316
317       collect(domains_in_query_seq, IE_CUTOFF_FOR_DA_OVERVIEW, REL_LEN_CUTOFF_FOR_DA_OVERVIEW)
318
319       domain_names_in_query_seq = Set.new
320       domains_in_query_seq.each do |domain|
321         domain_names_in_query_seq.add(domain.model)
322       end
323
324       passed_domains = Array.new
325       passed_domains_counts = Hash.new
326
327       if (domain_names_in_query_seq.length > 0) && (target_domain_names.subset?(domain_names_in_query_seq))
328
329         domains_in_query_seq.each do |domain|
330           if target_domains.has_key?(domain.model)
331             target_domain = target_domains[domain.model]
332
333             if target_domain.i_e_value >= 0
334               if domain.i_e_value > target_domain.i_e_value
335                 addToFailingDomainData(domain)
336                 next
337               end
338             end
339             if target_domain.abs_len > 0
340               length = 1 + domain.env_to - domain.env_from
341               if length < target_domain.abs_len
342                 addToFailingDomainData(domain)
343                 next
344               end
345             end
346             if target_domain.rel_len > 0
347               length = 1 + domain.env_to - domain.env_from
348               if length < (target_domain.rel_len * domain.tlen)
349                 addToFailingDomainData(domain)
350                 next
351               end
352             end
353
354             passed_domains.push(domain)
355
356             if (passed_domains_counts.key?(domain.model))
357               passed_domains_counts[domain.model] = passed_domains_counts[domain.model] + 1
358             else
359               passed_domains_counts[domain.model] = 1
360             end
361
362             addToPassingDomainData(domain)
363
364           end # if target_domains.has_key?(domain.model)
365         end # domains_in_query_seq.each do |domain|
366       else
367         @failing_proteins_bc_not_all_target_doms_present += 1
368         return false
369       end # target_domain_names.subset?(domain_names_in_query_seq)
370
371       if passed_domains.length < 1
372         @failing_proteins_bc_missing_cutoffs += 1
373         return false
374       end
375
376       passed_domains.sort! { |a,b| a.ali_from <=> b.ali_from }
377       # Compare architectures
378       if !compareArchitectures(target_domain_architecture, passed_domains, false)
379         @failing_proteins_bc_missing_cutoffs += 1
380         return false
381       end
382
383       domain_counter = Hash.new
384
385       min_env_from = 10000000
386       max_env_to = 0
387
388       query_seq = nil
389
390       concatenated_domains = ''
391
392       passed_domains.each do |domain|
393         domain_name = domain.model
394
395         unless out_domain_msas.has_key? domain_name
396           out_domain_msas[ domain_name ] = Msa.new
397         end
398
399         if domain_counter.key?(domain_name)
400           domain_counter[domain_name] = domain_counter[domain_name] + 1
401         else
402           domain_counter[domain_name] = 1
403         end
404
405         if query_seq == nil
406           query_seq = domain.query
407           query_seq.freeze
408         elsif query_seq != domain.query
409           error_msg = "seq names do not match: this should not have happened"
410           raise StandardError, error_msg
411         end
412
413         extracted_domain_seq = extract_domain( query_seq,
414         domain_counter[domain_name],
415         passed_domains_counts[domain_name],
416         domain.env_from,
417         domain.env_to,
418         in_msa,
419         out_domain_msas[ domain_name ] )
420
421         concatenated_domains += extracted_domain_seq
422
423         if domain.env_from < min_env_from
424           min_env_from = domain.env_from
425         end
426         if domain.env_to > max_env_to
427           max_env_to = domain.env_to
428         end
429       end
430
431       extract_domain( query_seq,
432       1,
433       1,
434       min_env_from,
435       max_env_to,
436       in_msa,
437       out_domain_architecture_msa )
438
439       out_contactenated_domains_msa.add( query_seq, concatenated_domains)
440
441       true
442     end
443
444     def addToPassingDomainData(domain)
445       unless ( @passing_domains_data.key?(domain.model))
446         @passing_domains_data[domain.model] = DomainData.new(domain.model)
447       end
448       @passing_domains_data[domain.model].add( domain.model, (1 + domain.env_to - domain.env_from), domain.i_e_value)
449     end
450
451     def addToFailingDomainData(domain)
452       unless ( @failing_domains_data.key?(domain.model))
453         @failing_domains_data[domain.model] = DomainData.new(domain.model)
454       end
455       @failing_domains_data[domain.model].add( domain.model, (1 + domain.env_to - domain.env_from), domain.i_e_value)
456     end
457
458     # passed_domains needs to be sorted!
459     def compareArchitectures(target_domain_architecture, passed_domains, strict = false)
460       domain_architecture = ''
461       passed_domains.each do |domain|
462         if domain_architecture.length > 0
463           domain_architecture += DOMAIN_DELIMITER
464         end
465         domain_architecture += domain.model
466       end
467       pass = false
468       if strict
469         pass = (target_domain_architecture == domain_architecture)
470       else
471         pass = (domain_architecture.index(target_domain_architecture) != nil)
472       end
473       if pass
474         if (@passsing_domain_architectures.key?(domain_architecture))
475           @passsing_domain_architectures[domain_architecture] = @passsing_domain_architectures[domain_architecture] + 1
476         else
477           @passsing_domain_architectures[domain_architecture] = 1
478         end
479       else
480         if ( @failing_domain_architectures.key?(domain_architecture))
481           @failing_domain_architectures[domain_architecture] =  @failing_domain_architectures[domain_architecture] + 1
482         else
483           @failing_domain_architectures[domain_architecture] = 1
484         end
485       end
486       return pass
487     end
488
489     def write_msa( msa, filename )
490       io = MsaIO.new()
491       w = FastaWriter.new()
492       w.set_line_width( 60 )
493       w.clean( true )
494       File.delete(filename) if File.exist?(filename)
495       begin
496         io.write_to_file( msa, filename, w )
497       rescue Exception
498         error_msg = "could not write to \"" + filename + "\""
499         raise IOError, error_msg
500       end
501     end
502
503     def add_sequence( sequence_name, in_msa, add_to_msa )
504       seqs = in_msa.find_by_name_start( sequence_name, true )
505       if ( seqs.length < 1 )
506         error_msg = "sequence \"" + sequence_name + "\" not found in sequence file"
507         raise StandardError, error_msg
508       end
509       if ( seqs.length > 1 )
510         error_msg = "sequence \"" + sequence_name + "\" not unique in sequence file"
511         raise StandardError, error_msg
512       end
513       seq = in_msa.get_sequence( seqs[ 0 ] )
514       add_to_msa.add_sequence( seq )
515     end
516
517     def extract_domain( seq_name,
518       number,
519       out_of,
520       seq_from,
521       seq_to,
522       in_msa,
523       out_msa)
524       if number < 1 || out_of < 1 || number > out_of
525         error_msg = "number=" + number.to_s + ", out of=" + out_of.to_s
526         raise StandardError, error_msg
527       end
528       if seq_from < 1 || seq_to < 1 || seq_from >= seq_to
529         error_msg = "impossible: seq-from=" + seq_from.to_s + ", seq-to=" + seq_to.to_s
530         raise StandardError, error_msg
531       end
532       seqs = in_msa.find_by_name_start(seq_name, true)
533       if seqs.length < 1
534         error_msg = "sequence \"" + seq_name + "\" not found in sequence file"
535         raise IOError, error_msg
536       end
537       if seqs.length > 1
538         error_msg = "sequence \"" + seq_name + "\" not unique in sequence file"
539         raise IOError, error_msg
540       end
541       # hmmscan is 1 based, whereas sequences are 0 bases in this package.
542       seq = in_msa.get_sequence( seqs[ 0 ] ).get_subsequence( seq_from - 1, seq_to - 1 )
543
544       orig_name = seq.get_name
545
546       seq.set_name( orig_name.split[ 0 ] )
547
548       if out_of != 1
549         seq.set_name( seq.get_name + "~" + number.to_s + "-" + out_of.to_s )
550       end
551
552       out_msa.add_sequence( seq )
553
554       seq.get_sequence_as_string
555     end
556
557     def parse_da( target_da_str )
558       target_domain_hash = Hash.new
559       target_domain_ary = Array.new
560       target_das = target_da_str.split '--'
561       target_das.each do |x|
562         inds = x.split '='
563         unless inds.size == 4
564           raise IOError, 'domain architecture is ill formatted: ' + x
565         end
566         target_domain_name = inds[0]
567         ie_cutoff = Float(inds[1])
568         abs_len_cutoff = Integer(inds[2])
569         rel_len_cutoff = Float(inds[3])
570         if target_domain_hash.has_key? target_domain_name
571           target_domain_ary.push target_domain_hash[target_domain_name]
572         else
573           td = TargetDomain.new(target_domain_name, ie_cutoff, abs_len_cutoff, rel_len_cutoff)
574           target_domain_hash[target_domain_name] = td
575           target_domain_ary.push td
576         end
577       end
578       target_domain_ary
579     end
580
581     def log(str = '')
582       puts str
583       @log << str << Constants::LINE_DELIMITER
584     end
585
586   end # class HmmscanMultiDomainExtractor
587
588   class DomainData
589     def initialize( name )
590       if (name == nil) || name.size < 1
591         error_msg = "domain name nil or empty"
592         raise IOError, error_msg
593       end
594       @name = name
595       @count = 0
596       @i_e_value_min = 10000000.0
597       @i_e_value_max = -1.0
598       @i_e_value_sum = 0.0
599       @len_min = 10000000
600       @len_max = -1
601       @len_sum = 0.0
602     end
603
604     def add( name, length, i_e_value)
605       if name != @name
606         error_msg = "domain names do not match"
607         raise IOError, error_msg
608       end
609
610       if length < 0
611         error_msg = "length cannot me negative"
612         raise IOError, error_msg
613       end
614       if i_e_value < 0
615         error_msg = "iE-value cannot me negative"
616         raise IOError, error_msg
617       end
618       @count += 1
619       @i_e_value_sum += i_e_value
620       @len_sum += length
621       if i_e_value > @i_e_value_max
622         @i_e_value_max = i_e_value
623       end
624       if i_e_value < @i_e_value_min
625         @i_e_value_min = i_e_value
626       end
627       if length > @len_max
628         @len_max = length
629       end
630       if length < @len_min
631         @len_min = length
632       end
633     end
634
635     def avg_length
636       if @count == 0
637         return 0
638       end
639       @len_sum / @count
640     end
641
642     def avg_i_e_value
643       if @count == 0
644         return 0
645       end
646       @i_e_value_sum / @count
647     end
648
649     def to_str
650       s = ''
651       s << @name.rjust(16) + ': '
652       s << @count.to_s.rjust(4) + '  '
653       s << avg_length.round(1).to_s.rjust(6) + ' '
654       s << @len_min.to_s.rjust(4) + ' -'
655       s << @len_max.to_s.rjust(4) + '  '
656       s << ("%.2E" % avg_i_e_value).rjust(9) + ' '
657       s << ("%.2E" % @i_e_value_min).rjust(9) + ' -'
658       s << ("%.2E" % @i_e_value_max).rjust(9) + ' '
659       s
660     end
661
662     attr_reader :name, :count, :i_e_value_min, :i_e_value_max, :length_min, :length_max
663
664   end
665
666   class TargetDomain
667     def initialize(name, i_e_value, abs_len, rel_len)
668       if (name == nil) || name.size < 1
669         error_msg = "target domain name nil or empty"
670         raise IOError, error_msg
671       end
672       if rel_len > 1
673         error_msg = name + ": target domain relative length is greater than 1"
674         raise IOError, error_msg
675       end
676       if (abs_len <= 0) && (rel_len <= 0)
677         error_msg = name + ": need to have either absolute length or relative length cutoff"
678         raise IOError, error_msg
679       end
680       if (abs_len > 0) && (rel_len > 0)
681         error_msg = name + ": cannot have both absolute length and relative length cutoff"
682         raise IOError, error_msg
683       end
684       @name = name
685       @i_e_value = i_e_value
686       @abs_len = abs_len
687       @rel_len = rel_len
688     end
689
690     def to_str
691       s = @name.rjust(16) + ':'
692       s << ' iE-cutoff: ' + ("%.2E" % @i_e_value).rjust(9)
693       if @abs_len > 0
694         s << ', abs len-cutoff: ' + @abs_len.to_s.rjust(4)
695       end
696       if @rel_len > 0
697         s << ', rel len-cutoff: ' + @rel_len.to_s.rjust(4)
698       end
699       s
700     end
701     attr_reader :name, :i_e_value, :abs_len, :rel_len
702   end
703
704 end # module Evoruby
705