2 # = lib/evo/tool/multi_domain_seq_extractor - MultiDomainSeqExtractor class
4 # Copyright:: Copyright (C) 2017 Christian M. Zmasek
5 # License:: GNU Lesser General Public License (LGPL)
7 require 'lib/evo/util/constants'
8 require 'lib/evo/util/util'
9 require 'lib/evo/util/command_line_arguments'
10 require 'lib/evo/io/parser/hmmscan_multi_domain_extractor'
# Command-line tool class; per PRG_DESC below it extracts multi domain
# sequences from hmmscan output. The actual parsing is delegated to
# HmmscanMultiDomainExtractor#parse.
#
# NOTE(review): the embedded line numbers in this listing skip values, so
# statements between the visible lines (method headers, begin/else/end
# keywords, some constant definitions such as PRG_NAME and HELP_OPTION_2)
# are not shown here. Comments below describe only the visible statements.
13 class MultiDomainSeqExtractor
# Program metadata used in the banner, the log header and the usage text.
17 PRG_DESC = "Extraction of multi domain sequences from hmmscan output"
19 WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
# Name of an option that triggers the help output (checked further down).
21 HELP_OPTION_1 = 'help'
# Print the program banner (remaining arguments are not visible here).
25 Util.print_program_information( PRG_NAME,
# Guard against a missing or empty argument list.
32 if ( ARGV == nil || ( ARGV.length < 1 ) )
# Wrap ARGV in the project's command line parser.
38 cla = CommandLineArguments.new( ARGV )
# NOTE(review): `$!` holds the current exception object, not a String;
# String#+ with a non-String operand raises TypeError, so this likely
# needs `$!.to_s` (compare the `e.to_s` calls further down) - confirm.
40 Util.fatal_error( PRG_NAME, "error: " + $!, STDOUT )
# Either help option being set is handled by the (not visible) branch body.
43 if ( cla.is_option_set?( HELP_OPTION_1 ) ||
44 cla.is_option_set?( HELP_OPTION_2 ) )
# Exactly two or three file arguments are accepted.
49 unless ( cla.get_number_of_files == 2 || cla.get_number_of_files == 3 )
# Collect the permitted options and reject anything else.
# NOTE(review): whether entries are pushed onto allowed_opts is not visible.
54 allowed_opts = Array.new
56 disallowed = cla.validate_allowed_options_as_str( allowed_opts )
57 if ( disallowed.length > 0 )
58 Util.fatal_error( PRG_NAME,
59 "unknown option(s): " + disallowed,
# Positional arguments: the first is used as the domain id (shown as <da>
# in the usage text), the second is the hmmscan output file.
63 domain_id = cla.get_file_name( 0 )
64 hmmscan_output = cla.get_file_name( 1 )
65 fasta_sequence_file = ""
# With three arguments the FASTA sequence file is given explicitly.
68 if cla.get_number_of_files == 3
69 fasta_sequence_file = cla.get_file_name( 2 )
# Presumably the else branch (keyword not visible): derive the FASTA file
# from the hmmscan output name by looking for the Constants::HMMSCAN marker.
71 hmmscan_index = hmmscan_output.index(Constants::HMMSCAN)
72 if ( hmmscan_index != nil )
# Prefix = the part of the file name before the marker.
# NOTE(review): if the marker is at index 0 the range becomes 0..-1, which
# selects the whole string rather than an empty prefix.
73 prefix = hmmscan_output[0 .. hmmscan_index-1 ]
74 suffix = Constants::ID_NORMALIZED_FASTA_FILE_SUFFIX
# Search the current working directory for files matching prefix/suffix.
75 files = Dir.entries( "." )
76 matching_files = Util.get_matching_files( files, prefix, suffix)
# Exactly one match is required; zero or several matches are fatal.
# NOTE(review): both messages say "second argument", while the usage line
# near the end of the class lists the FASTA file as the third positional
# argument - verify the wording.
77 if matching_files.length < 1
78 Util.fatal_error( PRG_NAME, 'no file matching [' + prefix +
79 '...' + suffix + '] present in current directory: need to indicate <file containing complete sequences in fasta format> as second argument' )
81 if matching_files.length > 1
82 Util.fatal_error( PRG_NAME, 'more than one file matching [' +
83 prefix + '...' + suffix + '] present in current directory: need to indicate <file containing complete sequences in fasta format> as second argument' )
85 fasta_sequence_file = matching_files[ 0 ]
# Presumably reached when the marker is absent from the file name.
87 Util.fatal_error( PRG_NAME, 'input files do not seem in format for standard analysis pipeline, need to explicitly indicate all' )
# Derive the output base name: the first occurrence of the marker in the
# hmmscan output name is replaced by '_'.
# NOTE(review): this repeats the marker lookup done above.
90 hmmscan_index = hmmscan_output.index(Constants::HMMSCAN)
91 if hmmscan_index != nil
92 outfile_base = hmmscan_output.sub(Constants::HMMSCAN, '_')
94 Util.fatal_error( PRG_NAME, 'input files do not seem in format for standard analysis pipeline, need to explicitly indicate all' )
# Append the program metadata to the log string, one entry per line
# (log_str is initialised on a line not visible here).
99 log_str << PRG_NAME << Constants::LINE_DELIMITER
100 log_str << PRG_VERSION << Constants::LINE_DELIMITER
101 log_str << PRG_DESC << Constants::LINE_DELIMITER
102 log_str << PRG_DATE << Constants::LINE_DELIMITER
103 log_str << Constants::LINE_DELIMITER
# Run the extraction (arguments after domain_id are not visible here).
106 parser = HmmscanMultiDomainExtractor.new()
107 parser.parse( domain_id,
# Argument and I/O errors are reported as ordinary fatal errors.
112 rescue ArgumentError, IOError => e
113 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# NOTE(review): rescuing Exception also catches SystemExit and Interrupt;
# StandardError is usually the intended catch-all - confirm.
115 rescue Exception => e
117 Util.fatal_error( PRG_NAME, "unexpected exception: " + e.to_s, STDOUT )
# Final success message.
121 Util.print_message( PRG_NAME, "OK" )
# Usage text lines (presumably part of a help-printing method whose header
# is not visible).
130 puts " " + PRG_NAME + ".rb <da> <hmmscan outputfile> [file containing complete sequences in fasta format]"
136 puts " " + PRG_NAME + ".rb "
141 end # class MultiDomainSeqExtractor