2 # = lib/evo/apps/fasta_extractor.rb - FastaExtractor class
4 # Copyright:: Copyright (C) 2017 Christian M. Zmasek
5 # License:: GNU Lesser General Public License (LGPL)
7 require 'lib/evo/util/constants'
8 require 'lib/evo/util/util'
9 require 'lib/evo/util/command_line_arguments'
# One-line description of what this program does.
16 PRG_DESC = "extraction of molecular sequences from a fasta file"
# Web address associated with the program.
18 WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
# Option name tested (together with HELP_OPTION_2) via cla.is_option_set?
# after the command line has been parsed.
20 HELP_OPTION_1 = 'help'
# Driver logic: validates the command line, checks the input and output
# paths, runs extract_sequences and reports the outcome.
# NOTE(review): the enclosing method header and several branch bodies are
# not visible in this excerpt; comments below cover only the visible lines.

# Presumably prints the program banner; the remaining arguments of this
# call are not visible here -- confirm.
24 Util.print_program_information( PRG_NAME,
# Branch for a missing or empty argument list (branch body not visible here).
31 if ( ARGV == nil || ( ARGV.length < 1 ) )
# An ArgumentError raised while building CommandLineArguments is reported
# through Util.fatal_error with the exception text appended.
37 cla = CommandLineArguments.new( ARGV )
38 rescue ArgumentError => e
39 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
# Branch taken when either help option is set (branch body not visible here).
42 if ( cla.is_option_set?( HELP_OPTION_1 ) ||
43 cla.is_option_set?( HELP_OPTION_2 ) )
# Branch taken when the number of file arguments is not exactly three;
# the three arguments are read below as input file, query and output file.
48 if ( cla.get_number_of_files != 3 )
# allowed_opts starts out empty; whether anything is added before the
# validation call is not visible here.
53 allowed_opts = Array.new
# A non-empty result from the validation is reported as unknown option(s).
55 disallowed = cla.validate_allowed_options_as_str( allowed_opts )
56 if ( disallowed.length > 0 )
57 Util.fatal_error( PRG_NAME,
58 "unknown option(s): " + disallowed,
# Positional arguments: 0 = input fasta file, 1 = query, 2 = output file.
62 input_file = cla.get_file_name( 0 )
63 query = cla.get_file_name( 1 )
64 output_file = cla.get_file_name( 2 )
# The input file must already exist.
66 if !File.exist?( input_file )
67 Util.fatal_error( PRG_NAME, "error: input file [#{input_file}] does not exist" )
# An output path that already exists is reported as an error.
69 if File.exist?( output_file )
70 Util.fatal_error( PRG_NAME, "error: [#{output_file}] already exists" )
# results is concatenated onto a String below, so extract_sequences is
# expected to return a String here.
73 results = extract_sequences( query, input_file, output_file )
75 Util.print_message( PRG_NAME, "matched: " + results )
76 Util.print_message( PRG_NAME, "wrote: " + output_file )
77 Util.print_message( PRG_NAME, "OK" )
# Reads fasta_file line by line, keeps running counts (matches, total) and
# opens output_file for appending.
# NOTE(review): several lines of this method are not visible in this
# excerpt, including how query is compared against the input and what is
# written to the output; the notes below cover only the visible lines.
#
# query       - value supplied by the caller; its use is not visible here.
# fasta_file  - path of the file opened for reading.
# output_file - path of the file opened in append mode.
#
# The last visible expression builds the String "<matches>/<total>";
# presumably this is the method's return value -- confirm.
81 def extract_sequences( query, fasta_file, output_file )
# Opened in append mode ("a").
# NOTE(review): no close of this handle is visible in this excerpt --
# confirm it is closed before the method returns.
82 output = File.open( output_file, "a" )
# Flag that starts out false and is reset to false further down; where it
# is set to true is not visible here.
83 matching_state = false
# Block form of File.open: the input file is closed when the block exits.
86 File.open( fasta_file ) do | file |
87 while line = file.gets
# Only lines for which Util.is_string_empty? is false are processed.
88 if !Util.is_string_empty?( line )
# The leading "\r" (carriage return) lets the running counter overwrite
# itself on the same console line.
92 STDOUT.write "\r#{matches}/#{total}"
100 matching_state = false
109 matches.to_s + "/" + total.to_s
# Usage synopsis: three positional arguments (input fasta file, query,
# output file). The enclosing method header is not visible in this excerpt.
115 puts( " " + PRG_NAME + ".rb <input fasta file> <query> <output file>" )
# Example invocation.
119 puts( " " + PRG_NAME + ".rb Pfam-A.fasta kinase kinases" )
123 end # class FastaExtractor