2 # = lib/evo/apps/fasta_extractor.rb - FastaExtractor class
4 # Copyright:: Copyright (C) 2006-2008 Christian M. Zmasek
5 # License:: GNU Lesser General Public License (LGPL)
7 # $Id: fasta_extractor.rb,v 1.2 2010/12/13 19:00:11 cmzmasek Exp $
10 require 'lib/evo/util/util'
11 require 'lib/evo/util/constants'
12 require 'lib/evo/util/command_line_arguments'
21 PRG_DESC = "extraction of nucleotide sequences from a fasta file by names from wublast search"
22 PRG_DATE = "2008.08.09"
23 COPYRIGHT = "2008-2009 Christian M Zmasek"
24 CONTACT = "phylosoft@gmail.com"
25 WWW = "www.phylosoft.org"
26 HELP_OPTION_1 = 'help'
32 Util.print_program_information( PRG_NAME,
41 ld = Constants::LINE_DELIMITER
44 cla = CommandLineArguments.new( ARGV )
45 rescue ArgumentError => e
46 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
49 if ( cla.is_option_set?( HELP_OPTION_1 ) ||
50 cla.is_option_set?( HELP_OPTION_2 ) )
55 if ( cla.get_number_of_files != 3 )
60 allowed_opts = Array.new
62 disallowed = cla.validate_allowed_options_as_str( allowed_opts )
63 if ( disallowed.length > 0 )
64 Util.fatal_error( PRG_NAME,
65 "unknown option(s): " + disallowed,
69 input_file = cla.get_file_name( 0 )
70 names_file = cla.get_file_name( 1 )
71 output_file = cla.get_file_name( 2 )
73 if !File.exist?( input_file )
74 Util.fatal_error( PRG_NAME, "error: input file [#{input_file}] does not exist" )
76 if !File.exist?( names_file )
77 Util.fatal_error( PRG_NAME, "error: names file [#{names_file}] does not exist" )
79 if File.exist?( output_file )
80 Util.fatal_error( PRG_NAME, "error: [#{output_file }] already exists" )
83 names = extract_names_with_frames( names_file )
85 extract_sequences( names, input_file, output_file )
88 Util.print_message( PRG_NAME, "OK" )
94 def extract_names_with_frames( names_file )
96 File.open( names_file ) do | file |
97 while line = file.gets
98 if ( !Util.is_string_empty?( line ) && !(line =~ /\s*#/ ) )
99 if ( line =~ /(\S+)\s+([+|-]\d)\s+\d+\s+(\S+)/ )
103 names[ name ] = "[" + frame + "] [" + e + "]"
111 def extract_sequences( names, fasta_file, output_file )
112 output = File.open( output_file, "a" )
113 matching_state = false
115 File.open( fasta_file ) do | file |
116 while line = file.gets
117 if !Util.is_string_empty?( line )
118 if ( line =~ /\s*>\s*(.+)/ )
120 if names.has_key?( name )
121 matching_state = true
123 puts counter.to_s + ". " +name + " " + names[ name ]
124 output.print( ">" + name + " " + names[ name ] )
125 output.print( Evoruby::Constants::LINE_DELIMITER )
127 matching_state = false
141 puts( " " + PRG_NAME + ".rb <input fasta file> <names file based on blast output> <output file>" )
145 end # class FastaExtractor