ADD_DOMAIN_NUMBER_OPTION = 'd'
ADD_DOMAIN_NUMBER_OPTION_AS_DIGIT = 'dd'
ADD_DOMAIN_NUMBER_OPTION_AS_LETTER = 'dl'
+ ADD_SPECIES = 's'
TRIM_OPTION = 't'
LOG_FILE_SUFFIX = '_domain_seq_extr.log'
PASSED_SEQS_SUFFIX = '_domain_seq_extr_passed'
add_domain_number_as_digit = true
end
+ add_species = false
+ if cla.is_option_set? ADD_SPECIES
+ add_species = true
+ end
+
if ( add_domain_number_as_letter && add_domain_number_as_digit )
puts( "attempt to add domain number as letter and digit at the same time" )
print_help
add_domain_number_as_digit,
add_domain_number_as_letter,
trim,
+ add_species,
log )
rescue ArgumentError, IOError, StandardError => e
Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
puts( " -" + ADD_DOMAIN_NUMBER_OPTION_AS_DIGIT + ": to add numbers to extracted domains as digit (example \"domain2\")" )
puts( " -" + ADD_DOMAIN_NUMBER_OPTION_AS_LETTER + ": to add numbers to extracted domains as letter (example \"domaina\")" )
puts( " -" + TRIM_OPTION + ": to remove the last 2 characters from sequence names" )
+ puts( " -" + ADD_SPECIES + ": to add species [in brackets]" )
puts()
end
add_domain_number_as_digit,
add_domain_number_as_letter,
trim_name,
+ add_species,
log )
Util.check_file_for_readability( hmmsearch_output )
add_domain_number,
add_domain_number_as_digit,
add_domain_number_as_letter,
- trim_name )
+ trim_name,
+ add_species )
if ( number < 1 || out_of < 1 || number > out_of )
error_msg = "impossible: number=" + number.to_s + ", out of=" + out_of.to_s
raise ArgumentError, error_msg
# hmmsearch is 1 based, wheres sequences are 0 bases in this package.
seq = in_msa.get_sequence( seqs[ 0 ] ).get_subsequence( seq_from - 1, seq_to - 1 )
- seq.set_name( seq.get_name.split[ 0 ] )
+ orig_name = seq.get_name
+
+ seq.set_name( orig_name.split[ 0 ] )
if add_position
seq.set_name( seq.get_name + "_" + seq_from.to_s + "-" + seq_to.to_s )
elsif ( add_domain_number )
seq.set_name( seq.get_name + "~" + number.to_s + "-" + out_of.to_s )
end
+
+ if add_species
+   # Reuse the last "[...]" group of the original sequence name as the
+   # species tag of the extracted domain sequence.
+   a = orig_name.rindex "["
+   b = orig_name.rindex "]"
+   # Both brackets must be present and in order; otherwise the slice below
+   # would be empty and a meaningless tag would be appended.
+   unless a && b && a < b
+     error_msg = "species not found in " + orig_name
+     raise StandardError, error_msg
+   end
+   # The slice a..b already contains the enclosing brackets, so it is
+   # appended as-is (wrapping it again would yield "[[species]]").
+   species = orig_name[ a .. b ]
+   seq.set_name( seq.get_name + " " + species )
+ end
+
end
# if ( seq.get_name.length > 10 )