From: cmzmasek@gmail.com Date: Fri, 14 Sep 2012 02:29:50 +0000 (+0000) Subject: in progress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=96d1fea5f915d4e080e57a4eeb86966e29160737;p=jalview.git in progress --- diff --git a/forester/ruby/evoruby/lib/evo/apps/domain_sequence_extractor_new.rb b/forester/ruby/evoruby/lib/evo/apps/domain_sequence_extractor_new.rb index efa3968..098cf05 100644 --- a/forester/ruby/evoruby/lib/evo/apps/domain_sequence_extractor_new.rb +++ b/forester/ruby/evoruby/lib/evo/apps/domain_sequence_extractor_new.rb @@ -30,6 +30,7 @@ module Evoruby ADD_DOMAIN_NUMBER_OPTION = 'd' ADD_DOMAIN_NUMBER_OPTION_AS_DIGIT = 'dd' ADD_DOMAIN_NUMBER_OPTION_AS_LETTER = 'dl' + ADD_SPECIES = 's' TRIM_OPTION = 't' LOG_FILE_SUFFIX = '_domain_seq_extr.log' PASSED_SEQS_SUFFIX = '_domain_seq_extr_passed' @@ -112,6 +113,11 @@ module Evoruby add_domain_number_as_digit = true end + add_species = false + if cla.is_option_set? ADD_SPECIES + add_species = true + end + if ( add_domain_number_as_letter && add_domain_number_as_digit ) puts( "attempt to add domain number as letter and digit at the same time" ) print_help @@ -217,6 +223,7 @@ module Evoruby add_domain_number_as_digit, add_domain_number_as_letter, trim, + add_species, log ) rescue ArgumentError, IOError, StandardError => e Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT ) @@ -258,6 +265,7 @@ module Evoruby puts( " -" + ADD_DOMAIN_NUMBER_OPTION_AS_DIGIT + ": to add numbers to extracted domains as digit (example \"domain2\")" ) puts( " -" + ADD_DOMAIN_NUMBER_OPTION_AS_LETTER + ": to add numbers to extracted domains as letter (example \"domaina\")" ) puts( " -" + TRIM_OPTION + ": to remove the last 2 characters from sequence names" ) + puts( " -" + ADD_SPECIES + ": to add species [in brackets]" ) puts() end diff --git a/forester/ruby/evoruby/lib/evo/io/parser/hmmscan_domain_extractor.rb b/forester/ruby/evoruby/lib/evo/io/parser/hmmscan_domain_extractor.rb index bd3ae8b..3bf3fb7 100644 --- a/forester/ruby/evoruby/lib/evo/io/parser/hmmscan_domain_extractor.rb +++ b/forester/ruby/evoruby/lib/evo/io/parser/hmmscan_domain_extractor.rb @@ -37,6 +37,7 @@ module Evoruby add_domain_number_as_digit, add_domain_number_as_letter, trim_name, + add_species, log ) Util.check_file_for_readability( hmmsearch_output ) @@ -225,7 +226,8 @@ module Evoruby add_domain_number, add_domain_number_as_digit, add_domain_number_as_letter, - trim_name ) + trim_name, + add_species ) if ( number < 1 || out_of < 1 || number > out_of ) error_msg = "impossible: number=" + number.to_s + ", out of=" + out_of.to_s raise ArgumentError, error_msg @@ -246,7 +248,9 @@ module Evoruby # hmmsearch is 1 based, wheres sequences are 0 bases in this package. seq = in_msa.get_sequence( seqs[ 0 ] ).get_subsequence( seq_from - 1, seq_to - 1 ) - seq.set_name( seq.get_name.split[ 0 ] ) + orig_name = seq.get_name + + seq.set_name( orig_name.split[ 0 ] ) if add_position seq.set_name( seq.get_name + "_" + seq_from.to_s + "-" + seq_to.to_s ) @@ -268,6 +272,18 @@ module Evoruby elsif ( add_domain_number ) seq.set_name( seq.get_name + "~" + number.to_s + "-" + out_of.to_s ) end + + if add_species + a = orig_name.rindex "[" + b = orig_name.rindex "]" + unless a && b + error_msg = "species not found in " + orig_name + raise StandardError, error_msg + end + species = orig_name[ a .. b ] + seq.set_name( seq.get_name + " [" + species + "]" ) + end + end # if ( seq.get_name.length > 10 )