X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fevoruby%2Flib%2Fevo%2Ftool%2Fhmmscan_analysis.rb;h=f702b5558febcdd841404857935b5a7c032b950f;hb=7e37741c149bbf6c27555ab9cf92766155d3f4d3;hp=0aaab8465f38f347167cba6c420d9cb711e96efe;hpb=7c34ec9ee7fcf1d90dec18ecc07bd41b0c888747;p=jalview.git diff --git a/forester/ruby/evoruby/lib/evo/tool/hmmscan_analysis.rb b/forester/ruby/evoruby/lib/evo/tool/hmmscan_analysis.rb index 0aaab84..f702b55 100644 --- a/forester/ruby/evoruby/lib/evo/tool/hmmscan_analysis.rb +++ b/forester/ruby/evoruby/lib/evo/tool/hmmscan_analysis.rb @@ -21,10 +21,10 @@ module Evoruby class HmmscanAnalysis PRG_NAME = "hsa" - PRG_VERSION = "1.000" + PRG_VERSION = "1.001" PRG_DESC = "hmmscan analysis" - PRG_DATE = "130319" - COPYRIGHT = "2013 Christian M Zmasek" + PRG_DATE = "140128" + COPYRIGHT = "2014 Christian Zmasek" CONTACT = "phyloxml@gmail.com" WWW = "https://sites.google.com/site/cmzmasek/home/software/forester" @@ -63,7 +63,7 @@ module Evoruby exit( 0 ) end - if ( cla.get_number_of_files != 1 && cla.get_number_of_files != 3 ) + if ( cla.get_number_of_files != 2 && cla.get_number_of_files != 3 ) print_help exit( -1 ) end @@ -83,12 +83,12 @@ module Evoruby inpath = cla.get_file_name( 0 ) Util.check_file_for_readability( inpath ) - seq_file_path = nil - extraction_output = nil + seq_file_path = cla.get_file_name( 1 ) + Util.check_file_for_readability( seq_file_path ) + + extraction_output = nil if ( cla.get_number_of_files == 3 ) - seq_file_path = cla.get_file_name( 1 ) - Util.check_file_for_readability( seq_file_path ) extraction_output = cla.get_file_name( 2 ) if File.exist?( extraction_output ) Util.fatal_error( PRG_NAME, "error: [#{extraction_output}] already exists" ) @@ -129,7 +129,7 @@ module Evoruby if ( cla.is_option_set?( TARGET_MODELS ) ) begin hmm_for_protein_output = cla.get_option_value( TARGET_MODELS ) - target_models = hmm_for_protein_output.split( "/" ); + target_models = hmm_for_protein_output.split( "/" ) rescue ArgumentError => e Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT ) end @@ -138,13 +138,42 @@ module Evoruby x_models = [] if ( cla.is_option_set?( EXTRACTION ) ) begin - hmm_for_protein_output = cla.get_option_value( EXTRACTION ) - x_models = hmm_for_protein_output.split( "~" ); + x_models = cla.get_option_value( EXTRACTION ).split( "/" ) rescue ArgumentError => e Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT ) end end + + if i_e_value_threshold > 0 + puts "i E-value threshold:" + "\t" + i_e_value_threshold.to_s + else + puts "i E-value threshold:" + "\t" + "n/a" + end + + if fs_e_value_threshold > 0 + puts "fs E-value threshold:" + "\t" + fs_e_value_threshold.to_s + else + puts "fs E-value threshold:" + "\t" + "n/a" + end + + puts + + title = "ID" + "\t" + title << "SPECIES" + "\t" + target_models.each do | t | + title << t + "\t" + end + title << "LENGTH" + "\t" + title << "#DOMAINS" + "\t" + title << "DOMAINS" + "\t" + unless x_models.empty? + title << "LINKERS" + "\t" + end + title << "DA" + "\t" + title << "DETAILED DA" + puts title + begin parse( inpath, i_e_value_threshold, @@ -247,11 +276,13 @@ module Evoruby raise StandardError, "target hmms is empty" if target_hmms.length < 1 raise StandardError, "results is empty" if hmmscan_results_per_protein.length < 1 + raise StandardError, "msa is empty" if ( msa == nil || msa.get_number_of_seqs < 1 ) + # filter according to i-Evalue threshold # abort if fs Evalue too high - if fs_e_value_threshold >= 0.0 + if fs_e_value_threshold >= 0 hmmscan_results_per_protein.each do | r | target_hmms.each do | hmm | if r.model == hmm && r.fs_e_value > fs_e_value_threshold @@ -273,14 +304,13 @@ module Evoruby break end end - end end if matched.length < target_hmms.length return end - if hmmscan_results_per_protein_filtered.length < 1 + if hmmscan_results_per_protein_filtered.length < 1 return end @@ -310,13 +340,11 @@ module Evoruby species = nil ll = nil - if msa != nil - seq = get_sequence( query, msa ) - species = get_species( seq ) - raise StandardError, "could not get species" if species == nil || species.empty? - if x_models != nil && x_models.length > 0 - ll = extract_linker( hmmscan_results_per_protein_filtered, x_models, seq, extraction_output_file ) - end + seq = get_sequence( query, msa ) + species = get_species( seq ) + raise StandardError, "could not get species" if species == nil || species.empty? + if x_models != nil && !x_models.empty? + ll = extract_linker( hmmscan_results_per_protein_filtered, x_models, seq, extraction_output_file ) end s = query + "\t" @@ -332,13 +360,14 @@ module Evoruby s << r.model + " " end s << "\t" - if msa != nil - if ll != nil - s << ll.to_s - end + + if ll != nil + s << ll.to_s s << "\t" end - s << make_overview_da( hmmscan_results_per_protein_filtered ) + + + s << make_overview_da( hmmscan_results_per_protein_filtered ) s << "\t" s << make_detailed_da( hmmscan_results_per_protein_filtered, qlen ) puts s @@ -454,14 +483,15 @@ module Evoruby def print_help() - puts( "Usage:" ) - puts() - puts( " " + PRG_NAME + ".rb [options] " ) - puts() - puts( " options: -" + I_E_VALUE_THRESHOLD_OPTION + ": i-E-value threshold, default is no threshold" ) - puts( " -" + FS_E_VALUE_THRESHOLD_OPTION + ": E-value threshold for full protein sequences, only for protein summary" ) - puts( " -" + TARGET_MODELS + ": target HMMs" ) - puts() + puts "Usage:" + puts + puts " " + PRG_NAME + ".rb [options] [outputfile]" + puts + puts " options: -" + I_E_VALUE_THRESHOLD_OPTION + ": i-E-value threshold, default is no threshold" + puts " -" + FS_E_VALUE_THRESHOLD_OPTION + ": E-value threshold for full protein sequences, only for protein summary" + puts " -" + TARGET_MODELS + ": target HMMs (separated by /)" + puts " -" + EXTRACTION + ": to extract matching sequences to [outputfile]" + puts end end # class