X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fevoruby%2Flib%2Fevo%2Ftool%2Fhmmscan_summary.rb;h=db82d4a8bebcde3e6f7253bf9a3809d4cff2347c;hb=258424df68a8b78b20d2ccf1baedf7697c935c57;hp=82b27823333a20b07a1c0747639484c5e29746da;hpb=fea8aa1f2d8d2eebfbbb57ede28edc003367c12f;p=jalview.git diff --git a/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb b/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb index 82b2782..db82d4a 100644 --- a/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb +++ b/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb @@ -8,11 +8,13 @@ # # last modified: 121003 +require 'set' + require 'lib/evo/util/constants' require 'lib/evo/util/util' require 'lib/evo/util/command_line_arguments' require 'lib/evo/io/parser/hmmscan_parser' -require 'lib/evo/io/parser/uniprot_parser' +require 'lib/evo/io/web/uniprotkb' module Evoruby @@ -21,7 +23,7 @@ module Evoruby PRG_NAME = "hsp" PRG_VERSION = "2.000" PRG_DESC = "hmmscan summary" - PRG_DATE = "2012.10.19" + PRG_DATE = "2012.10.23" COPYRIGHT = "2012 Christian M Zmasek" CONTACT = "phylosoft@gmail.com" WWW = "www.phylosoft.org" @@ -46,6 +48,8 @@ module Evoruby def run + + Util.print_program_information( PRG_NAME, PRG_VERSION, PRG_DESC, @@ -79,7 +83,7 @@ module Evoruby allowed_opts.push( IGNORE_DUF_OPTION ) allowed_opts.push( PARSE_OUT_DESCRIPITION_OPTION ) allowed_opts.push( HMM_FOR_PROTEIN_OUTPUT ) - allowed_opts.push( UNIPROT ) + allowed_opts.push( UNIPROT ) disallowed = cla.validate_allowed_options_as_str( allowed_opts ) if ( disallowed.length > 0 ) @@ -132,11 +136,11 @@ module Evoruby Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT ) end end - + uniprot = "" - if ( cla.is_option_set?( UNIPROT ) ) + if ( cla.is_option_set?( UNIPROT ) ) begin - uniprot = cla.get_option_value( UNIPROT ) + uniprot = cla.get_option_value( UNIPROT ) rescue ArgumentError => e Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT ) end @@ -175,15 +179,15 @@ module Evoruby else puts( "column delimiter : " + column_delimiter ) end - if fs_e_value_threshold >= 0.0 + if fs_e_value_threshold >= 0.0 puts( "E-value threshold : " + fs_e_value_threshold.to_s ) else puts( "E-value threshold : no threshold" ) end - if !hmm_for_protein_output.empty? + if !hmm_for_protein_output.empty? puts( "HMM for proteins : " + hmm_for_protein_output ) end - if !uniprot.empty? + if !uniprot.empty? puts( "Uniprot : " + uniprot ) end puts() @@ -198,7 +202,7 @@ module Evoruby fs_e_value_threshold, hmm_for_protein_output, uniprot ) - rescue ArgumentError, IOError => e + rescue IOError => e Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT ) end domain_counts = get_domain_counts() @@ -227,17 +231,15 @@ module Evoruby fs_e_value_threshold, hmm_for_protein_output, uniprot ) + + + Util.check_file_for_readability( inpath ) Util.check_file_for_writability( outpath ) hmmscan_parser = HmmscanParser.new( inpath ) results = hmmscan_parser.parse - - uniprot_entries = nil - if !uniprot.empty? - uniprot_entries = read_uniprot( results, uniprot ) - end - + outfile = File.open( outpath, "a" ) query = "" @@ -249,7 +251,7 @@ module Evoruby hmmscan_results_per_protein = [] - + prev_query = "" @@ -285,7 +287,8 @@ module Evoruby process_hmmscan_results_per_protein( hmmscan_results_per_protein, fs_e_value_threshold, hmm_for_protein_output, - i_e_value_threshold ) + i_e_value_threshold, + true ) end hmmscan_results_per_protein.clear end @@ -300,30 +303,28 @@ module Evoruby end end end - if !hmm_for_protein_output.empty? - if !hmmscan_results_per_protein.empty? - process_hmmscan_results_per_protein( hmmscan_results_per_protein, - fs_e_value_threshold, - hmm_for_protein_output, - i_e_value_threshold ) - end + if !hmm_for_protein_output.empty? && !hmmscan_results_per_protein.empty? + process_hmmscan_results_per_protein( hmmscan_results_per_protein, + fs_e_value_threshold, + hmm_for_protein_output, + i_e_value_threshold, + true ) end + outfile.flush() outfile.close() end # def parse - - def read_uniprot( hmmscan_results, uniprot ) - ids = [] - hmmscan_results.each do | r | - ids << r.query - end - uniprot_parser = UniprotParser.new uniprot - uniprot_entries = uniprot_parser.parse ids - uniprot_entries - end - + def process_id( id ) + if id =~ /(sp|tr)\|\S+\|(\S+)/ + id = $2 + end + id + end + + + def count_model( model ) if ( @domain_counts.has_key?( model ) ) count = @domain_counts[ model ].to_i @@ -337,7 +338,8 @@ module Evoruby def process_hmmscan_results_per_protein( hmmscan_results_per_protein, fs_e_value_threshold, hmm_for_protein_output, - i_e_value_threshold ) + i_e_value_threshold, + uniprotkb ) dc = 0 # filter according to i-Evalue threshold @@ -383,7 +385,34 @@ module Evoruby s << r.model + " " end s << "\t" + e = UniprotKB::get_entry_by_id( process_id( own.query ) ) + # if e != nil && e.de != nil + # e.de.each do |i| + # + # end + # else + # s << "-" + # end + s << "\t" + if e != nil && e.dr != nil + e.dr.each do | dr | + if dr != nil + if dr =~ /PDB;\s+([A-Z0-9]{4});/ + s << $1 + + end + end + end + else + s << "-" + end + s << "\t" + + + + + s << "\t" overview = make_overview( hmmscan_results_per_protein_filtered, hmm_for_protein_output ) s << overview + "\t"