X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fevoruby%2Flib%2Fevo%2Ftool%2Fhmmscan_summary.rb;h=08de9c125e9385c009c34134d8a87d8e4cf90f0e;hb=156517428746b21481eb815bf860ffb6cb0314ef;hp=75f5dd8b80fc44d20be4dcc1df65c44b4e06f280;hpb=542a08505f0a84d5d1d17b3e3b3f4dea5690a029;p=jalview.git diff --git a/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb b/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb index 75f5dd8..08de9c1 100644 --- a/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb +++ b/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb @@ -6,7 +6,6 @@ # # $Id: hmmscan_parser.rb,v 1.5 2010/12/13 19:00:11 cmzmasek Exp $ # -# last modified: 121003 require 'set' @@ -14,19 +13,18 @@ require 'lib/evo/util/constants' require 'lib/evo/util/util' require 'lib/evo/util/command_line_arguments' require 'lib/evo/io/parser/hmmscan_parser' -require 'lib/evo/io/web/uniprotkb' module Evoruby class HmmscanSummary PRG_NAME = "hsp" - PRG_VERSION = "2.000" + PRG_VERSION = "2.002" PRG_DESC = "hmmscan summary" - PRG_DATE = "2012.10.23" - COPYRIGHT = "2012 Christian M Zmasek" - CONTACT = "phylosoft@gmail.com" - WWW = "www.phylosoft.org" + PRG_DATE = "130319" + COPYRIGHT = "2013 Christian M Zmasek" + CONTACT = "phyloxml@gmail.com" + WWW = "https://sites.google.com/site/cmzmasek/home/software/forester" DELIMITER_OPTION = "d" SPECIES_OPTION = "s" @@ -35,7 +33,6 @@ module Evoruby HMM_FOR_PROTEIN_OUTPUT = "m" IGNORE_DUF_OPTION = "i" PARSE_OUT_DESCRIPITION_OPTION = "a" - UNIPROT = "u" HELP_OPTION_1 = "help" HELP_OPTION_2 = "h" @@ -49,8 +46,6 @@ module Evoruby def run - - Util.print_program_information( PRG_NAME, PRG_VERSION, PRG_DESC, @@ -67,7 +62,7 @@ module Evoruby end if ( cla.is_option_set?( HELP_OPTION_1 ) || - cla.is_option_set?( HELP_OPTION_2 ) ) + cla.is_option_set?( HELP_OPTION_2 ) ) print_help exit( 0 ) end @@ -84,7 +79,6 @@ module Evoruby allowed_opts.push( IGNORE_DUF_OPTION ) allowed_opts.push( PARSE_OUT_DESCRIPITION_OPTION ) allowed_opts.push( HMM_FOR_PROTEIN_OUTPUT ) - allowed_opts.push( UNIPROT ) allowed_opts.push( SPECIES_OPTION ) disallowed = cla.validate_allowed_options_as_str( allowed_opts ) @@ -118,8 +112,6 @@ module Evoruby end end - - fs_e_value_threshold = -1.0 if ( cla.is_option_set?( FS_E_VALUE_THRESHOLD_OPTION ) ) begin @@ -141,17 +133,8 @@ module Evoruby end end - uniprot = "" - if ( cla.is_option_set?( UNIPROT ) ) - begin - uniprot = cla.get_option_value( UNIPROT ) - rescue ArgumentError => e - Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT ) - end - end - species = "HUMAN" - if ( cla.is_option_set?( SPECIES_OPTION ) ) + if ( cla.is_option_set?( SPECIES_OPTION ) ) begin species = cla.get_option_value( SPECIES_OPTION ) rescue ArgumentError => e @@ -201,9 +184,6 @@ module Evoruby if !hmm_for_protein_output.empty? puts( "HMM for proteins : " + hmm_for_protein_output ) end - if !uniprot.empty? - puts( "Uniprot : " + uniprot ) - end puts() begin @@ -215,14 +195,12 @@ module Evoruby parse_descriptions, fs_e_value_threshold, hmm_for_protein_output, - uniprot, species ) rescue IOError => e Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT ) end domain_counts = get_domain_counts() - puts puts( "domain counts (considering potential i-E-value threshold and ignoring of DUFs):" ) puts( "(number of different domains: " + domain_counts.length.to_s + ")" ) @@ -245,11 +223,8 @@ module Evoruby get_descriptions, fs_e_value_threshold, hmm_for_protein_output, - uniprot, species ) - - Util.check_file_for_readability( inpath ) Util.check_file_for_writability( outpath ) @@ -267,8 +242,6 @@ module Evoruby hmmscan_results_per_protein = [] - - prev_query = "" results.each do | r | @@ -279,21 +252,21 @@ module Evoruby env_to = r.env_to if ( ( i_e_value_threshold < 0.0 ) || ( i_e_value <= i_e_value_threshold ) ) && - ( !ignore_dufs || ( model !~ /^DUF\d+/ ) ) + ( !ignore_dufs || ( model !~ /^DUF\d+/ ) ) count_model( model ) outfile.print( query + - column_delimiter ) + column_delimiter ) if ( get_descriptions ) outfile.print( desc + - column_delimiter ) + column_delimiter ) end outfile.print( model + - column_delimiter + - env_from.to_s + - column_delimiter + - env_to.to_s + - column_delimiter + - i_e_value.to_s ) + column_delimiter + + env_from.to_s + + column_delimiter + + env_to.to_s + + column_delimiter + + i_e_value.to_s ) outfile.print( Constants::LINE_DELIMITER ) end @@ -304,7 +277,6 @@ module Evoruby fs_e_value_threshold, hmm_for_protein_output, i_e_value_threshold, - false, species ) end hmmscan_results_per_protein.clear @@ -320,20 +292,17 @@ module Evoruby end end end - puts hmm_for_protein_output - puts hmmscan_results_per_protein + if !hmm_for_protein_output.empty? && !hmmscan_results_per_protein.empty? process_hmmscan_results_per_protein( hmmscan_results_per_protein, fs_e_value_threshold, hmm_for_protein_output, i_e_value_threshold, - false, species ) end outfile.flush() outfile.close() - end # def parse def process_id( id ) @@ -343,8 +312,6 @@ module Evoruby id end - - def count_model( model ) if ( @domain_counts.has_key?( model ) ) count = @domain_counts[ model ].to_i @@ -359,10 +326,7 @@ module Evoruby fs_e_value_threshold, hmm_for_protein_output, i_e_value_threshold, - uniprotkb, - species ) - - puts "+" + species ) dc = 0 # filter according to i-Evalue threshold @@ -370,12 +334,14 @@ module Evoruby hmmscan_results_per_protein_filtered = [] hmmscan_results_per_protein.each do | r | + + if r.model == hmm_for_protein_output - if r.fs_e_value > fs_e_value_threshold + if fs_e_value_threshold > 0.0 && r.fs_e_value > fs_e_value_threshold return end end - if r.i_e_value <= i_e_value_threshold + if i_e_value_threshold <= 0 || r.i_e_value <= i_e_value_threshold hmmscan_results_per_protein_filtered << r if r.model == hmm_for_protein_output dc += 1 @@ -408,13 +374,6 @@ module Evoruby s << r.model + " " end s << "\t" - puts s - #e = UniprotKB::get_entry_by_id( process_id( own.query ) ) - - #if e != nil - # s << uniprot_annotation( e ) - # # s << "\uniprot_annotationt" - #end overview = make_overview( hmmscan_results_per_protein_filtered, hmm_for_protein_output ) @@ -424,7 +383,6 @@ module Evoruby prev_r = nil hmmscan_results_per_protein_filtered.each do | r | - if prev_r != nil s << make_interdomain_sequence( r.env_from - prev_r.env_to - 1 ) else @@ -438,23 +396,10 @@ module Evoruby s << "]" prev_r = r end - s << make_interdomain_sequence( own.qlen - prev_r.env_from, false ) + s << make_interdomain_sequence( own.qlen - prev_r.env_to, false ) puts s end - def uniprot_annotation( e ) - s = "" - pdb_ids = e.get_pdb_ids - if !pdb_ids.empty? - pdb_ids.each do | pdb | - s << pdb << ", " - end - else - s << "-" - end - s - end - def calc_linkers( hmmscan_results_per_protein_filtered, hmm_for_protein_output ) linkers = "" prev_r = nil @@ -509,7 +454,6 @@ module Evoruby end - def print_help() puts( "Usage:" ) puts() @@ -521,6 +465,7 @@ module Evoruby puts( " -" + IGNORE_DUF_OPTION + ": ignore DUFs" ) puts( " -" + FS_E_VALUE_THRESHOLD_OPTION + ": E-value threshold for full protein sequences, only for protein summary" ) puts( " -" + HMM_FOR_PROTEIN_OUTPUT + ": HMM for protein summary" ) + puts( " -" + SPECIES_OPTION + ": species for protein summary" ) puts() end