git://source.jalview.org
/
jalview.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
inprogress
[jalview.git]
/
forester
/
ruby
/
evoruby
/
lib
/
evo
/
tool
/
hmmscan_summary.rb
diff --git
a/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb
b/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb
index
1563a82
..
08de9c1
100644
(file)
--- a/
forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb
+++ b/
forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb
@@
-6,7
+6,6
@@
#
# $Id: hmmscan_parser.rb,v 1.5 2010/12/13 19:00:11 cmzmasek Exp $
#
#
# $Id: hmmscan_parser.rb,v 1.5 2010/12/13 19:00:11 cmzmasek Exp $
#
-# last modified: 121003
require 'set'
require 'set'
@@
-14,19
+13,18
@@
require 'lib/evo/util/constants'
require 'lib/evo/util/util'
require 'lib/evo/util/command_line_arguments'
require 'lib/evo/io/parser/hmmscan_parser'
require 'lib/evo/util/util'
require 'lib/evo/util/command_line_arguments'
require 'lib/evo/io/parser/hmmscan_parser'
-require 'lib/evo/io/web/uniprotkb'
module Evoruby
class HmmscanSummary
PRG_NAME = "hsp"
module Evoruby
class HmmscanSummary
PRG_NAME = "hsp"
- PRG_VERSION = "2.000"
+ PRG_VERSION = "2.002"
PRG_DESC = "hmmscan summary"
PRG_DESC = "hmmscan summary"
- PRG_DATE = "2012.10.23"
- COPYRIGHT = "2012 Christian M Zmasek"
- CONTACT = "phylosoft@gmail.com"
- WWW = "www.phylosoft.org"
+ PRG_DATE = "130319"
+ COPYRIGHT = "2013 Christian M Zmasek"
+ CONTACT = "phyloxml@gmail.com"
+ WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
DELIMITER_OPTION = "d"
SPECIES_OPTION = "s"
DELIMITER_OPTION = "d"
SPECIES_OPTION = "s"
@@
-35,7
+33,6
@@
module Evoruby
HMM_FOR_PROTEIN_OUTPUT = "m"
IGNORE_DUF_OPTION = "i"
PARSE_OUT_DESCRIPITION_OPTION = "a"
HMM_FOR_PROTEIN_OUTPUT = "m"
IGNORE_DUF_OPTION = "i"
PARSE_OUT_DESCRIPITION_OPTION = "a"
- UNIPROT = "u"
HELP_OPTION_1 = "help"
HELP_OPTION_2 = "h"
HELP_OPTION_1 = "help"
HELP_OPTION_2 = "h"
@@
-49,8
+46,6
@@
module Evoruby
def run
def run
-
-
Util.print_program_information( PRG_NAME,
PRG_VERSION,
PRG_DESC,
Util.print_program_information( PRG_NAME,
PRG_VERSION,
PRG_DESC,
@@
-67,7
+62,7
@@
module Evoruby
end
if ( cla.is_option_set?( HELP_OPTION_1 ) ||
end
if ( cla.is_option_set?( HELP_OPTION_1 ) ||
- cla.is_option_set?( HELP_OPTION_2 ) )
+ cla.is_option_set?( HELP_OPTION_2 ) )
print_help
exit( 0 )
end
print_help
exit( 0 )
end
@@
-84,7
+79,6
@@
module Evoruby
allowed_opts.push( IGNORE_DUF_OPTION )
allowed_opts.push( PARSE_OUT_DESCRIPITION_OPTION )
allowed_opts.push( HMM_FOR_PROTEIN_OUTPUT )
allowed_opts.push( IGNORE_DUF_OPTION )
allowed_opts.push( PARSE_OUT_DESCRIPITION_OPTION )
allowed_opts.push( HMM_FOR_PROTEIN_OUTPUT )
- allowed_opts.push( UNIPROT )
allowed_opts.push( SPECIES_OPTION )
disallowed = cla.validate_allowed_options_as_str( allowed_opts )
allowed_opts.push( SPECIES_OPTION )
disallowed = cla.validate_allowed_options_as_str( allowed_opts )
@@
-118,8
+112,6
@@
module Evoruby
end
end
end
end
-
-
fs_e_value_threshold = -1.0
if ( cla.is_option_set?( FS_E_VALUE_THRESHOLD_OPTION ) )
begin
fs_e_value_threshold = -1.0
if ( cla.is_option_set?( FS_E_VALUE_THRESHOLD_OPTION ) )
begin
@@
-141,17
+133,8
@@
module Evoruby
end
end
end
end
- uniprot = ""
- if ( cla.is_option_set?( UNIPROT ) )
- begin
- uniprot = cla.get_option_value( UNIPROT )
- rescue ArgumentError => e
- Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
- end
- end
-
species = "HUMAN"
species = "HUMAN"
- if ( cla.is_option_set?( SPECIES_OPTION ) )
+ if ( cla.is_option_set?( SPECIES_OPTION ) )
begin
species = cla.get_option_value( SPECIES_OPTION )
rescue ArgumentError => e
begin
species = cla.get_option_value( SPECIES_OPTION )
rescue ArgumentError => e
@@
-201,9
+184,6
@@
module Evoruby
if !hmm_for_protein_output.empty?
puts( "HMM for proteins : " + hmm_for_protein_output )
end
if !hmm_for_protein_output.empty?
puts( "HMM for proteins : " + hmm_for_protein_output )
end
- if !uniprot.empty?
- puts( "Uniprot : " + uniprot )
- end
puts()
begin
puts()
begin
@@
-215,14
+195,12
@@
module Evoruby
parse_descriptions,
fs_e_value_threshold,
hmm_for_protein_output,
parse_descriptions,
fs_e_value_threshold,
hmm_for_protein_output,
- uniprot,
species )
rescue IOError => e
Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
end
domain_counts = get_domain_counts()
species )
rescue IOError => e
Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
end
domain_counts = get_domain_counts()
-
puts
puts( "domain counts (considering potential i-E-value threshold and ignoring of DUFs):" )
puts( "(number of different domains: " + domain_counts.length.to_s + ")" )
puts
puts( "domain counts (considering potential i-E-value threshold and ignoring of DUFs):" )
puts( "(number of different domains: " + domain_counts.length.to_s + ")" )
@@
-245,11
+223,8
@@
module Evoruby
get_descriptions,
fs_e_value_threshold,
hmm_for_protein_output,
get_descriptions,
fs_e_value_threshold,
hmm_for_protein_output,
- uniprot,
species )
species )
-
-
Util.check_file_for_readability( inpath )
Util.check_file_for_writability( outpath )
Util.check_file_for_readability( inpath )
Util.check_file_for_writability( outpath )
@@
-267,8
+242,6
@@
module Evoruby
hmmscan_results_per_protein = []
hmmscan_results_per_protein = []
-
-
prev_query = ""
results.each do | r |
prev_query = ""
results.each do | r |
@@
-279,21
+252,21
@@
module Evoruby
env_to = r.env_to
if ( ( i_e_value_threshold < 0.0 ) || ( i_e_value <= i_e_value_threshold ) ) &&
env_to = r.env_to
if ( ( i_e_value_threshold < 0.0 ) || ( i_e_value <= i_e_value_threshold ) ) &&
- ( !ignore_dufs || ( model !~ /^DUF\d+/ ) )
+ ( !ignore_dufs || ( model !~ /^DUF\d+/ ) )
count_model( model )
outfile.print( query +
count_model( model )
outfile.print( query +
- column_delimiter )
+ column_delimiter )
if ( get_descriptions )
outfile.print( desc +
if ( get_descriptions )
outfile.print( desc +
- column_delimiter )
+ column_delimiter )
end
outfile.print( model +
end
outfile.print( model +
- column_delimiter +
- env_from.to_s +
- column_delimiter +
- env_to.to_s +
- column_delimiter +
- i_e_value.to_s )
+ column_delimiter +
+ env_from.to_s +
+ column_delimiter +
+ env_to.to_s +
+ column_delimiter +
+ i_e_value.to_s )
outfile.print( Constants::LINE_DELIMITER )
end
outfile.print( Constants::LINE_DELIMITER )
end
@@
-304,7
+277,7
@@
module Evoruby
fs_e_value_threshold,
hmm_for_protein_output,
i_e_value_threshold,
fs_e_value_threshold,
hmm_for_protein_output,
i_e_value_threshold,
- true )
+ species )
end
hmmscan_results_per_protein.clear
end
end
hmmscan_results_per_protein.clear
end
@@
-319,17
+292,17
@@
module Evoruby
end
end
end
end
end
end
+
if !hmm_for_protein_output.empty? && !hmmscan_results_per_protein.empty?
process_hmmscan_results_per_protein( hmmscan_results_per_protein,
fs_e_value_threshold,
hmm_for_protein_output,
i_e_value_threshold,
if !hmm_for_protein_output.empty? && !hmmscan_results_per_protein.empty?
process_hmmscan_results_per_protein( hmmscan_results_per_protein,
fs_e_value_threshold,
hmm_for_protein_output,
i_e_value_threshold,
- true )
+ species )
end
outfile.flush()
outfile.close()
end
outfile.flush()
outfile.close()
-
end # def parse
def process_id( id )
end # def parse
def process_id( id )
@@
-339,8
+312,6
@@
module Evoruby
id
end
id
end
-
-
def count_model( model )
if ( @domain_counts.has_key?( model ) )
count = @domain_counts[ model ].to_i
def count_model( model )
if ( @domain_counts.has_key?( model ) )
count = @domain_counts[ model ].to_i
@@
-355,7
+326,7
@@
module Evoruby
fs_e_value_threshold,
hmm_for_protein_output,
i_e_value_threshold,
fs_e_value_threshold,
hmm_for_protein_output,
i_e_value_threshold,
- uniprotkb )
+ species )
dc = 0
# filter according to i-Evalue threshold
dc = 0
# filter according to i-Evalue threshold
@@
-363,12
+334,14
@@
module Evoruby
hmmscan_results_per_protein_filtered = []
hmmscan_results_per_protein.each do | r |
hmmscan_results_per_protein_filtered = []
hmmscan_results_per_protein.each do | r |
+
+
if r.model == hmm_for_protein_output
if r.model == hmm_for_protein_output
- if r.fs_e_value > fs_e_value_threshold
+ if fs_e_value_threshold > 0.0 && r.fs_e_value > fs_e_value_threshold
return
end
end
return
end
end
- if r.i_e_value <= i_e_value_threshold
+ if i_e_value_threshold <= 0 || r.i_e_value <= i_e_value_threshold
hmmscan_results_per_protein_filtered << r
if r.model == hmm_for_protein_output
dc += 1
hmmscan_results_per_protein_filtered << r
if r.model == hmm_for_protein_output
dc += 1
@@
-401,12
+374,6
@@
module Evoruby
s << r.model + " "
end
s << "\t"
s << r.model + " "
end
s << "\t"
- e = UniprotKB::get_entry_by_id( process_id( own.query ) )
-
- if e != nil
- s << uniprot_annotation( e )
- # s << "\uniprot_annotationt"
- end
overview = make_overview( hmmscan_results_per_protein_filtered, hmm_for_protein_output )
overview = make_overview( hmmscan_results_per_protein_filtered, hmm_for_protein_output )
@@
-416,7
+383,6
@@
module Evoruby
prev_r = nil
hmmscan_results_per_protein_filtered.each do | r |
prev_r = nil
hmmscan_results_per_protein_filtered.each do | r |
-
if prev_r != nil
s << make_interdomain_sequence( r.env_from - prev_r.env_to - 1 )
else
if prev_r != nil
s << make_interdomain_sequence( r.env_from - prev_r.env_to - 1 )
else
@@
-430,23
+396,10
@@
module Evoruby
s << "]"
prev_r = r
end
s << "]"
prev_r = r
end
- s << make_interdomain_sequence( own.qlen - prev_r.env_from, false )
+ s << make_interdomain_sequence( own.qlen - prev_r.env_to, false )
puts s
end
puts s
end
- def uniprot_annotation( e )
- s = ""
- pdb_ids = e.get_pdb_ids
- if !pdb_ids.empty?
- pdb_ids.each do | pdb |
- s << pdb << ", "
- end
- else
- s << "-"
- end
- s
- end
-
def calc_linkers( hmmscan_results_per_protein_filtered, hmm_for_protein_output )
linkers = ""
prev_r = nil
def calc_linkers( hmmscan_results_per_protein_filtered, hmm_for_protein_output )
linkers = ""
prev_r = nil
@@
-501,7
+454,6
@@
module Evoruby
end
end
-
def print_help()
puts( "Usage:" )
puts()
def print_help()
puts( "Usage:" )
puts()
@@
-513,6
+465,7
@@
module Evoruby
puts( " -" + IGNORE_DUF_OPTION + ": ignore DUFs" )
puts( " -" + FS_E_VALUE_THRESHOLD_OPTION + ": E-value threshold for full protein sequences, only for protein summary" )
puts( " -" + HMM_FOR_PROTEIN_OUTPUT + ": HMM for protein summary" )
puts( " -" + IGNORE_DUF_OPTION + ": ignore DUFs" )
puts( " -" + FS_E_VALUE_THRESHOLD_OPTION + ": E-value threshold for full protein sequences, only for protein summary" )
puts( " -" + HMM_FOR_PROTEIN_OUTPUT + ": HMM for protein summary" )
+ puts( " -" + SPECIES_OPTION + ": species for protein summary" )
puts()
end
puts()
end