From 258424df68a8b78b20d2ccf1baedf7697c935c57 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Sat, 27 Oct 2012 00:17:30 +0000 Subject: [PATCH] pdb --- .../evoruby/lib/evo/io/parser/uniprot_parser.rb | 34 +++++-------- forester/ruby/evoruby/lib/evo/io/web/uniprotkb.rb | 25 +++++++--- .../ruby/evoruby/lib/evo/tool/hmmscan_summary.rb | 51 +++++++++----------- 3 files changed, 52 insertions(+), 58 deletions(-) diff --git a/forester/ruby/evoruby/lib/evo/io/parser/uniprot_parser.rb b/forester/ruby/evoruby/lib/evo/io/parser/uniprot_parser.rb index a47a6b2..e8de372 100644 --- a/forester/ruby/evoruby/lib/evo/io/parser/uniprot_parser.rb +++ b/forester/ruby/evoruby/lib/evo/io/parser/uniprot_parser.rb @@ -23,38 +23,26 @@ module Evoruby DR = "DR" LAST = '//' - def initialize file - Util.check_file_for_readability file - @file = file + def initialize + end - def parse( ids ) - entries = Hash.new + def parse( lines ) de = [] dr = [] id = nil - File.open( @file ).each do | line | - if line.index( ID ) == 0 - # puts line - ids.each do | i | - #puts " " + i - if line.include?( i ) && line.split[ 1 ] == i - id = i - break - end - end - end - if id != nil + lines.each do | line | + + if line.include?( ID ) && line.index( ID ) == 0 + id = line.split[ 1 ] + elsif id != nil if line.include?( LAST ) && line.index( LAST ) == 0 e = UniprotEntry.new + e.id = id e.de = de e.dr = dr - entries[ id ] = e - puts id - id = nil - de = [] - dr = [] + return e else if line.include?( DE ) && line.index( DE ) == 0 add( line, de ) @@ -64,7 +52,7 @@ module Evoruby end end end - entries + return nil end private diff --git a/forester/ruby/evoruby/lib/evo/io/web/uniprotkb.rb b/forester/ruby/evoruby/lib/evo/io/web/uniprotkb.rb index 81f0c76..c63b1df 100644 --- a/forester/ruby/evoruby/lib/evo/io/web/uniprotkb.rb +++ b/forester/ruby/evoruby/lib/evo/io/web/uniprotkb.rb @@ -2,22 +2,33 @@ require 'net/http' require 'uri' +require 'lib/evo/io/parser/uniprot_parser' + module Evoruby class UniprotKB - def initialize - end + BASE_URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uniprotkb;" - def get - require 'net/http' - require 'uri' - uri = URI.parse("http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uniprotkb;id=1433X_MAIZE;format=uniprot;style=raw") + def UniprotKB::get_by_id( id, style = "raw", format = "uniprot" ) + url_str = BASE_URL + "id=#{id};format=#{format};style=#{style}" + uri = URI.parse url_str response = Net::HTTP.get_response uri - puts response.body + lines = [] + response.body.each_line do |line| + lines << line + puts line + end + lines + end + + def UniprotKB::get_entry_by_id( id ) + lines = get_by_id( id, "raw", "uniprot" ) + p = UniprotParser.new + return p.parse( lines ) end end diff --git a/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb b/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb index a8f1590..db82d4a 100644 --- a/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb +++ b/forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb @@ -14,7 +14,6 @@ require 'lib/evo/util/constants' require 'lib/evo/util/util' require 'lib/evo/util/command_line_arguments' require 'lib/evo/io/parser/hmmscan_parser' -require 'lib/evo/io/parser/uniprot_parser' require 'lib/evo/io/web/uniprotkb' module Evoruby @@ -49,8 +48,7 @@ module Evoruby def run - ukb = UniprotKB.new - ukb.get + Util.print_program_information( PRG_NAME, PRG_VERSION, @@ -242,11 +240,6 @@ module Evoruby hmmscan_parser = HmmscanParser.new( inpath ) results = hmmscan_parser.parse - uniprot_entries = nil - if !uniprot.empty? && !hmm_for_protein_output.empty? - uniprot_entries = read_uniprot( results, uniprot ) - end - outfile = File.open( outpath, "a" ) query = "" @@ -295,7 +288,7 @@ module Evoruby fs_e_value_threshold, hmm_for_protein_output, i_e_value_threshold, - uniprot_entries ) + true ) end hmmscan_results_per_protein.clear end @@ -315,7 +308,7 @@ module Evoruby fs_e_value_threshold, hmm_for_protein_output, i_e_value_threshold, - uniprot_entries ) + true ) end outfile.flush() @@ -330,16 +323,7 @@ module Evoruby id end - def read_uniprot( hmmscan_results, uniprot ) - ids = Set.new - hmmscan_results.each do | r | - ids << process_id( r.query ) - end - uniprot_parser = UniprotParser.new uniprot - uniprot_entries = uniprot_parser.parse ids - uniprot_entries - end def count_model( model ) if ( @domain_counts.has_key?( model ) ) @@ -355,7 +339,7 @@ module Evoruby fs_e_value_threshold, hmm_for_protein_output, i_e_value_threshold, - uniprot_entries ) + uniprotkb ) dc = 0 # filter according to i-Evalue threshold @@ -401,19 +385,30 @@ module Evoruby s << r.model + " " end s << "\t" - e = uniprot_entries[ process_id( own.query ) ] - if e != nil && e.de != nil - e.de.each { |i| s << i + " " } - else - s << "-" - end + e = UniprotKB::get_entry_by_id( process_id( own.query ) ) + # if e != nil && e.de != nil + # e.de.each do |i| + # + # end + # else + # s << "-" + # end s << "\t" + if e != nil && e.dr != nil + e.dr.each do | dr | + if dr != nil + if dr =~ /PDB;\s+([A-Z0-9]{4});/ + s << $1 - if e != nil && e.gn != nil - e.gn.each { |i| s << i + " " } + end + end + end else s << "-" end + s << "\t" + + -- 1.7.10.2