pdb
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 27 Oct 2012 00:17:30 +0000 (00:17 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 27 Oct 2012 00:17:30 +0000 (00:17 +0000)
forester/ruby/evoruby/lib/evo/io/parser/uniprot_parser.rb
forester/ruby/evoruby/lib/evo/io/web/uniprotkb.rb
forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb

index a47a6b2..e8de372 100644 (file)
@@ -23,38 +23,26 @@ module Evoruby
     DR = "DR"
     LAST = '//'
 
-    def initialize file
-      Util.check_file_for_readability file
-      @file = file
+    def initialize 
+      
     end
 
 
-    def parse( ids )
-      entries = Hash.new
+    def parse( lines )
       de = []
       dr = []
       id = nil
-      File.open( @file ).each do | line |
-        if line.index( ID ) == 0
-          #   puts line
-          ids.each do | i |
-            #puts " " + i
-            if line.include?( i ) && line.split[ 1 ] == i
-              id = i
-              break
-            end
-          end
-        end
-        if id != nil
+      lines.each do | line |
+    
+        if line.include?( ID ) && line.index( ID ) == 0
+          id = line.split[ 1 ]
+        elsif id != nil
           if line.include?( LAST ) && line.index( LAST ) == 0
             e = UniprotEntry.new
+            e.id = id
             e.de = de
             e.dr = dr
-            entries[ id ] = e
-            puts id
-            id = nil
-            de = []
-            dr = []
+            return e
           else
             if line.include?( DE ) && line.index( DE ) == 0
               add( line, de )
@@ -64,7 +52,7 @@ module Evoruby
           end
         end
       end
-      entries
+      return nil
     end
 
     private
index 81f0c76..c63b1df 100644 (file)
@@ -2,22 +2,33 @@
 require 'net/http'
 require 'uri'
 
+require 'lib/evo/io/parser/uniprot_parser'
+
 module Evoruby
 
 
   class UniprotKB
-    def initialize
 
-    end
+    BASE_URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uniprotkb;"
 
-    def get
-      require 'net/http'
-      require 'uri'
 
-      uri = URI.parse("http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uniprotkb;id=1433X_MAIZE;format=uniprot;style=raw")
+    def UniprotKB::get_by_id( id, style = "raw", format = "uniprot" )
+      url_str = BASE_URL + "id=#{id};format=#{format};style=#{style}"
+      uri = URI.parse url_str
       response = Net::HTTP.get_response uri
-      puts response.body
+      lines = []
+      response.body.each_line do |line|
+        lines << line
+        puts line
+      end
+      lines
+    end
+
 
+    def UniprotKB::get_entry_by_id( id  )
+      lines = get_by_id( id,  "raw", "uniprot" )
+      p = UniprotParser.new
+      return p.parse( lines )
     end
 
   end
index a8f1590..db82d4a 100644 (file)
@@ -14,7 +14,6 @@ require 'lib/evo/util/constants'
 require 'lib/evo/util/util'
 require 'lib/evo/util/command_line_arguments'
 require 'lib/evo/io/parser/hmmscan_parser'
-require 'lib/evo/io/parser/uniprot_parser'
 require 'lib/evo/io/web/uniprotkb'
 
 module Evoruby
@@ -49,8 +48,7 @@ module Evoruby
 
     def run
 
-      ukb = UniprotKB.new
-      ukb.get
+
 
       Util.print_program_information( PRG_NAME,
         PRG_VERSION,
@@ -242,11 +240,6 @@ module Evoruby
       hmmscan_parser = HmmscanParser.new( inpath )
       results = hmmscan_parser.parse
 
-      uniprot_entries = nil
-      if !uniprot.empty? && !hmm_for_protein_output.empty?
-        uniprot_entries = read_uniprot( results, uniprot  )
-      end
-
       outfile = File.open( outpath, "a" )
 
       query     = ""
@@ -295,7 +288,7 @@ module Evoruby
                 fs_e_value_threshold,
                 hmm_for_protein_output,
                 i_e_value_threshold,
-                uniprot_entries )
+                true )
             end
             hmmscan_results_per_protein.clear
           end
@@ -315,7 +308,7 @@ module Evoruby
           fs_e_value_threshold,
           hmm_for_protein_output,
           i_e_value_threshold,
-          uniprot_entries )
+          true )
       end
 
       outfile.flush()
@@ -330,16 +323,7 @@ module Evoruby
       id
     end
 
-    def read_uniprot( hmmscan_results, uniprot  )
-      ids = Set.new
-      hmmscan_results.each do | r |
 
-        ids << process_id( r.query )
-      end
-      uniprot_parser = UniprotParser.new uniprot
-      uniprot_entries = uniprot_parser.parse ids
-      uniprot_entries
-    end
 
     def count_model( model )
       if ( @domain_counts.has_key?( model ) )
@@ -355,7 +339,7 @@ module Evoruby
         fs_e_value_threshold,
         hmm_for_protein_output,
         i_e_value_threshold,
-        uniprot_entries )
+        uniprotkb )
 
       dc = 0
       # filter according to i-Evalue threshold
@@ -401,19 +385,30 @@ module Evoruby
         s << r.model + " "
       end
       s << "\t"
-      e = uniprot_entries[ process_id( own.query ) ]
-      if e != nil && e.de != nil
-        e.de.each { |i| s << i + " " }
-      else
-        s << "-"
-      end
+      e = UniprotKB::get_entry_by_id( process_id( own.query ) )
+      # if e != nil && e.de != nil
+      #   e.de.each do |i|
+      #
+      #   end
+      # else
+      #   s << "-"
+      # end
       s << "\t"
+      if e != nil && e.dr != nil
+        e.dr.each do | dr |
+          if dr != nil
+            if dr =~ /PDB;\s+([A-Z0-9]{4});/
+              s << $1
 
-      if e != nil && e.gn != nil
-        e.gn.each { |i| s << i + " " }
+            end
+          end
+        end
       else
         s << "-"
       end
+      s << "\t"
+
+