in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 25 Oct 2012 02:41:23 +0000 (02:41 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 25 Oct 2012 02:41:23 +0000 (02:41 +0000)
forester/ruby/evoruby/lib/evo/io/parser/uniprot_parser.rb
forester/ruby/evoruby/lib/evo/io/web/uniprotkb.rb [new file with mode: 0644]
forester/ruby/evoruby/lib/evo/tool/hmmscan_summary.rb
forester/ruby/evoruby/lib/evo/util/util.rb

index ec2b9f0..a47a6b2 100644 (file)
@@ -28,38 +28,37 @@ module Evoruby
       @file = file
     end
 
-   
 
     def parse( ids )
-      #ic = Iconv.new( 'UTF-8//IGNORE', 'UTF-8' )
-      entries = []
+      entries = Hash.new
       de = []
       dr = []
-      read = false
+      id = nil
       File.open( @file ).each do | line |
         if line.index( ID ) == 0
-          puts line 
-          ids.each do | id |
-            puts " " + id
-            if line.index( id ) == 0
-              read = true
+          #   puts line
+          ids.each do | i |
+            #puts " " + i
+            if line.include?( i ) && line.split[ 1 ] == i
+              id = i
               break
             end
           end
         end
-        if read
-          if line.index LAST == 0
-            read = false
+        if id != nil
+          if line.include?( LAST ) && line.index( LAST ) == 0
             e = UniprotEntry.new
             e.de = de
             e.dr = dr
             entries[ id ] = e
+            puts id
+            id = nil
             de = []
             dr = []
           else
-            if line.index DE == 0
+            if line.include?( DE ) && line.index( DE ) == 0
               add( line, de )
-            elsif line.index DR == 0
+            elsif line.include?( DR ) && line.index( DR ) == 0
               add( line, dr )
             end
           end
diff --git a/forester/ruby/evoruby/lib/evo/io/web/uniprotkb.rb b/forester/ruby/evoruby/lib/evo/io/web/uniprotkb.rb
new file mode 100644 (file)
index 0000000..81f0c76
--- /dev/null
@@ -0,0 +1,25 @@
+
+require 'net/http'
+require 'uri'
+
+module Evoruby
+
+
+  class UniprotKB
+    def initialize
+
+    end
+
+    def get
+      require 'net/http'
+      require 'uri'
+
+      uri = URI.parse("http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uniprotkb;id=1433X_MAIZE;format=uniprot;style=raw")
+      response = Net::HTTP.get_response uri
+      puts response.body
+
+    end
+
+  end
+
+end
index e6ba67d..a8f1590 100644 (file)
@@ -8,11 +8,14 @@
 #
 # last modified: 121003
 
+require 'set'
+
 require 'lib/evo/util/constants'
 require 'lib/evo/util/util'
 require 'lib/evo/util/command_line_arguments'
 require 'lib/evo/io/parser/hmmscan_parser'
 require 'lib/evo/io/parser/uniprot_parser'
+require 'lib/evo/io/web/uniprotkb'
 
 module Evoruby
 
@@ -46,6 +49,9 @@ module Evoruby
 
     def run
 
+      ukb = UniprotKB.new
+      ukb.get
+
       Util.print_program_information( PRG_NAME,
         PRG_VERSION,
         PRG_DESC,
@@ -132,11 +138,11 @@ module Evoruby
           Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
         end
       end
-      
+
       uniprot = ""
-       if ( cla.is_option_set?( UNIPROT ) )
+      if ( cla.is_option_set?( UNIPROT ) )
         begin
-           uniprot = cla.get_option_value( UNIPROT )
+          uniprot = cla.get_option_value( UNIPROT )
         rescue ArgumentError => e
           Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
         end
@@ -175,15 +181,15 @@ module Evoruby
       else
         puts( "column delimiter     : " + column_delimiter )
       end
-      if fs_e_value_threshold >= 0.0 
+      if fs_e_value_threshold >= 0.0
         puts( "E-value threshold   : " + fs_e_value_threshold.to_s )
       else
         puts( "E-value threshold   : no threshold" )
       end
-      if !hmm_for_protein_output.empty? 
+      if !hmm_for_protein_output.empty?
         puts( "HMM for proteins    : " + hmm_for_protein_output )
       end
-      if !uniprot.empty? 
+      if !uniprot.empty?
         puts( "Uniprot             : " + uniprot )
       end
       puts()
@@ -198,7 +204,7 @@ module Evoruby
           fs_e_value_threshold,
           hmm_for_protein_output,
           uniprot )
-      rescue ArgumentError, IOError => e
+      rescue IOError => e
         Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
       end
       domain_counts = get_domain_counts()
@@ -227,17 +233,20 @@ module Evoruby
         fs_e_value_threshold,
         hmm_for_protein_output,
         uniprot )
+
+
+
       Util.check_file_for_readability( inpath )
       Util.check_file_for_writability( outpath )
 
       hmmscan_parser = HmmscanParser.new( inpath )
       results = hmmscan_parser.parse
-      
+
       uniprot_entries = nil
-      if !uniprot.empty? 
+      if !uniprot.empty? && !hmm_for_protein_output.empty?
         uniprot_entries = read_uniprot( results, uniprot  )
       end
-      
+
       outfile = File.open( outpath, "a" )
 
       query     = ""
@@ -249,7 +258,7 @@ module Evoruby
 
       hmmscan_results_per_protein = []
 
-      
+
 
       prev_query = ""
 
@@ -285,7 +294,8 @@ module Evoruby
               process_hmmscan_results_per_protein( hmmscan_results_per_protein,
                 fs_e_value_threshold,
                 hmm_for_protein_output,
-                i_e_value_threshold )
+                i_e_value_threshold,
+                uniprot_entries )
             end
             hmmscan_results_per_protein.clear
           end
@@ -300,31 +310,37 @@ module Evoruby
           end
         end
       end
-      if !hmm_for_protein_output.empty?
-        if !hmmscan_results_per_protein.empty?
-          process_hmmscan_results_per_protein( hmmscan_results_per_protein,
-            fs_e_value_threshold,
-            hmm_for_protein_output,
-            i_e_value_threshold, 
-             uniprot_entries )
-        end
+      if !hmm_for_protein_output.empty? && !hmmscan_results_per_protein.empty?
+        process_hmmscan_results_per_protein( hmmscan_results_per_protein,
+          fs_e_value_threshold,
+          hmm_for_protein_output,
+          i_e_value_threshold,
+          uniprot_entries )
       end
+
       outfile.flush()
       outfile.close()
 
     end # def parse
 
-    
-     def read_uniprot( hmmscan_results, uniprot  )  
-        ids = []
-         hmmscan_results.each do | r |
-           ids << r.query
-         end 
-         uniprot_parser = UniprotParser.new uniprot
-         uniprot_entries = uniprot_parser.parse ids 
-         uniprot_entries
-      end
-    
+    def process_id( id )
+      if  id =~ /(sp|tr)\|\S+\|(\S+)/
+        id = $2
+      end
+      id
+    end
+
+    def read_uniprot( hmmscan_results, uniprot  )
+      ids = Set.new
+      hmmscan_results.each do | r |
+
+        ids << process_id( r.query )
+      end
+      uniprot_parser = UniprotParser.new uniprot
+      uniprot_entries = uniprot_parser.parse ids
+      uniprot_entries
+    end
+
     def count_model( model )
       if ( @domain_counts.has_key?( model ) )
         count = @domain_counts[ model ].to_i
@@ -339,7 +355,7 @@ module Evoruby
         fs_e_value_threshold,
         hmm_for_protein_output,
         i_e_value_threshold,
-         uniprot_entries      )
+        uniprot_entries )
 
       dc = 0
       # filter according to i-Evalue threshold
@@ -385,8 +401,23 @@ module Evoruby
         s << r.model + " "
       end
       s << "\t"
-     s <<  uniprot_entries[  own.query ]
-       s << "\t"
+      e = uniprot_entries[ process_id( own.query ) ]
+      if e != nil && e.de != nil
+        e.de.each { |i| s << i + " " }
+      else
+        s << "-"
+      end
+      s << "\t"
+
+      if e != nil && e.gn != nil
+        e.gn.each { |i| s << i + " " }
+      else
+        s << "-"
+      end
+
+
+
+      s << "\t"
       overview = make_overview( hmmscan_results_per_protein_filtered, hmm_for_protein_output )
 
       s << overview   + "\t"
index 234a625..79f92e0 100644 (file)
@@ -12,229 +12,215 @@ require 'lib/evo/util/constants'
 
 module Evoruby
 
-    class Util
-
-        def Util.normalize_seq_name( name, length )
-            if name.length > length
-                name = name[ 0, length ]
-            elsif name.length < length
-                for i in 0 ... length - name.length
-                    name = name + " "
-                end
-            end
-            name
-        end
-
-        #  def Util.normalize_mol_sequence( seq )
-        #      new_seq = String.new()
-        #      for i in 0 ... seq.length
-        #          c = seq.get_slice( i )
-        #          if is_aa_gap_character?( c )
-        #              new_seq = new_seq + "-"
-        #          else
-        #              new_seq = new_seq + c
-        #          end
-        #      end
-        #      new_seq
-        #  end
-
-
-        # Returns true if char_code corresponds to: space * - . _
-        def Util.is_aa_gap_character?( char_code )
-            return ( char_code <= 32  || char_code == 42 || char_code == 45 || char_code == 46 ||char_code == 95  )
-        end
-
-        # Deletes *, digits, and whitespace, replaces BJOUZ? with X, and replaces non-(letters, -) with -
-        def Util.clean_seq_str( seq_str )
-            seq_str = seq_str.upcase
-            seq_str = seq_str.gsub( /\s+/, '' )
-            seq_str = seq_str.gsub( /\d+/, '' )
-            seq_str = seq_str.gsub( '*', '' )
-            seq_str = seq_str.gsub( /[BJOUZ?]/, 'X' )
-            seq_str = seq_str.gsub( /[^A-Z\-]/, '-' )
-            seq_str
-        end
-
-        # raises ArgumentError
-        def Util.check_file_for_readability( path )
-            unless ( File.exist?( path ) )
-                error_msg = "file [#{path}] does not exist"
-                raise ArgumentError, error_msg
-            end
-            unless ( File.file?( path ) )
-                error_msg = "file [#{path}] is not a regular file"
-                raise ArgumentError, error_msg
-            end
-            unless ( File.readable?( path ) )
-                error_msg = "file [#{path}] is not a readable file"
-                raise ArgumentError, error_msg
-            end
-            if ( File.zero?( path ) )
-                error_msg = "file [#{path}] is empty"
-                raise ArgumentError, error_msg
-            end
-        end
-
-        # raises ArgumentError
-        def Util.check_file_for_writability( path )
-            if File.directory?( path )
-                error_msg = "file [#{path}] is an existing directory"
-                raise ArgumentError, error_msg
-            elsif File.exist?( path )
-                error_msg = "file [#{path}] already exists"
-                raise ArgumentError, error_msg
-            elsif File.writable?( path )
-                error_msg = "file [#{path}] is not writeable"
-                raise ArgumentError, error_msg
-            end
-        end
-
-        def Util.fatal_error_if_not_writable( prg_name, path )
-            begin
-                Util.check_file_for_writability( path )
-            rescue ArgumentError => e
-                Util.fatal_error( prg_name, e.to_s )
-            end
-        end
-
-        def Util.fatal_error_if_not_readable( prg_name, path )
-            begin
-                Util.check_file_for_readability( path )
-            rescue ArgumentError => e
-                Util.fatal_error( prg_name, e.to_s )
-            end
-        end
-
-        def Util.get_env_variable_value( env_variable ) 
-            value = ENV[env_variable]
-            if value == nil || value.empty?
-                error_msg = "apparently environment variable #{env_variable} has not been set"
-                raise StandardError, error_msg 
-            end
-            value
-        end
-        
-
-        # raises ArgumentError
-        def Util.file2array( path, split_by_semicolon )
-            Util.check_file_for_readability( path )
-            a = Array.new()
-            c = 0
-            File.open( path ) do | file |
-                while line = file.gets
-                    if ( line =~ /^\s*(\S.*?)\s*$/ )
-                        s = $1
-                        if ( split_by_semicolon && s =~/;/ )
-                            sa = s.split( /;/ )
-                            for i in 0 ... sa.length()
-                                a[ c ] = sa[ i ].strip!
-                            end
-                        else
-                            a[ c ] = s
-                        end
-                        c += 1
-                    end
-                end
-            end
-            return a
-        end
-
-        def Util.print_program_information( prg_name,
-                prg_version,
-                prg_desc,
-                date,
-                copyright,
-                contact,
-                www,
-                io = STDOUT )
-
-            if RUBY_VERSION !~ /1.9/
-                puts( "Your ruby version is #{RUBY_VERSION}, expected 1.9.x " )
-                exit( -1 )
-            end
-
-            ruby_version = RUBY_VERSION
-            l = prg_name.length + prg_version.length + date.length + ruby_version.length + 12
-            io.print( Evoruby::Constants::LINE_DELIMITER )
-            io.print( prg_name + " " + prg_version + " [" + date + "] [ruby " + ruby_version + "]")
-            io.print( Evoruby::Constants::LINE_DELIMITER )
-            l.times {
-                io.print( "_" )
-            }
-            io.print( Constants::LINE_DELIMITER )
-            io.print( Constants::LINE_DELIMITER )
-            io.print( prg_desc )
-            io.print( Constants::LINE_DELIMITER )
-            io.print( Constants::LINE_DELIMITER )
-            io.print( "Copyright (C) " + copyright )
-            io.print( Constants::LINE_DELIMITER )
-            io.print( "Contact: " + contact )
-            io.print( Constants::LINE_DELIMITER )
-            io.print( "         " + www )
-            io.print( Constants::LINE_DELIMITER )
-            io.print( Constants::LINE_DELIMITER )
-        end
-
-        def Util.fatal_error( prg_name, message, io = STDOUT )
-            io.print( Constants::LINE_DELIMITER )
-            if ( !Util.is_string_empty?( prg_name ) )
-                io.print( "[" + prg_name + "] > " + message )
+  class Util
+
+    def Util.normalize_seq_name( name, length )
+      if name.length > length
+        name = name[ 0, length ]
+      elsif name.length < length
+        for i in 0 ... length - name.length
+          name = name + " "
+        end
+      end
+      name
+    end
+
+    # Returns true if char_code is <= 32 (space and control characters) or one of: * - . _
+    def Util.is_aa_gap_character?( char_code )
+      return ( char_code <= 32  || char_code == 42 || char_code == 45 || char_code == 46 ||char_code == 95  )
+    end
+
+    # Deletes *, digits, and whitespace, replaces BJOUZ? with X, and replaces non-(letters, -) with -
+    def Util.clean_seq_str( seq_str )
+      seq_str = seq_str.upcase
+      seq_str = seq_str.gsub( /\s+/, '' )
+      seq_str = seq_str.gsub( /\d+/, '' )
+      seq_str = seq_str.gsub( '*', '' )
+      seq_str = seq_str.gsub( /[BJOUZ?]/, 'X' )
+      seq_str = seq_str.gsub( /[^A-Z\-]/, '-' )
+      seq_str
+    end
+
+    # raises IOError
+    def Util.check_file_for_readability( path )
+      unless ( File.exist?( path ) )
+        error_msg = "file [#{path}] does not exist"
+        raise IOError, error_msg
+      end
+      unless ( File.file?( path ) )
+        error_msg = "file [#{path}] is not a regular file"
+        raise IOError, error_msg
+      end
+      unless ( File.readable?( path ) )
+        error_msg = "file [#{path}] is not a readable file"
+        raise IOError, error_msg
+      end
+      if ( File.zero?( path ) )
+        error_msg = "file [#{path}] is empty"
+        raise IOError, error_msg
+      end
+    end
+
+    # raises IOError
+    def Util.check_file_for_writability( path )
+      if File.directory?( path )
+        error_msg = "file [#{path}] is an existing directory"
+        raise IOError, error_msg
+      elsif File.exist?( path )
+        error_msg = "file [#{path}] already exists"
+        raise IOError, error_msg
+      elsif File.writable?( path )
+        error_msg = "file [#{path}] is not writeable"
+        raise IOError, error_msg
+      end
+    end
+
+    def Util.fatal_error_if_not_writable( prg_name, path )
+      begin
+        Util.check_file_for_writability( path )
+      rescue IOError => e
+        Util.fatal_error( prg_name, e.to_s )
+      end
+    end
+
+    def Util.fatal_error_if_not_readable( prg_name, path )
+      begin
+        Util.check_file_for_readability( path )
+      rescue IOError => e
+        Util.fatal_error( prg_name, e.to_s )
+      end
+    end
+
+    def Util.get_env_variable_value( env_variable )
+      value = ENV[env_variable]
+      if value == nil || value.empty?
+        error_msg = "apparently environment variable #{env_variable} has not been set"
+        raise StandardError, error_msg
+      end
+      value
+    end
+
+
+    # raises IOError (via Util.check_file_for_readability)
+    def Util.file2array( path, split_by_semicolon )
+      Util.check_file_for_readability( path )
+      a = Array.new()
+      c = 0
+      File.open( path ) do | file |
+        while line = file.gets
+          if ( line =~ /^\s*(\S.*?)\s*$/ )
+            s = $1
+            if ( split_by_semicolon && s =~/;/ )
+              sa = s.split( /;/ )
+              for i in 0 ... sa.length()
+                a[ c ] = sa[ i ].strip!
+              end
             else
-                io.print( " > " + message )
-            end
-            io.print( Constants::LINE_DELIMITER )
-            io.print( Constants::LINE_DELIMITER )
-            exit( -1 )
-        end
-
-        def Util.print_message( prg_name, message, io = STDOUT )
-            if ( !Util.is_string_empty?( prg_name ) )
-                io.print( "[" + prg_name + "] > " + message )
-            else
-                io.print( " > " + message )
-            end
-            io.print( Constants::LINE_DELIMITER )
-        end
-
-        def Util.print_warning_message( prg_name, message, io = STDOUT )
-            if ( !Util.is_string_empty?( prg_name ) )
-                io.print( "[" + prg_name + "] > WARNING: " + message )
-            else
-                io.print( " > " + message )
-            end
-            io.print( Constants::LINE_DELIMITER )
-        end
-
-        def Util.is_string_empty?( s )
-            return ( s == nil || s.length < 1 )
-        end
-
-        # From "Ruby Cookbook"
-        # counts_hash: key is a "name", value is the count (integer)
-        def Util.draw_histogram( counts_hash, char = "#" )
-            pairs = counts_hash.keys.collect { |x| [ x.to_s, counts_hash[ x ] ] }.sort
-            largest_key_size = pairs.max { |x, y| x[ 0 ].size <=> y[ 0 ].size }[ 0 ].size
-            pairs.inject( "" ) do | s, kv |
-                s << "#{ kv[ 0 ].ljust( largest_key_size ) }  | #{ char*kv[ 1 ] }" + Constants::LINE_DELIMITER
-            end
-        end
-
-        def Util.looks_like_fasta?( path )
-            Util.check_file_for_readability( path )
-            File.open( path ) do | file |
-                while line = file.gets
-                    if ( line !~ /\S/ || line =~ /^\s*#/ )
-                    elsif line =~ /^\s*>\s*(.+)/
-                        return true
-                    else
-                        return false
-                    end
-                end
-            end
-            error_msg = "unexpected format"
-            raise IOError, error_msg
-        end
-
-    end # class Util
+              a[ c ] = s
+            end
+            c += 1
+          end
+        end
+      end
+      return a
+    end
+
+    def Util.print_program_information( prg_name,
+        prg_version,
+        prg_desc,
+        date,
+        copyright,
+        contact,
+        www,
+        io = STDOUT )
+
+      if RUBY_VERSION !~ /1.9/
+        puts( "Your ruby version is #{RUBY_VERSION}, expected 1.9.x " )
+        exit( -1 )
+      end
+
+      ruby_version = RUBY_VERSION
+      l = prg_name.length + prg_version.length + date.length + ruby_version.length + 12
+      io.print( Evoruby::Constants::LINE_DELIMITER )
+      io.print( prg_name + " " + prg_version + " [" + date + "] [ruby " + ruby_version + "]")
+      io.print( Evoruby::Constants::LINE_DELIMITER )
+      l.times {
+        io.print( "_" )
+      }
+      io.print( Constants::LINE_DELIMITER )
+      io.print( Constants::LINE_DELIMITER )
+      io.print( prg_desc )
+      io.print( Constants::LINE_DELIMITER )
+      io.print( Constants::LINE_DELIMITER )
+      io.print( "Copyright (C) " + copyright )
+      io.print( Constants::LINE_DELIMITER )
+      io.print( "Contact: " + contact )
+      io.print( Constants::LINE_DELIMITER )
+      io.print( "         " + www )
+      io.print( Constants::LINE_DELIMITER )
+      io.print( Constants::LINE_DELIMITER )
+    end
+
+    def Util.fatal_error( prg_name, message, io = STDOUT )
+      io.print( Constants::LINE_DELIMITER )
+      if ( !Util.is_string_empty?( prg_name ) )
+        io.print( "[" + prg_name + "] > " + message )
+      else
+        io.print( " > " + message )
+      end
+      io.print( Constants::LINE_DELIMITER )
+      io.print( Constants::LINE_DELIMITER )
+      exit( -1 )
+    end
+
+    def Util.print_message( prg_name, message, io = STDOUT )
+      if ( !Util.is_string_empty?( prg_name ) )
+        io.print( "[" + prg_name + "] > " + message )
+      else
+        io.print( " > " + message )
+      end
+      io.print( Constants::LINE_DELIMITER )
+    end
+
+    def Util.print_warning_message( prg_name, message, io = STDOUT )
+      if ( !Util.is_string_empty?( prg_name ) )
+        io.print( "[" + prg_name + "] > WARNING: " + message )
+      else
+        io.print( " > " + message )
+      end
+      io.print( Constants::LINE_DELIMITER )
+    end
+
+    def Util.is_string_empty?( s )
+      return ( s == nil || s.length < 1 )
+    end
+
+    # From "Ruby Cookbook"
+    # counts_hash: key is a "name", value is the count (integer)
+    def Util.draw_histogram( counts_hash, char = "#" )
+      pairs = counts_hash.keys.collect { |x| [ x.to_s, counts_hash[ x ] ] }.sort
+      largest_key_size = pairs.max { |x, y| x[ 0 ].size <=> y[ 0 ].size }[ 0 ].size
+      pairs.inject( "" ) do | s, kv |
+        s << "#{ kv[ 0 ].ljust( largest_key_size ) }  | #{ char*kv[ 1 ] }" + Constants::LINE_DELIMITER
+      end
+    end
+
+    def Util.looks_like_fasta?( path )
+      Util.check_file_for_readability( path )
+      File.open( path ) do | file |
+        while line = file.gets
+          if ( line !~ /\S/ || line =~ /^\s*#/ )
+          elsif line =~ /^\s*>\s*(.+)/
+            return true
+          else
+            return false
+          end
+        end
+      end
+      error_msg = "unexpected format"
+      raise IOError, error_msg
+    end
+
+  end # class Util
 
 end # module Evoruby