in progress
authorcmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 19 Oct 2012 06:02:00 +0000 (06:02 +0000)
committercmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 19 Oct 2012 06:02:00 +0000 (06:02 +0000)
forester/ruby/evoruby/lib/evo/tool/hmmscan_parser.rb [deleted file]

diff --git a/forester/ruby/evoruby/lib/evo/tool/hmmscan_parser.rb b/forester/ruby/evoruby/lib/evo/tool/hmmscan_parser.rb
deleted file mode 100644 (file)
index c2f8177..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-#
-# = lib/evo/apps/hmmscan_parser.rb - HmmscanParser class
-#
-# Copyright::  Copyright (C) 2006-2007 Christian M. Zmasek
-# License::    GNU Lesser General Public License (LGPL)
-#
-# $Id: hmmscan_parser.rb,v 1.5 2010/12/13 19:00:11 cmzmasek Exp $
-#
-# last modified: 11/24/2009
-
-require 'lib/evo/util/constants'
-require 'lib/evo/util/util'
-require 'lib/evo/util/command_line_arguments'
-
-module Evoruby
-
-    class HmmscanParser
-
-        PRG_NAME       = "hsp"
-        PRG_VERSION    = "1.0.1"
-        PRG_DESC       = "hmmscan parser"
-        PRG_DATE       = "2009.11.24"
-        COPYRIGHT      = "2009 Christian M Zmasek"
-        CONTACT        = "phylosoft@gmail.com"
-        WWW            = "www.phylosoft.org"
-
-        DELIMITER_OPTION              = "d"
-        E_VALUE_THRESHOLD_OPTION      = "e"
-        IGNORE_DUF_OPTION             = "i"
-        PARSE_OUT_DESCRIPITION_OPTION = "a"
-        HELP_OPTION_1                 = "help"
-        HELP_OPTION_2                 = "h"
-
-        def initialize
-            @domain_counts = Hash.new
-        end
-
-        # raises ArgumentError, IOError
-        def parse( inpath,
-                outpath,
-                column_delimiter,
-                e_value_threshold,
-                ignore_dufs,
-                get_descriptions )
-            Util.check_file_for_readability( inpath )
-            Util.check_file_for_writability( outpath )
-
-            outfile = File.open( outpath, "a" )
-
-            query    = String.new
-            desc     = String.new
-            model    = String.new
-            env_from = String.new
-            env_to   = String.new
-            i_e_value  = String.new
-
-            queries_count = 0
-
-            nl = Constants::LINE_DELIMITER
-
-            File.open( inpath ) do | file |
-                while line = file.gets
-                    if !HmmscanParser.is_ignorable?( line ) && line =~ /^\S+\s+\S/
-
-                        #         tn      acc     tlen    query   acc     qlen    Evalue  score   bias    #       of      c-E     i-E     score   bias    hf      ht      af      at      ef      et      acc     desc
-                        #         1       2       3       4       5       6       7       8       9       10      11      12      13      14      15      16      17      18      19      20      21      22      23
-                        line =~ /^(\S+)\s+(\S+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(.*)/
-
-                        model     = $1
-                        query     = $4
-                        i_e_value = $13.to_f
-                        env_from  = $20.to_i
-                        env_to    = $21.to_i
-
-                        if ( ( ( e_value_threshold < 0.0 ) || ( i_e_value <= e_value_threshold ) ) &&
-                                 ( !ignore_dufs || ( model !~ /^DUF\d+/ ) ) )
-                            count_model( model )
-                            outfile.print( query +
-                                 column_delimiter )
-                            if ( get_descriptions )
-                                outfile.print( desc +
-                                     column_delimiter )
-                            end
-                            outfile.print( model +
-                                 column_delimiter +
-                                 env_from.to_s +
-                                 column_delimiter +
-                                 env_to.to_s +
-                                 column_delimiter +
-                                 i_e_value.to_s )
-                            outfile.print( nl )
-                        end
-                    end
-                end # while line = file.gets
-            end
-            outfile.flush()
-            outfile.close()
-
-            return queries_count
-
-        end # def parse
-
-        def count_model( model )
-            if ( @domain_counts.has_key?( model ) )
-                count = @domain_counts[ model ].to_i
-                count += 1
-                @domain_counts[ model ] = count
-            else
-                @domain_counts[ model ] = 1
-            end
-        end
-
-
-        def get_domain_counts()
-            return @domain_counts
-        end
-
-        def run()
-
-            Util.print_program_information( PRG_NAME,
-                PRG_VERSION,
-                PRG_DESC,
-                PRG_DATE,
-                COPYRIGHT,
-                CONTACT,
-                WWW,
-                STDOUT )
-
-            begin
-                cla = CommandLineArguments.new( ARGV )
-            rescue ArgumentError => e
-                Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
-            end
-
-            if ( cla.is_option_set?( HELP_OPTION_1 ) ||
-                     cla.is_option_set?( HELP_OPTION_2 ) )
-                print_help
-                exit( 0 )
-            end
-
-            if ( cla.get_number_of_files != 2 )
-                print_help
-                exit( -1 )
-            end
-
-            allowed_opts = Array.new
-            allowed_opts.push( DELIMITER_OPTION )
-            allowed_opts.push( E_VALUE_THRESHOLD_OPTION )
-            allowed_opts.push( IGNORE_DUF_OPTION )
-            allowed_opts.push( PARSE_OUT_DESCRIPITION_OPTION )
-
-            disallowed = cla.validate_allowed_options_as_str( allowed_opts )
-            if ( disallowed.length > 0 )
-                Util.fatal_error( PRG_NAME,
-                    "unknown option(s): " + disallowed,
-                    STDOUT )
-            end
-
-            inpath = cla.get_file_name( 0 )
-            outpath = cla.get_file_name( 1 )
-
-            column_delimiter = "\t"
-            if ( cla.is_option_set?( DELIMITER_OPTION ) )
-                begin
-                    column_delimiter = cla.get_option_value( DELIMITER_OPTION )
-                rescue ArgumentError => e
-                    Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
-                end
-            end
-
-            e_value_threshold = -1.0
-            if ( cla.is_option_set?( E_VALUE_THRESHOLD_OPTION ) )
-                begin
-                    e_value_threshold = cla.get_option_value_as_float( E_VALUE_THRESHOLD_OPTION )
-                rescue ArgumentError => e
-                    Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
-                end
-                if ( e_value_threshold < 0.0 )
-                    Util.fatal_error( PRG_NAME, "attempt to use a negative E-value threshold", STDOUT )
-                end
-            end
-
-            ignore_dufs = false
-            if ( cla.is_option_set?( IGNORE_DUF_OPTION ) )
-                ignore_dufs = true
-            end
-
-            parse_descriptions = false
-            if ( cla.is_option_set?( PARSE_OUT_DESCRIPITION_OPTION ) )
-                parse_descriptions = true
-            end
-
-            puts()
-            puts( "hmmpfam outputfile: " + inpath )
-            puts( "outputfile        : " + outpath )
-            if ( e_value_threshold >= 0.0 )
-                puts( "E-value threshold : " + e_value_threshold.to_s )
-            else
-                puts( "E-value threshold : no threshold" )
-            end
-            if ( parse_descriptions )
-                puts( "parse descriptions: true" )
-            else
-                puts( "parse descriptions: false" )
-            end
-            if ( ignore_dufs )
-                puts( "ignore DUFs       : true" )
-            else
-                puts( "ignore DUFs       : false" )
-            end
-            if ( column_delimiter == "\t" )
-                puts( "column delimiter  : TAB" )
-            else
-                puts( "column delimiter  : " + column_delimiter )
-            end
-            puts()
-
-            begin
-                queries_count = parse( inpath,
-                    outpath,
-                    column_delimiter,
-                    e_value_threshold,
-                    ignore_dufs,
-                    parse_descriptions )
-            rescue ArgumentError, IOError => e
-                Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
-            end
-            domain_counts = get_domain_counts()
-
-            puts
-            puts( "read output for a total of " + queries_count.to_s + " query sequences" )
-            puts
-            puts( "domain counts (considering potential E-value threshold and ignoring of DUFs):" )
-            puts( "(number of different domains: " + domain_counts.length.to_s + ")" )
-            puts
-            puts( Util.draw_histogram( domain_counts, "#" ) )
-            puts
-            Util.print_message( PRG_NAME, 'OK' )
-            puts
-
-        end # def run()
-
-        def print_help()
-            puts( "Usage:" )
-            puts()
-            puts( "  " + PRG_NAME + ".rb [options] <hmmscan outputfile> <outputfile>" )
-            puts()
-            puts( "  options: -" + DELIMITER_OPTION + ": column delimiter for outputfile, default is TAB" )
-            puts( "           -" + E_VALUE_THRESHOLD_OPTION  + ": E-value threshold, default is no threshold" )
-            puts( "           -" + PARSE_OUT_DESCRIPITION_OPTION  + ": parse query description (in addition to query name)" )
-            puts( "           -" + IGNORE_DUF_OPTION  + ": ignore DUFs" )
-            puts()
-        end
-
-
-        private
-
-
-        def HmmscanParser.is_ignorable?( line )
-            return ( line !~ /[A-Za-z0-9-]/ || line =~/^#/ )
-        end
-
-    end # class HmmscanParser
-
-end # module Evoruby
\ No newline at end of file