in progress
authorcmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 29 Sep 2012 04:38:51 +0000 (04:38 +0000)
committercmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 29 Sep 2012 04:38:51 +0000 (04:38 +0000)
forester/ruby/evoruby/exe/dsx_old.rb [deleted file]
forester/ruby/evoruby/lib/evo/apps/domain_sequence_extractor_old.rb [deleted file]

diff --git a/forester/ruby/evoruby/exe/dsx_old.rb b/forester/ruby/evoruby/exe/dsx_old.rb
deleted file mode 100755 (executable)
index 1ff35f7..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/local/bin/ruby -w
-#
-# = exe/dsx
-#
-# Copyright::  Copyright (C) 2006-2007 Christian M. Zmasek
-# License::    GNU Lesser General Public License (LGPL)
-#
-# $Id: dsx.rb,v 1.3 2008/08/28 17:09:06 cmzmasek Exp $
-#
-# last modified: 06/11/2007
-
-require 'lib/evo/apps/domain_sequence_extractor'
-
-module Evoruby
-
-    dsx = DomainSequenceExtractor.new()
-
-    dsx.run()
-
-end  # module Evoruby
diff --git a/forester/ruby/evoruby/lib/evo/apps/domain_sequence_extractor_old.rb b/forester/ruby/evoruby/lib/evo/apps/domain_sequence_extractor_old.rb
deleted file mode 100644 (file)
index ac6199b..0000000
+++ /dev/null
@@ -1,262 +0,0 @@
-#
-# = lib/evo/apps/domain_sequence_extractor.rb - DomainSequenceExtractor class
-#
-# Copyright::  Copyright (C) 2006-2008 Christian M. Zmasek
-# License::    GNU Lesser General Public License (LGPL)
-#
-# $Id: domain_sequence_extractor.rb,v 1.19 2010/12/13 19:00:11 cmzmasek Exp $
-
-
-require 'lib/evo/util/constants'
-require 'lib/evo/util/util'
-require 'lib/evo/util/command_line_arguments'
-require 'lib/evo/io/parser/hmmsearch_domain_extractor'
-
-module Evoruby
-
-    class DomainSequenceExtractor
-
-        PRG_NAME       = "dsx"
-        PRG_VERSION    = "1.1.0"
-        PRG_DESC       = "extraction of domain sequences from hmmsearch output"
-        PRG_DATE       = "2008.01.03"
-        COPYRIGHT      = "2008-2009 Christian M Zmasek"
-        CONTACT        = "phylosoft@gmail.com"
-        WWW            = "www.phylosoft.org"
-
-        E_VALUE_THRESHOLD_OPTION           = 'e'
-        LENGTH_THRESHOLD_OPTION            = 'l'
-        ADD_POSITION_OPTION                = 'p'
-        ADD_DOMAIN_NUMBER_OPTION           = 'd'
-        ADD_DOMAIN_NUMBER_OPTION_AS_DIGIT  = 'dd'
-        ADD_DOMAIN_NUMBER_OPTION_AS_LETTER = 'dl'
-        TRIM_OPTION                        = 't'
-        LOG_FILE_SUFFIX                    = '_domain_seq_extr.log'
-        PASSED_SEQS_SUFFIX                 = '_domain_seq_extr_passed'
-        FAILED_SEQS_SUFFIX                 = '_domain_seq_extr_failed'
-        HELP_OPTION_1                      = 'help'
-        HELP_OPTION_2                      = 'h'
-
-        def run()
-
-            Util.print_program_information( PRG_NAME,
-                PRG_VERSION,
-                PRG_DESC ,
-                PRG_DATE,
-                COPYRIGHT,
-                CONTACT,
-                WWW,
-                STDOUT )
-
-            ld = Constants::LINE_DELIMITER
-
-            begin
-                cla = CommandLineArguments.new( ARGV )
-            rescue ArgumentError
-                Util.fatal_error( PRG_NAME, "error: " + $!, STDOUT )
-            end
-
-            if ( cla.is_option_set?( HELP_OPTION_1 ) ||
-                     cla.is_option_set?( HELP_OPTION_2 ) )
-                print_help
-                exit( 0 )
-            end
-
-            if ( cla.get_number_of_files != 3 )
-                print_help
-                exit( -1 )
-            end
-
-            allowed_opts = Array.new
-            allowed_opts.push( E_VALUE_THRESHOLD_OPTION )
-            allowed_opts.push( ADD_POSITION_OPTION )
-            allowed_opts.push( ADD_DOMAIN_NUMBER_OPTION )
-            allowed_opts.push( LENGTH_THRESHOLD_OPTION )
-            allowed_opts.push( ADD_DOMAIN_NUMBER_OPTION_AS_DIGIT )
-            allowed_opts.push( ADD_DOMAIN_NUMBER_OPTION_AS_LETTER )
-            allowed_opts.push( TRIM_OPTION )
-
-            disallowed = cla.validate_allowed_options_as_str( allowed_opts )
-            if ( disallowed.length > 0 )
-                Util.fatal_error( PRG_NAME,
-                    "unknown option(s): " + disallowed,
-                    STDOUT )
-            end
-
-            hmmsearch_output    = cla.get_file_name( 0 )
-            fasta_sequence_file = cla.get_file_name( 1 )
-            outfile             = cla.get_file_name( 2 )
-
-            add_position = false
-            if ( cla.is_option_set?( ADD_POSITION_OPTION ) )
-                add_position = true
-            end
-
-            trim = false
-            if ( cla.is_option_set?( TRIM_OPTION ) )
-                trim = true
-            end
-
-            add_domain_number           = false
-            add_domain_number_as_letter = false
-            add_domain_number_as_digit  = false
-
-            if ( cla.is_option_set?( ADD_DOMAIN_NUMBER_OPTION ) )
-                add_domain_number = true
-            end
-            if ( cla.is_option_set?( ADD_DOMAIN_NUMBER_OPTION_AS_LETTER ) )
-                add_domain_number_as_letter = true
-            end
-            if ( cla.is_option_set?( ADD_DOMAIN_NUMBER_OPTION_AS_DIGIT ) )
-                add_domain_number_as_digit = true
-            end
-
-            if ( add_domain_number_as_letter && add_domain_number_as_digit )
-                puts( "attempt to add domain number as letter and digit at the same time" )
-                print_help
-                exit( -1 )
-            end
-
-            e_value_threshold = -1.0
-            if ( cla.is_option_set?( E_VALUE_THRESHOLD_OPTION ) )
-                begin
-                    e_value_threshold = cla.get_option_value_as_float( E_VALUE_THRESHOLD_OPTION )
-                rescue ArgumentError => e
-                    Forester::Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
-                end
-                if ( e_value_threshold < 0.0 )
-                    Forester::Util.fatal_error( PRG_NAME, "attempt to use a negative E-value threshold", STDOUT )
-                end
-            end
-
-            length_threshold = -1
-            if ( cla.is_option_set?( LENGTH_THRESHOLD_OPTION ) )
-                begin
-                    length_threshold = cla.get_option_value_as_int( LENGTH_THRESHOLD_OPTION )
-                rescue ArgumentError => e
-                    Forester::Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
-                end
-                if ( length_threshold < 0)
-                    Forester::Util.fatal_error( PRG_NAME, "attempt to use a negative length threshold", STDOUT )
-                end
-            end
-
-            log = String.new
-
-            puts()
-            puts( "Hmmsearch outputfile                   : " + hmmsearch_output )
-            log << "Hmmsearch outputfile                   : " + hmmsearch_output + ld
-            puts( "Fasta sequencefile (complete sequences): " + fasta_sequence_file )
-            log << "Fasta sequencefile (complete sequences): " + fasta_sequence_file + ld
-            puts( "Outputfile                             : " + outfile )
-            log << "Outputfile                             : " + outfile + ld
-            puts( "Passed sequences outfile (fasta)       : " + outfile + PASSED_SEQS_SUFFIX )
-            log << "Passed sequences outfile (fasta)       : " + outfile + PASSED_SEQS_SUFFIX + ld
-            puts( "Failed sequences outfile (fasta)       : " + outfile + FAILED_SEQS_SUFFIX )
-            log << "Failed sequences outfile (fasta)       : " + outfile + FAILED_SEQS_SUFFIX + ld
-            puts( "Logfile                                : " + outfile + LOG_FILE_SUFFIX )
-            log <<  "Logfile                                : " + outfile + LOG_FILE_SUFFIX + ld
-            if ( e_value_threshold >= 0.0 )
-                puts( "E-value threshold : " + e_value_threshold.to_s )
-                log << "E-value threshold : " + e_value_threshold.to_s + ld
-            else
-                puts( "E-value threshold : no threshold" )
-                log << "E-value threshold : no threshold" + ld
-            end
-            if ( length_threshold > 0 )
-                puts( "Length threshold  : " + length_threshold.to_s )
-                log << "Length threshold  : " + length_threshold.to_s + ld
-            else
-                puts( "Length threshold  : no threshold" )
-                log << "Length threshold  : no threshold" + ld
-            end
-
-            if ( trim )
-                puts( "Trim last 2 chars : true" )
-                log << "Trim last 2 chars : true" + ld
-            else
-                puts( "Trim names        : false" )
-                log << "Trim names        : false" + ld
-            end
-
-
-            if ( add_position )
-                puts( "Add positions (rel to complete seq) to extracted domains: true" )
-                log << "Add positions (rel to complete seq) to extracted domains: true" + ld
-            else
-                puts( "Add positions (rel to complete seq) to extracted domains: false" )
-                log << "Add positions (rel to complete seq) to extracted domains: false" + ld
-            end
-
-            if ( add_domain_number || add_domain_number_as_digit || add_domain_number_as_letter )
-                puts( "Add numbers to extracted domains (in case of more than one domain per complete seq): true" )
-                log << "Add numbers to extracted domains (in case of more than one domain per complete seq): true" + ld
-            else
-                puts( "Add numbers to extracted domains (in case of more than one domain per complete seq): false" )
-                log << "Add numbers to extracted domains (in case of more than one domain per complete seq): false" + ld
-            end
-
-            puts
-
-            domain_count = 0
-            begin
-                parser = HmmsearchDomainExtractor.new()
-                domain_count = parser.parse( hmmsearch_output,
-                    fasta_sequence_file,
-                    outfile,
-                    outfile + PASSED_SEQS_SUFFIX,
-                    outfile + FAILED_SEQS_SUFFIX,
-                    e_value_threshold,
-                    length_threshold,
-                    add_position,
-                    add_domain_number,
-                    add_domain_number_as_digit,
-                    add_domain_number_as_letter,
-                    trim,
-                    log )
-            rescue ArgumentError, IOError, StandardError => e
-                Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
-            rescue Exception => e
-                Util.fatal_error( PRG_NAME, "unexpected exception!: " + e.to_s, STDOUT )
-            end
-
-            puts
-            Util.print_message( PRG_NAME, "extracted a total of " + domain_count.to_s + " domains" )
-            Util.print_message( PRG_NAME, "wrote;               " + outfile )
-            Util.print_message( PRG_NAME, "wrote:               " + outfile + LOG_FILE_SUFFIX )
-            Util.print_message( PRG_NAME, "(wrote:              " + outfile + PASSED_SEQS_SUFFIX + ")" )
-            Util.print_message( PRG_NAME, "(wrote:              " + outfile + FAILED_SEQS_SUFFIX + ")" )
-
-            begin
-                f = File.open( outfile + LOG_FILE_SUFFIX, 'a' )
-                f.print( log )
-                f.close
-            rescue Exception => e
-                Util.fatal_error( PRG_NAME, "error: " + e.to_s )
-            end
-
-            puts
-            Util.print_message( PRG_NAME, "OK" )
-            puts
-
-        end
-
-        def print_help()
-            puts()
-            puts( "Usage:" )
-            puts()
-            puts( "  " + PRG_NAME + ".rb [options] <hmmsearch outputfile> <file containing complete sequences in fasta format> <outputfile>" )
-            puts()
-            puts( "  options: -" + E_VALUE_THRESHOLD_OPTION  + "=<f>: E-value threshold, default is no threshold" )
-            puts( "           -" + LENGTH_THRESHOLD_OPTION   + "=<i>: length threshold, default is no threshold" )
-            puts( "           -" + ADD_POSITION_OPTION  + ": to add positions (rel to complete seq) to extracted domains" )
-            puts( "           -" + ADD_DOMAIN_NUMBER_OPTION  + ": to add numbers to extracted domains (in case of more than one domain per complete seq) (example \"domain~2-3\")" )
-            puts( "           -" + ADD_DOMAIN_NUMBER_OPTION_AS_DIGIT  + ": to add numbers to extracted domains as digit (example \"domain2\")" )
-            puts( "           -" + ADD_DOMAIN_NUMBER_OPTION_AS_LETTER  + ": to add numbers to extracted domains as letter (example \"domaina\")" )
-            puts( "           -" + TRIM_OPTION  + ": to remove the last 2 characters from sequence names" )
-            puts()
-        end
-
-    end # class DomainSequenceExtractor
-
-end # module Evoruby