X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fevoruby%2Flib%2Fevo%2Ftool%2Fmsa_processor.rb;h=57b5336615173a376eaddb6e322ead82c0528cfd;hb=2a165043be7f54dc4445bf4332a1af5283711fdf;hp=738db56274859619742fa10fee034df49c73fa4d;hpb=2044a46819969b8ac56d653aab5729fb782ec40c;p=jalview.git diff --git a/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb b/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb index 738db56..57b5336 100644 --- a/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb +++ b/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb @@ -9,7 +9,6 @@ require 'date' require 'set' - require 'lib/evo/util/constants' require 'lib/evo/util/util' require 'lib/evo/util/command_line_arguments' @@ -23,17 +22,13 @@ require 'lib/evo/io/parser/general_msa_parser' require 'lib/evo/io/writer/msa_writer' module Evoruby - class MsaProcessor PRG_NAME = "msa_pro" - PRG_DATE = "130411" + PRG_DATE = "170215" PRG_DESC = "processing of multiple sequence alignments" - PRG_VERSION = "1.07" - COPYRIGHT = "2008-2010 Christian M Zmasek" - CONTACT = "phylosoft@gmail.com" - WWW = "www.phylosoft.org" - + PRG_VERSION = "1.08" + WWW = "https://sites.google.com/site/cmzmasek/home/software/forester" NAME_LENGTH_DEFAULT = 10 WIDTH_DEFAULT_FASTA = 60 @@ -61,8 +56,6 @@ module Evoruby LOG_SUFFIX = "_msa_pro.log" HELP_OPTION_1 = "help" HELP_OPTION_2 = "h" - - def initialize() @input_format_set = false @output_format_set = false @@ -95,17 +88,14 @@ module Evoruby @last = -1 end - def run() Util.print_program_information( PRG_NAME, - PRG_VERSION, - PRG_DESC, - PRG_DATE, - COPYRIGHT, - CONTACT, - WWW, - STDOUT ) + PRG_VERSION, + PRG_DESC, + PRG_DATE, + WWW, + STDOUT ) if ( ARGV == nil || ARGV.length < 1 ) Util.print_message( PRG_NAME, "Illegal number of arguments" ) @@ -120,7 +110,7 @@ module Evoruby end if ( cla.is_option_set?( HELP_OPTION_1 ) || - cla.is_option_set?( HELP_OPTION_2 ) ) + cla.is_option_set?( HELP_OPTION_2 ) ) print_help exit( 0 ) end @@ -155,7 +145,7 @@ module Evoruby disallowed = cla.validate_allowed_options_as_str( allowed_opts ) if ( disallowed.length > 0 ) Util.fatal_error( PRG_NAME, - "unknown option(s): " + disallowed ) + "unknown option(s): " + disallowed ) end input = cla.get_file_name( 0 ) @@ -301,8 +291,8 @@ module Evoruby Util.print_message( PRG_NAME, "Gap-proportion of original alignment : " + gp.to_s ) log << "Gap-proportion of original alignment : " + gp.to_s + ld else - Util.print_message( PRG_NAME, "the input is not aligned" ) - log << "The input is not aligned" + ld + Util.print_message( PRG_NAME, "Input is not aligned" ) + log << "Input is not aligned" + ld end all_names = Set.new() @@ -430,8 +420,6 @@ module Evoruby msa = sort( msa ) end - - if ( @split > 0 ) begin msas = msa.split( @split, true ) @@ -484,8 +472,32 @@ module Evoruby Util.print_message( PRG_NAME, "Gap-proportion of processed alignment: " + gp.to_s ) log << "Gap-proportion of processed alignment: " + gp.to_s + ld else - Util.print_warning_message( PRG_NAME, "output is not aligned" ) - log << "output is not aligned" + ld + min = 0 + max = 0 + sum = 0 + first = true + for s in 0 ... msa.get_number_of_seqs + seq = msa.get_sequence( s ) + l = seq.get_length + sum += l + if l > max + max = l + end + if first || l < min + min = l + end + first = false + end + avg = sum / msa.get_number_of_seqs + Util.print_message( PRG_NAME, "Output is not aligned" ) + log << "Output is not aligned" + ld + Util.print_message( PRG_NAME, "Shortest sequence : " + min.to_s ) + log << "Shortest sequence : " + min.to_s + ld + Util.print_message( PRG_NAME, "Longest sequence : " + max.to_s ) + log << "Longest sequence : " + max.to_s + ld + Util.print_message( PRG_NAME, "Average length : " + avg.to_s ) + log << "Average length : " + avg.to_s + ld + end end @@ -494,8 +506,8 @@ module Evoruby if removed.size > 0 identicals = msa.get_identical_seqs_detected log << "the following " + identicals.size.to_s + " sequences are identical:" + ld - identicals.each { | s | - log << s + ld + identicals.each { | identical | + log << identical + ld } log << "ignoring the following " + removed.size.to_s + " redundant sequences:" + ld removed.each { | seq_name | @@ -535,13 +547,15 @@ module Evoruby w.set_exception_if_name_too_long( @die_if_name_too_long ) end - begin io.write_to_file( msa, output, w ) rescue Exception => e Util.fatal_error( PRG_NAME, "error: " + e.to_s ) end + Util.print_message( PRG_NAME, "Number of sequences in output : " + msa.get_number_of_seqs.to_s ) + log << "Number of sequences in output : " + msa.get_number_of_seqs.to_s + ld + begin f = File.open( output + LOG_SUFFIX, 'a' ) f.print( log ) @@ -550,13 +564,11 @@ module Evoruby Util.fatal_error( PRG_NAME, "error: " + e.to_s ) end - end Util.print_message( PRG_NAME, "OK" ) puts end - private def sort( msa ) @@ -578,77 +590,94 @@ module Evoruby @fasta_input = fi @input_format_set = true end + def set_phylip_input( pi = true ) @phylip_input = pi @input_format_set = true end + def set_name_length( i ) @name_length = i @name_length_set = true end + def set_width( i ) @width = i end + def set_fasta_output( fo = true ) @fasta_output = fo @output_format_set = true end + def set_pi_output( pso = true ) @pi_output = pso @output_format_set = true end + def set_nexus_output( nexus = true ) @nexus_output = nexus @output_format_set = true end + def set_clean( c = true ) @clean = c end + def set_remove_gap_columns( rgc = true ) @rgc = rgc end + def set_remove_gap_only_columns( rgoc = true ) @rgoc = rgoc end + def set_remove_gaps( rg = true ) @rg = rg end + def set_remove_gap_ratio( rgr ) @rgr = rgr end + def set_remove_seqs_gap_ratio( rsgr ) @rsgr = rsgr end + def set_remove_seqs_min_non_gap_length( rsl ) @rsl = rsl end + def set_remove_seqs( file ) @seqs_name_file = file @remove_seqs = true @keep_seqs = false end + def set_keep_seqs( file ) @seqs_name_file = file @keep_seqs = true @remove_seqs = false end + def set_trim( first, last ) @trim = true @first = first @last = last end + def set_remove_matching( remove ) @remove_matching = remove end + def set_keep_matching( keep ) @keep_matching = keep end + def set_rem_red( rr ) @rem_red = rr end - - def set_split( s ) if ( s > 0 ) @split = s @@ -817,7 +846,6 @@ module Evoruby @die_if_name_too_long = true end - end def print_help() @@ -849,11 +877,6 @@ module Evoruby puts() end - - - - end # class MsaProcessor - end # module Evoruby