X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fevoruby%2Flib%2Fevo%2Ftool%2Fmsa_processor.rb;h=d9522779393e558a33478c6a591d916757ec76e5;hb=b2b6a9c2cec8e4b397851121b230a233aa24b1d3;hp=6299417d5bf83aad279b0a710c695e779f9666e2;hpb=dbf5b588d65d1c62094dd5d339eca5056a5ade5f;p=jalview.git diff --git a/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb b/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb index 6299417..d952277 100644 --- a/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb +++ b/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb @@ -27,9 +27,9 @@ module Evoruby class MsaProcessor PRG_NAME = "msa_pro" - PRG_DATE = "2012.05.11" + PRG_DATE = "131112" PRG_DESC = "processing of multiple sequence alignments" - PRG_VERSION = "1.06" + PRG_VERSION = "1.08" COPYRIGHT = "2008-2010 Christian M Zmasek" CONTACT = "phylosoft@gmail.com" WWW = "www.phylosoft.org" @@ -40,6 +40,7 @@ module Evoruby INPUT_TYPE_OPTION = "i" OUTPUT_TYPE_OPTION = "o" MAXIMAL_NAME_LENGTH_OPTION = "n" + DIE_IF_NAME_TOO_LONG = "d" WIDTH_OPTION = "w" CLEAN_UP_SEQ_OPTION = "c" REM_RED_OPTION = "rem_red" @@ -78,6 +79,7 @@ module Evoruby @rgoc = false @rg = false # fasta only @rem_red = false + @die_if_name_too_long = false @rgr = -1 @rsgr = -1 @rsl = -1 @@ -148,6 +150,7 @@ module Evoruby allowed_opts.push( REM_RED_OPTION ) allowed_opts.push( KEEP_MATCHING_SEQUENCES_OPTION ) allowed_opts.push( REMOVE_MATCHING_SEQUENCES_OPTION ) + allowed_opts.push( DIE_IF_NAME_TOO_LONG ) disallowed = cla.validate_allowed_options_as_str( allowed_opts ) if ( disallowed.length > 0 ) @@ -162,13 +165,13 @@ module Evoruby begin Util.check_file_for_readability( input ) - rescue ArgumentError => e + rescue IOError => e Util.fatal_error( PRG_NAME, "error: " + e.to_s ) end begin Util.check_file_for_writability( output ) - rescue ArgumentError => e + rescue IOError => e Util.fatal_error( PRG_NAME, "error: " + e.to_s ) end @@ -298,8 +301,8 @@ module Evoruby Util.print_message( PRG_NAME, "Gap-proportion of original alignment : " + gp.to_s ) log << "Gap-proportion of original alignment : " + gp.to_s + ld else - Util.print_message( PRG_NAME, "the input is not aligned" ) - log << "The input is not aligned" + ld + Util.print_message( PRG_NAME, "Input is not aligned" ) + log << "Input is not aligned" + ld end all_names = Set.new() @@ -481,8 +484,32 @@ module Evoruby Util.print_message( PRG_NAME, "Gap-proportion of processed alignment: " + gp.to_s ) log << "Gap-proportion of processed alignment: " + gp.to_s + ld else - Util.print_warning_message( PRG_NAME, "output is not aligned" ) - log << "output is not aligned" + ld + min = 0 + max = 0 + sum = 0 + first = true + for s in 0 ... msa.get_number_of_seqs + seq = msa.get_sequence( s ) + l = seq.get_length + sum += l + if l > max + max = l + end + if first || l < min + min = l + end + first = false + end + avg = sum / msa.get_number_of_seqs + Util.print_message( PRG_NAME, "Output is not aligned" ) + log << "Output is not aligned" + ld + Util.print_message( PRG_NAME, "Shortest sequence : " + min.to_s ) + log << "Shortest sequence : " + min.to_s + ld + Util.print_message( PRG_NAME, "Longest sequence : " + max.to_s ) + log << "Longest sequence : " + max.to_s + ld + Util.print_message( PRG_NAME, "Average length : " + avg.to_s ) + log << "Average length : " + avg.to_s + ld + end end @@ -511,6 +538,7 @@ module Evoruby w = PhylipSequentialWriter.new() w.clean( @clean ) w.set_max_name_length( @name_length ) + w.set_exception_if_name_too_long( @die_if_name_too_long ) elsif( @fasta_output ) w = FastaWriter.new() w.set_line_width( @width ) @@ -522,20 +550,24 @@ module Evoruby w.clean( @clean ) if ( @name_length_set ) w.set_max_name_length( @name_length ) + w.set_exception_if_name_too_long( @die_if_name_too_long ) end elsif( @nexus_output ) w = NexusWriter.new() w.clean( @clean ) w.set_max_name_length( @name_length ) + w.set_exception_if_name_too_long( @die_if_name_too_long ) end - begin io.write_to_file( msa, output, w ) rescue Exception => e Util.fatal_error( PRG_NAME, "error: " + e.to_s ) end + Util.print_message( PRG_NAME, "Number of sequences in output : " + msa.get_number_of_seqs.to_s ) + log << "Number of sequences in output : " + msa.get_number_of_seqs.to_s + ld + begin f = File.open( output + LOG_SUFFIX, 'a' ) f.print( log ) @@ -661,6 +693,7 @@ module Evoruby @last = -1 end end + def analyze_command_line( cla ) if ( cla.is_option_set?( INPUT_TYPE_OPTION ) ) begin @@ -806,6 +839,9 @@ module Evoruby Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT ) end end + if ( cla.is_option_set?( DIE_IF_NAME_TOO_LONG ) ) + @die_if_name_too_long = true + end end @@ -819,6 +855,7 @@ module Evoruby puts( " options: -" + INPUT_TYPE_OPTION + "=: f for fasta, p for phylip selex type" ) puts( " -" + OUTPUT_TYPE_OPTION + "=: f for fasta, n for nexus, p for phylip sequential (default)" ) puts( " -" + MAXIMAL_NAME_LENGTH_OPTION + "=: n=maximal name length (default for phylip 10, for fasta: unlimited )" ) + puts( " -" + DIE_IF_NAME_TOO_LONG + ": die if sequence name too long" ) puts( " -" + WIDTH_OPTION + "=: n=width (fasta output only, default is 60)" ) puts( " -" + CLEAN_UP_SEQ_OPTION + ": clean up sequences" ) puts( " -" + REMOVE_GAP_COLUMNS_OPTION + ": remove gap columns" )