X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fevoruby%2Flib%2Fevo%2Ftool%2Fmsa_processor.rb;h=d9522779393e558a33478c6a591d916757ec76e5;hb=b2b6a9c2cec8e4b397851121b230a233aa24b1d3;hp=6299417d5bf83aad279b0a710c695e779f9666e2;hpb=dbf5b588d65d1c62094dd5d339eca5056a5ade5f;p=jalview.git
diff --git a/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb b/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb
index 6299417..d952277 100644
--- a/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb
+++ b/forester/ruby/evoruby/lib/evo/tool/msa_processor.rb
@@ -27,9 +27,9 @@ module Evoruby
class MsaProcessor
PRG_NAME = "msa_pro"
- PRG_DATE = "2012.05.11"
+ PRG_DATE = "131112"
PRG_DESC = "processing of multiple sequence alignments"
- PRG_VERSION = "1.06"
+ PRG_VERSION = "1.08"
COPYRIGHT = "2008-2010 Christian M Zmasek"
CONTACT = "phylosoft@gmail.com"
WWW = "www.phylosoft.org"
@@ -40,6 +40,7 @@ module Evoruby
INPUT_TYPE_OPTION = "i"
OUTPUT_TYPE_OPTION = "o"
MAXIMAL_NAME_LENGTH_OPTION = "n"
+ DIE_IF_NAME_TOO_LONG = "d"
WIDTH_OPTION = "w"
CLEAN_UP_SEQ_OPTION = "c"
REM_RED_OPTION = "rem_red"
@@ -78,6 +79,7 @@ module Evoruby
@rgoc = false
@rg = false # fasta only
@rem_red = false
+ @die_if_name_too_long = false
@rgr = -1
@rsgr = -1
@rsl = -1
@@ -148,6 +150,7 @@ module Evoruby
allowed_opts.push( REM_RED_OPTION )
allowed_opts.push( KEEP_MATCHING_SEQUENCES_OPTION )
allowed_opts.push( REMOVE_MATCHING_SEQUENCES_OPTION )
+ allowed_opts.push( DIE_IF_NAME_TOO_LONG )
disallowed = cla.validate_allowed_options_as_str( allowed_opts )
if ( disallowed.length > 0 )
@@ -162,13 +165,13 @@ module Evoruby
begin
Util.check_file_for_readability( input )
- rescue ArgumentError => e
+ rescue IOError => e
Util.fatal_error( PRG_NAME, "error: " + e.to_s )
end
begin
Util.check_file_for_writability( output )
- rescue ArgumentError => e
+ rescue IOError => e
Util.fatal_error( PRG_NAME, "error: " + e.to_s )
end
@@ -298,8 +301,8 @@ module Evoruby
Util.print_message( PRG_NAME, "Gap-proportion of original alignment : " + gp.to_s )
log << "Gap-proportion of original alignment : " + gp.to_s + ld
else
- Util.print_message( PRG_NAME, "the input is not aligned" )
- log << "The input is not aligned" + ld
+ Util.print_message( PRG_NAME, "Input is not aligned" )
+ log << "Input is not aligned" + ld
end
all_names = Set.new()
@@ -481,8 +484,32 @@ module Evoruby
Util.print_message( PRG_NAME, "Gap-proportion of processed alignment: " + gp.to_s )
log << "Gap-proportion of processed alignment: " + gp.to_s + ld
else
- Util.print_warning_message( PRG_NAME, "output is not aligned" )
- log << "output is not aligned" + ld
+ min = 0
+ max = 0
+ sum = 0
+ first = true
+ for s in 0 ... msa.get_number_of_seqs
+ seq = msa.get_sequence( s )
+ l = seq.get_length
+ sum += l
+ if l > max
+ max = l
+ end
+ if first || l < min
+ min = l
+ end
+ first = false
+ end
+ avg = sum / msa.get_number_of_seqs
+ Util.print_message( PRG_NAME, "Output is not aligned" )
+ log << "Output is not aligned" + ld
+ Util.print_message( PRG_NAME, "Shortest sequence : " + min.to_s )
+ log << "Shortest sequence : " + min.to_s + ld
+ Util.print_message( PRG_NAME, "Longest sequence : " + max.to_s )
+ log << "Longest sequence : " + max.to_s + ld
+ Util.print_message( PRG_NAME, "Average length : " + avg.to_s )
+ log << "Average length : " + avg.to_s + ld
+
end
end
@@ -511,6 +538,7 @@ module Evoruby
w = PhylipSequentialWriter.new()
w.clean( @clean )
w.set_max_name_length( @name_length )
+ w.set_exception_if_name_too_long( @die_if_name_too_long )
elsif( @fasta_output )
w = FastaWriter.new()
w.set_line_width( @width )
@@ -522,20 +550,24 @@ module Evoruby
w.clean( @clean )
if ( @name_length_set )
w.set_max_name_length( @name_length )
+ w.set_exception_if_name_too_long( @die_if_name_too_long )
end
elsif( @nexus_output )
w = NexusWriter.new()
w.clean( @clean )
w.set_max_name_length( @name_length )
+ w.set_exception_if_name_too_long( @die_if_name_too_long )
end
-
begin
io.write_to_file( msa, output, w )
rescue Exception => e
Util.fatal_error( PRG_NAME, "error: " + e.to_s )
end
+ Util.print_message( PRG_NAME, "Number of sequences in output : " + msa.get_number_of_seqs.to_s )
+ log << "Number of sequences in output : " + msa.get_number_of_seqs.to_s + ld
+
begin
f = File.open( output + LOG_SUFFIX, 'a' )
f.print( log )
@@ -661,6 +693,7 @@ module Evoruby
@last = -1
end
end
+
def analyze_command_line( cla )
if ( cla.is_option_set?( INPUT_TYPE_OPTION ) )
begin
@@ -806,6 +839,9 @@ module Evoruby
Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
end
end
+ if ( cla.is_option_set?( DIE_IF_NAME_TOO_LONG ) )
+ @die_if_name_too_long = true
+ end
end
@@ -819,6 +855,7 @@ module Evoruby
puts( " options: -" + INPUT_TYPE_OPTION + "=: f for fasta, p for phylip selex type" )
puts( " -" + OUTPUT_TYPE_OPTION + "=