require 'date'
require 'set'
-
require 'lib/evo/util/constants'
require 'lib/evo/util/util'
require 'lib/evo/util/command_line_arguments'
require 'lib/evo/io/writer/msa_writer'
module Evoruby
-
class MsaProcessor
# Program identity; these values are handed to Util.print_program_information in run.
PRG_NAME = "msa_pro"
- PRG_DATE = "130411"
+ PRG_DATE = "170215"
PRG_DESC = "processing of multiple sequence alignments"
- PRG_VERSION = "1.07"
- COPYRIGHT = "2008-2010 Christian M Zmasek"
- CONTACT = "phylosoft@gmail.com"
- WWW = "www.phylosoft.org"
-
+ PRG_VERSION = "1.08"
+ WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
# NOTE(review): presumably the default sequence-name length and FASTA line width;
# their use is not visible in this part of the file -- confirm.
NAME_LENGTH_DEFAULT = 10
WIDTH_DEFAULT_FASTA = 60
# Appended to the output file name to form the log file name (see run).
LOG_SUFFIX = "_msa_pro.log"
# Option names that make run print the help text and exit.
HELP_OPTION_1 = "help"
HELP_OPTION_2 = "h"
-
-
# Starts with neither an input nor an output format chosen, and with
# @last set to -1.
def initialize
  @input_format_set = @output_format_set = false
  @last = -1
end
-
# Entry point: prints the program banner, checks the command line, handles the
# alignment, writes it with io.write_to_file and appends a log file.
# NOTE(review): this view of the method is not contiguous. `cla`, `allowed_opts`,
# `gp`, `log`, `ld`, `removed`, `w`, `io`, `output` and `e` are used without a
# visible assignment, and several `else`/`end` keywords have no visible opener.
# Comments below describe only the statements that are shown.
def run()
Util.print_program_information( PRG_NAME,
- PRG_VERSION,
- PRG_DESC,
- PRG_DATE,
- COPYRIGHT,
- CONTACT,
- WWW,
- STDOUT )
+ PRG_VERSION,
+ PRG_DESC,
+ PRG_DATE,
+ WWW,
+ STDOUT )
# Only a message is printed in this branch; no exit is visible here.
# NOTE(review): confirm whether the method is meant to stop at this point.
if ( ARGV == nil || ARGV.length < 1 )
Util.print_message( PRG_NAME, "Illegal number of arguments" )
end
# Help requested: print usage and exit with status 0.
if ( cla.is_option_set?( HELP_OPTION_1 ) ||
- cla.is_option_set?( HELP_OPTION_2 ) )
+ cla.is_option_set?( HELP_OPTION_2 ) )
print_help
exit( 0 )
end
# Any option outside allowed_opts is reported through Util.fatal_error.
disallowed = cla.validate_allowed_options_as_str( allowed_opts )
if ( disallowed.length > 0 )
Util.fatal_error( PRG_NAME,
- "unknown option(s): " + disallowed )
+ "unknown option(s): " + disallowed )
end
input = cla.get_file_name( 0 )
Util.print_message( PRG_NAME, "Gap-proportion of original alignment : " + gp.to_s )
log << "Gap-proportion of original alignment : " + gp.to_s + ld
else
- Util.print_message( PRG_NAME, "the input is not aligned" )
- log << "The input is not aligned" + ld
+ Util.print_message( PRG_NAME, "Input is not aligned" )
+ log << "Input is not aligned" + ld
end
all_names = Set.new()
msa = sort( msa )
end
-
-
if ( @split > 0 )
begin
msas = msa.split( @split, true )
Util.print_message( PRG_NAME, "Gap-proportion of processed alignment: " + gp.to_s )
log << "Gap-proportion of processed alignment: " + gp.to_s + ld
else
- Util.print_warning_message( PRG_NAME, "output is not aligned" )
- log << "output is not aligned" + ld
# Unaligned output: gather the shortest, longest and summed sequence length.
+ min = 0
+ max = 0
+ sum = 0
+ first = true
+ for s in 0 ... msa.get_number_of_seqs
+ seq = msa.get_sequence( s )
+ l = seq.get_length
+ sum += l
+ if l > max
+ max = l
+ end
+ if first || l < min
+ min = l
+ end
+ first = false
+ end
# NOTE(review): if get_length returns Integers this is integer division (the
# average is truncated), and it raises ZeroDivisionError when
# get_number_of_seqs is 0 -- confirm both are acceptable.
+ avg = sum / msa.get_number_of_seqs
+ Util.print_message( PRG_NAME, "Output is not aligned" )
+ log << "Output is not aligned" + ld
+ Util.print_message( PRG_NAME, "Shortest sequence : " + min.to_s )
+ log << "Shortest sequence : " + min.to_s + ld
+ Util.print_message( PRG_NAME, "Longest sequence : " + max.to_s )
+ log << "Longest sequence : " + max.to_s + ld
+ Util.print_message( PRG_NAME, "Average length : " + avg.to_s )
+ log << "Average length : " + avg.to_s + ld
+
end
end
if removed.size > 0
identicals = msa.get_identical_seqs_detected
log << "the following " + identicals.size.to_s + " sequences are identical:" + ld
- identicals.each { | s |
- log << s + ld
+ identicals.each { | identical |
+ log << identical + ld
}
log << "ignoring the following " + removed.size.to_s + " redundant sequences:" + ld
removed.each { | seq_name |
w.set_exception_if_name_too_long( @die_if_name_too_long )
end
-
begin
io.write_to_file( msa, output, w )
# NOTE(review): `rescue Exception` also traps SystemExit, Interrupt and
# NoMemoryError; StandardError is usually what is intended.
rescue Exception => e
Util.fatal_error( PRG_NAME, "error: " + e.to_s )
end
+ Util.print_message( PRG_NAME, "Number of sequences in output : " + msa.get_number_of_seqs.to_s )
+ log << "Number of sequences in output : " + msa.get_number_of_seqs.to_s + ld
+
# The log text is appended ('a' mode) to the file named output + LOG_SUFFIX.
begin
f = File.open( output + LOG_SUFFIX, 'a' )
f.print( log )
Util.fatal_error( PRG_NAME, "error: " + e.to_s )
end
-
end
Util.print_message( PRG_NAME, "OK" )
puts
end
-
private
# NOTE(review): as shown, this body never uses `msa`. It stores `fi` -- which is
# neither a parameter nor a visible local of this method -- in @fasta_input and
# marks the input format as set. The header and the body look like they belong
# to different methods; check for missing lines before relying on this.
def sort( msa )
@fasta_input = fi
@input_format_set = true
end
+
# Records the PHYLIP-input flag in @phylip_input and marks the input
# format as chosen.
#
# pi - flag value to store (defaults to true).
def set_phylip_input(pi = true)
  @phylip_input = pi
  @input_format_set = true
end
+
# Stores the name length in @name_length and remembers, via
# @name_length_set, that it was set explicitly.
#
# i - the name length to store.
def set_name_length(i)
  @name_length = i
  @name_length_set = true
end
+
# Stores the width in @width.
#
# i - the width to store.
def set_width(i)
  @width = i
end
+
# Records the FASTA-output flag in @fasta_output and marks the output
# format as chosen.
#
# fo - flag value to store (defaults to true).
def set_fasta_output(fo = true)
  @fasta_output = fo
  @output_format_set = true
end
+
# Records the flag in @pi_output and marks the output format as chosen.
#
# pso - flag value to store (defaults to true).
def set_pi_output(pso = true)
  @pi_output = pso
  @output_format_set = true
end
+
# Records the NEXUS-output flag in @nexus_output and marks the output
# format as chosen.
#
# nexus - flag value to store (defaults to true).
def set_nexus_output(nexus = true)
  @nexus_output = nexus
  @output_format_set = true
end
+
# Stores the clean flag in @clean.
#
# c - flag value to store (defaults to true).
def set_clean(c = true)
  @clean = c
end
+
# Stores the remove-gap-columns flag in @rgc.
#
# rgc - flag value to store (defaults to true).
def set_remove_gap_columns(rgc = true)
  @rgc = rgc
end
+
# Stores the remove-gap-only-columns flag in @rgoc.
#
# rgoc - flag value to store (defaults to true).
def set_remove_gap_only_columns(rgoc = true)
  @rgoc = rgoc
end
+
# Stores the remove-gaps flag in @rg.
#
# rg - flag value to store (defaults to true).
def set_remove_gaps(rg = true)
  @rg = rg
end
+
# Stores the gap ratio used for removal in @rgr.
#
# rgr - the value to store.
def set_remove_gap_ratio(rgr)
  @rgr = rgr
end
+
# Stores the per-sequence gap ratio used for removal in @rsgr.
#
# rsgr - the value to store.
def set_remove_seqs_gap_ratio(rsgr)
  @rsgr = rsgr
end
+
# Stores the minimum non-gap length used for sequence removal in @rsl.
#
# rsl - the value to store.
def set_remove_seqs_min_non_gap_length(rsl)
  @rsl = rsl
end
+
# Switches to "remove" mode: turns @remove_seqs on, remembers the file in
# @seqs_name_file and turns @keep_seqs off.
#
# file - stored as-is in @seqs_name_file.
def set_remove_seqs(file)
  @remove_seqs = true
  @seqs_name_file = file
  @keep_seqs = false
end
+
# Switches to "keep" mode: turns @keep_seqs on, remembers the file in
# @seqs_name_file and turns @remove_seqs off.
#
# file - stored as-is in @seqs_name_file.
def set_keep_seqs(file)
  @keep_seqs = true
  @seqs_name_file = file
  @remove_seqs = false
end
+
# Enables trimming and stores its bounds.
#
# first - stored in @first.
# last  - stored in @last.
def set_trim(first, last)
  @first = first
  @trim = true
  @last = last
end
+
# Stores the remove-matching value in @remove_matching.
#
# remove - the value to store.
def set_remove_matching(remove)
  @remove_matching = remove
end
+
# Stores the keep-matching value in @keep_matching.
#
# keep - the value to store.
def set_keep_matching(keep)
  @keep_matching = keep
end
+
# Stores the remove-redundant setting in @rem_red.
#
# rr - the value to store.
def set_rem_red(rr)
  @rem_red = rr
end
-
-
# Enables splitting when s is positive: stores s in @split and turns on
# @die_if_name_too_long. Non-positive values leave the state untouched.
#
# s - the split value; must respond to `>`.
#
# Returns true when the setting was applied, nil otherwise.
def set_split(s)
  # Guard clause replaces the wrapping `if`; the stray `-` line that sat
  # between the two `end` keywords (and broke parsing) is gone.
  return unless s > 0

  @split = s
  @die_if_name_too_long = true
end
# Prints the help output; as shown here that is a single empty line.
def print_help
  puts
end
-
-
-
-
end # class MsaProcessor
-
end # module Evoruby