2 # = lib/evo/apps/msa_processor.rb - MsaProcessor class
4 # Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
5 # License:: GNU Lesser General Public License (LGPL)
7 # $Id: msa_processor.rb,v 1.33 2010/12/13 19:00:10 cmzmasek Exp $
13 require 'lib/evo/util/constants'
14 require 'lib/evo/util/util'
15 require 'lib/evo/util/command_line_arguments'
16 require 'lib/evo/msa/msa_factory'
17 require 'lib/evo/io/msa_io'
18 require 'lib/evo/io/writer/phylip_sequential_writer'
19 require 'lib/evo/io/writer/nexus_writer'
20 require 'lib/evo/io/writer/fasta_writer'
21 require 'lib/evo/io/parser/fasta_parser'
22 require 'lib/evo/io/parser/general_msa_parser'
23 require 'lib/evo/io/writer/msa_writer'
# Descriptive program metadata.
# NOTE(review): PRG_NAME, PRG_VERSION, PRG_DATE, WIDTH_OPTION, TRIM_OPTION,
# SPLIT and HELP_OPTION_2 are referenced further down but are not defined on
# any line visible here.
31 PRG_DESC = "processing of multiple sequence alignments"
33 COPYRIGHT = "2008-2010 Christian M Zmasek"
34 CONTACT = "phylosoft@gmail.com"
35 WWW = "www.phylosoft.org"
# Initial values for @name_length and @width respectively.
38 NAME_LENGTH_DEFAULT = 10
39 WIDTH_DEFAULT_FASTA = 60
# Command-line option keys. They are whitelisted before use and described in
# the help text, where each key is printed with a leading "-".
40 INPUT_TYPE_OPTION = "i"
41 OUTPUT_TYPE_OPTION = "o"
42 MAXIMAL_NAME_LENGTH_OPTION = "n"
43 DIE_IF_NAME_TOO_LONG = "d"
45 CLEAN_UP_SEQ_OPTION = "c"
46 REM_RED_OPTION = "rem_red"
47 REMOVE_GAP_COLUMNS_OPTION = "rgc"
48 REMOVE_GAP_ONLY_COLUMNS = "rgoc"
49 REMOVE_COLUMNS_GAP_RATIO_OPTION = "rr"
50 REMOVE_ALL_GAP_CHARACTERS_OPTION = "rg"
51 REMOVE_ALL_SEQUENCES_LISTED_OPTION = "r"
52 KEEP_ONLY_SEQUENCES_LISTED_OPTION = "k"
54 KEEP_MATCHING_SEQUENCES_OPTION = "mk"
55 REMOVE_MATCHING_SEQUENCES_OPTION = "mr"
58 REMOVE_SEQS_GAP_RATIO_OPTION = "rsgr"
59 REMOVE_SEQS_NON_GAP_LENGTH_OPTION = "rsl"
# Appended to the output file name to form the name of the log file.
61 LOG_SUFFIX = "_msa_pro.log"
62 HELP_OPTION_1 = "help"
# Default instance state. Presumably the body of the constructor; its def line
# is not visible here, and further defaults sit on lines that are not shown.
# The "*_set" flags record whether a value was chosen explicitly through the
# corresponding setter (the setters switch them to true).
67 @input_format_set = false
68 @output_format_set = false
71 @name_length = NAME_LENGTH_DEFAULT
72 @name_length_set = false
73 @width = WIDTH_DEFAULT_FASTA # fasta only
77 @clean = false # phylip only
80 @rg = false # fasta only
82 @die_if_name_too_long = false
# nil means "no name filter"; the processing step tests this for truthiness.
86 @remove_matching = nil
# Start of the main processing routine (its def line is not visible here).
# Prints the program banner; the remaining arguments of this call are on
# lines that are not shown.
101 Util.print_program_information( PRG_NAME,
# At least one command-line argument is required.
110 if ( ARGV == nil || ARGV.length < 1 )
111 Util.print_message( PRG_NAME, "Illegal number of arguments" )
# Parse ARGV; an ArgumentError raised by the parser is handed to
# Util.fatal_error with STDOUT as the third argument.
117 cla = CommandLineArguments.new( ARGV )
118 rescue ArgumentError => e
119 Util.fatal_error( PRG_NAME, "Error: " + e.to_s, STDOUT )
# Either help option triggers the help branch (its body is not visible here).
122 if ( cla.is_option_set?( HELP_OPTION_1 ) ||
123 cla.is_option_set?( HELP_OPTION_2 ) )
# Exactly two file names are expected: input (index 0) and output (index 1).
128 if ( cla.get_number_of_files != 2 || ARGV.length < 2 )
129 Util.print_message( PRG_NAME, "Illegal number of arguments" )
# Whitelist of option keys accepted by this program.
134 allowed_opts = Array.new
135 allowed_opts.push( INPUT_TYPE_OPTION )
136 allowed_opts.push( OUTPUT_TYPE_OPTION )
137 allowed_opts.push( MAXIMAL_NAME_LENGTH_OPTION )
138 allowed_opts.push( WIDTH_OPTION )
139 allowed_opts.push( CLEAN_UP_SEQ_OPTION )
140 allowed_opts.push( REMOVE_GAP_COLUMNS_OPTION )
141 allowed_opts.push( REMOVE_GAP_ONLY_COLUMNS )
142 allowed_opts.push( REMOVE_COLUMNS_GAP_RATIO_OPTION )
143 allowed_opts.push( REMOVE_ALL_GAP_CHARACTERS_OPTION )
144 allowed_opts.push( REMOVE_ALL_SEQUENCES_LISTED_OPTION )
145 allowed_opts.push( KEEP_ONLY_SEQUENCES_LISTED_OPTION )
146 allowed_opts.push( TRIM_OPTION )
147 allowed_opts.push( REMOVE_SEQS_GAP_RATIO_OPTION )
148 allowed_opts.push( REMOVE_SEQS_NON_GAP_LENGTH_OPTION )
149 allowed_opts.push( SPLIT )
150 allowed_opts.push( REM_RED_OPTION )
151 allowed_opts.push( KEEP_MATCHING_SEQUENCES_OPTION )
152 allowed_opts.push( REMOVE_MATCHING_SEQUENCES_OPTION )
153 allowed_opts.push( DIE_IF_NAME_TOO_LONG )
# The validator returns the unknown options as a string; a non-empty result
# is reported as a fatal error.
155 disallowed = cla.validate_allowed_options_as_str( allowed_opts )
156 if ( disallowed.length > 0 )
157 Util.fatal_error( PRG_NAME,
158 "unknown option(s): " + disallowed )
161 input = cla.get_file_name( 0 )
162 output = cla.get_file_name( 1 )
# Transfer the option values into instance state via the setters.
164 analyze_command_line( cla )
# Readability/writability checks; a caught error `e` is passed to
# Util.fatal_error (the begin/rescue lines are not visible here).
167 Util.check_file_for_readability( input )
169 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
173 Util.check_file_for_writability( output )
175 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
# Select fasta output. Each of these setters also sets @output_format_set
# to true.
# NOTE(review): the line directly before this group is not visible;
# presumably a condition guards it -- confirm.
179 set_pi_output( false )
180 set_fasta_output( true )
181 set_nexus_output( false )
# Input format auto-detection, used only when no input format was chosen
# explicitly. Util.looks_like_fasta? decides between fasta and phylip-like
# input; an ArgumentError from it is reported through Util.fatal_error.
184 if ( !@input_format_set )
187 fasta_like = Util.looks_like_fasta?( input )
188 rescue ArgumentError => e
189 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
191 @fasta_input = fasta_like
192 @phylip_input = !fasta_like
# Without an explicit output format the output mirrors the detected input
# format; nexus is never chosen here.
# NOTE(review): this check appears to be nested inside the detection branch
# above (the closing `end` lines are not visible) -- confirm.
193 if ( !@output_format_set )
194 @fasta_output = fasta_like
195 @pi_output = !fasta_like
196 @nexus_output = false
# Echo the effective settings to stdout and accumulate the same text in `log`;
# `ld` is the line delimiter appended to every log entry. Most of the
# conditions that guard the individual reports are on lines not visible here.
200 ld = Constants::LINE_DELIMITER
201 log = PRG_NAME + " " + PRG_VERSION + " [" + PRG_DATE + "]" + " LOG" + ld
# `now` is assigned on a line that is not visible here.
203 log << "Date/time: " + now.to_s + ld
206 puts( "Input alignment : " + input )
207 log << "Input alignment : " + input + ld
208 puts( "Output alignment : " + output )
209 log << "Output alignment : " + output + ld
# Input format.
211 puts( "Input is : Phylip, or something like it" )
212 log << "Input is : Phylip, or something like it" + ld
213 elsif ( @fasta_input )
214 puts( "Input is : Fasta" )
215 log << "Input is : Fasta" + ld
# Column-removal settings.
218 puts( "Max col gap ratio: " + @rgr.to_s )
219 log << "Max col gap ratio: " + @rgr.to_s + ld
# NOTE(review): "colums" in the next four messages looks like a typo for
# "columns"; the runtime strings are left untouched here.
221 puts( "Remove gap colums" )
222 log << "Remove gap colums" + ld
224 puts( "Remove gap only colums" )
225 log << "Remove gap only colums" + ld
228 puts( "Clean up : true" )
229 log << "Clean up : true" + ld
# Output format and format-specific settings.
233 puts( "Output is : Phylip interleaved" )
234 log << "Output is : Phylip interleaved" + ld
235 elsif ( @fasta_output )
236 puts( "Output is : Fasta" )
237 log << "Output is : Fasta" + ld
239 puts( "Width : " + @width.to_s )
240 log << "Width : " + @width.to_s + ld
243 puts( "Remove all gap characters (alignment is destroyed)" )
244 log << "Remove all gap characters (alignment is destroyed)" + ld
246 elsif ( @nexus_output )
247 puts( "Output is : Nexus" )
248 log << "Output is : Nexus" + ld
# The maximal name length is reported when it was set explicitly or when the
# output is not fasta.
250 if ( @name_length_set || !@fasta_output )
251 puts( "Max name length : " + @name_length.to_s )
252 log << "Max name length : " + @name_length.to_s + ld
# Sequence-removal settings.
255 puts( "Remove sequences for which the gap ratio > " + @rsgr.to_s )
256 log << "Remove sequences for which the gap ratio > " + @rsgr.to_s + ld
259 puts( "Remove sequences with less than " + @rsl.to_s + " non-gap characters" )
260 log << "Remove sequences with less than " + @rsl.to_s + " non-gap characters" + ld
# Remove-listed and keep-listed modes share the same @seqs_name_file.
263 puts( "Remove sequences listed in: " + @seqs_name_file )
264 log << "Remove sequences listed in: " + @seqs_name_file + ld
266 puts( "Keep only sequences listed in: " + @seqs_name_file )
267 log << "Keep only sequences listed in: " + @seqs_name_file + ld
270 puts( "Keep only columns from: "+ @first.to_s + " to " + @last.to_s )
271 log << "Keep only columns from: "+ @first.to_s + " to " + @last.to_s + ld
274 puts( "Remove redundant sequences: true" )
275 log << "Remove redundant sequences: true" + ld
278 puts( "Split : " + @split.to_s )
279 log << "Split : " + @split.to_s + ld
# Read the alignment through the factory `f` (created on a line not visible
# here): GeneralMsaParser in the first branch (its condition is not visible),
# FastaParser when @fasta_input is set. Any exception raised while reading is
# passed to Util.fatal_error with STDOUT as the third argument.
# NOTE(review): `rescue Exception` also catches SystemExit and Interrupt;
# consider whether StandardError is what is meant.
289 msa = f.create_msa_from_file( input, GeneralMsaParser.new() )
290 elsif ( @fasta_input )
291 msa = f.create_msa_from_file( input, FastaParser.new() )
293 rescue Exception => e
294 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Report length and gap proportion of an aligned input; otherwise only note
# that the input is not aligned.
297 if ( msa.is_aligned() )
298 Util.print_message( PRG_NAME, "Length of original alignment : " + msa.get_length.to_s )
299 log << "Length of original alignment : " + msa.get_length.to_s + ld
300 gp = msa.calculate_gap_proportion
301 Util.print_message( PRG_NAME, "Gap-proportion of original alignment : " + gp.to_s )
302 log << "Gap-proportion of original alignment : " + gp.to_s + ld
304 Util.print_message( PRG_NAME, "the input is not aligned" )
305 log << "The input is not aligned" + ld
# Warn about every sequence name that has already been seen.
# NOTE(review): Set is used here, but none of the visible require lines loads
# 'set' -- confirm it is loaded elsewhere.
308 all_names = Set.new()
309 for i in 0 ... msa.get_number_of_seqs()
310 current_name = msa.get_sequence( i ).get_name
311 if all_names.include?( current_name )
312 Util.print_warning_message( PRG_NAME, "sequence name [" + current_name + "] is not unique" )
314 all_names.add( current_name )
# Remove-listed / keep-listed modes: both read the sequence names from
# @seqs_name_file; a missing or empty list is a fatal error.
320 if ( @remove_seqs || @keep_seqs )
321 names = Util.file2array( @seqs_name_file, true )
322 if ( names == nil || names.length() < 1 )
323 error_msg = "file \"" + @seqs_name_file.to_s + "\" appears empty"
324 Util.fatal_error( PRG_NAME, error_msg )
# Presumably the remove branch (its condition is not visible here): every
# listed name must match exactly one sequence -- no match or more than one
# match is fatal -- and that single match is removed. The meaning of the two
# boolean arguments is defined by find_by_name, which is not visible here.
329 for i in 0 ... names.length()
330 to_delete = msa.find_by_name( names[ i ], true, false )
331 if ( to_delete.length() < 1 )
332 error_msg = "sequence name \"" + names[ i ] + "\" not found"
333 Util.fatal_error( PRG_NAME, error_msg )
334 elsif ( to_delete.length() > 1 )
335 error_msg = "sequence name \"" + names[ i ] + "\" is not unique"
336 Util.fatal_error( PRG_NAME, error_msg )
338 msa.remove_sequence!( to_delete[ 0 ] )
# `c` is a counter maintained on lines not visible here.
342 Util.print_message( PRG_NAME, "Removed " + c.to_s + " sequences" )
343 log << "Removed " + c.to_s + " sequences" + ld
# Presumably the keep branch: sequences whose name is in the list are added to
# msa_new (created on a line not visible here).
348 for j in 0 ... msa.get_number_of_seqs()
349 if ( names.include?( msa.get_sequence( j ).get_name() ) )
350 msa_new.add_sequence( msa.get_sequence( j ) )
# `k` and `r` are computed on lines not visible here.
# NOTE(review): the last log entry uses lower-case "removed" while the
# console message and the other log entries use "Removed".
357 Util.print_message( PRG_NAME, "Kept " + k.to_s + " sequences" )
358 log << "Kept " + k.to_s + " sequences" + ld
359 Util.print_message( PRG_NAME, "Removed " + r.to_s + " sequences" )
360 log << "removed " + r.to_s + " sequences" + ld
# Column operations on the alignment; each call is presumably guarded by its
# own option check on lines not visible here.
365 msa.trim!( @first, @last )
368 msa.remove_gap_columns_w_gap_ratio!( @rgr )
370 msa.remove_gap_columns!()
372 msa.remove_gap_only_columns!()
# Remove sequences by gap ratio. `n` and `k` are the sequence counts before
# and after; `removed` holds the names that were dropped and is written to
# the log. `r` is computed on a line not visible here.
375 n = msa.get_number_of_seqs()
376 removed = msa.remove_sequences_by_gap_ratio!( @rsgr )
377 k = msa.get_number_of_seqs()
379 Util.print_message( PRG_NAME, "Kept " + k.to_s + " sequences" )
380 log << "Kept " + k.to_s + " sequences" + ld
381 Util.print_message( PRG_NAME, "Removed " + r.to_s + " sequences" )
382 log << "Removed " + r.to_s + " sequences:" + ld
383 removed.each { | seq_name |
384 log << " " + seq_name + ld
# Same pattern for removal by minimal non-gap length.
388 n = msa.get_number_of_seqs()
389 removed = msa.remove_sequences_by_non_gap_length!( @rsl )
390 k = msa.get_number_of_seqs()
392 Util.print_message( PRG_NAME, "Kept " + k.to_s + " sequences" )
393 log << "Kept " + k.to_s + " sequences" + ld
394 Util.print_message( PRG_NAME, "Removed " + r.to_s + " sequences" )
395 log << "Removed " + r.to_s + " sequences:" + ld
396 removed.each { | seq_name |
397 log << " " + seq_name + ld
# Keep-matching: collect the indices of all sequences whose lower-cased name
# does NOT contain the lower-cased @keep_matching string, then remove them.
# The loop header over `ii` is on a line not visible here.
400 if ( @keep_matching )
401 n = msa.get_number_of_seqs
402 to_be_removed = Set.new
404 seq = msa.get_sequence( ii )
405 if !seq.get_name.downcase.index( @keep_matching.downcase )
406 to_be_removed.add( ii )
# Indices are processed from highest to lowest, presumably so that a removal
# does not shift the indices still to be removed.
409 to_be_removed_ary = to_be_removed.to_a.sort.reverse
410 to_be_removed_ary.each { | index |
411 msa.remove_sequence!( index )
# Remove-matching: the mirror image -- sequences whose lower-cased name DOES
# contain the lower-cased @remove_matching string are removed.
415 if ( @remove_matching )
416 n = msa.get_number_of_seqs
417 to_be_removed = Set.new
420 seq = msa.get_sequence( iii )
422 if seq.get_name.downcase.index( @remove_matching.downcase )
423 to_be_removed.add( iii )
426 to_be_removed_ary = to_be_removed.to_a.sort.reverse
427 to_be_removed_ary.each { | index |
428 msa.remove_sequence!( index )
# Split mode: msa.split produces the pieces (`msas`) that are written as
# separate files. The meaning of the second argument is defined by split,
# which is not visible here.
437 msas = msa.split( @split, true )
# Choose and configure the writer for the selected output format. The first
# branch's condition is on a line not visible here.
441 w = PhylipSequentialWriter.new()
443 w.set_max_name_length( @name_length )
444 elsif( @fasta_output )
445 w = FastaWriter.new()
446 w.set_line_width( @width )
448 w.remove_gap_chars( true )
449 Util.print_warning_message( PRG_NAME, "removing gap character, the output is likely to become unaligned" )
450 log << "removing gap character, the output is likely to become unaligned" + ld
# For fasta the name length is only limited when it was set explicitly.
453 if ( @name_length_set )
454 w.set_max_name_length( @name_length )
456 elsif( @nexus_output )
457 w = NexusWriter.new()
459 w.set_max_name_length( @name_length )
# Each piece `m` goes to its own file named output + "_" + i; the enclosing
# loop is on lines not visible here.
464 io.write_to_file( m, output + "_" + i.to_s, w )
466 Util.print_message( PRG_NAME, "wrote " + msas.length.to_s + " files" )
467 log << "wrote " + msas.length.to_s + " files" + ld
# NOTE(review): `rescue Exception` also catches SystemExit and Interrupt;
# consider whether StandardError is what is meant.
468 rescue Exception => e
469 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
473 rescue Exception => e
474 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Report length and gap proportion of the processed alignment, or warn that
# the output is not aligned.
480 if ( msa.is_aligned() )
481 Util.print_message( PRG_NAME, "Length of processed alignment : " + msa.get_length.to_s )
482 log << "Length of processed alignment : " + msa.get_length.to_s + ld
483 gp = msa.calculate_gap_proportion
484 Util.print_message( PRG_NAME, "Gap-proportion of processed alignment: " + gp.to_s )
485 log << "Gap-proportion of processed alignment: " + gp.to_s + ld
487 Util.print_warning_message( PRG_NAME, "output is not aligned" )
488 log << "output is not aligned" + ld
# Redundancy removal (presumably guarded by the rem_red setting on a line not
# visible here). `removed` holds the dropped sequence names; the identical
# sequences detected by the alignment object are logged as well. The bodies of
# both `each` blocks are on lines not visible here.
493 removed = msa.remove_redundant_sequences!( true, false )
495 identicals = msa.get_identical_seqs_detected
496 log << "the following " + identicals.size.to_s + " sequences are identical:" + ld
497 identicals.each { | s |
500 log << "ignoring the following " + removed.size.to_s + " redundant sequences:" + ld
501 removed.each { | seq_name |
504 Util.print_message( PRG_NAME, "will store " + msa.get_number_of_seqs.to_s + " non-redundant sequences" )
505 log << "will store " + msa.get_number_of_seqs.to_s + " non-redundant sequences" + ld
# Choose and configure the writer for the single-file output. Wherever a
# maximal name length is applied here, the writer is also told whether an
# over-long name should raise (@die_if_name_too_long). The first branch's
# condition is on a line not visible here.
514 w = PhylipSequentialWriter.new()
516 w.set_max_name_length( @name_length )
517 w.set_exception_if_name_too_long( @die_if_name_too_long )
518 elsif( @fasta_output )
519 w = FastaWriter.new()
520 w.set_line_width( @width )
522 w.remove_gap_chars( true )
523 Util.print_warning_message( PRG_NAME, "removing gap characters, the output is likely to become unaligned" )
524 log << "removing gap character, the output is likely to become unaligned" + ld
# For fasta the name length is only limited when it was set explicitly.
527 if ( @name_length_set )
528 w.set_max_name_length( @name_length )
529 w.set_exception_if_name_too_long( @die_if_name_too_long )
531 elsif( @nexus_output )
532 w = NexusWriter.new()
534 w.set_max_name_length( @name_length )
535 w.set_exception_if_name_too_long( @die_if_name_too_long )
# Write the alignment; any exception is reported through Util.fatal_error.
# NOTE(review): `rescue Exception` also catches SystemExit and Interrupt;
# consider whether StandardError is what is meant.
540 io.write_to_file( msa, output, w )
541 rescue Exception => e
542 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
# The log file is output + LOG_SUFFIX, opened in append mode.
# NOTE(review): the file is opened without a block and the write/close calls
# are on lines not visible here -- confirm the handle is closed on error paths.
546 f = File.open( output + LOG_SUFFIX, 'a' )
549 rescue Exception => e
550 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
555 Util.print_message( PRG_NAME, "OK" )
# Fragment of a helper that orders sequences by name (its def line is not
# visible here). Names are read from `msa`, sorted, and each sequence is then
# looked up by name -- taking the first match -- and added to `new_msa` in
# sorted order. `names` and `new_msa` are created on lines not visible here.
564 for i in 0 ... msa.get_number_of_seqs
565 name = msa.get_sequence( i ).get_name
568 sorted_ary = names.to_a.sort
570 sorted_ary.each { | seq_name |
571 seq = msa.get_sequence( msa.find_by_name( seq_name, true, false )[ 0 ] )
572 new_msa.add_sequence( seq )
# Input-format and name-length setters. Only part of each body is visible
# here; presumably the argument is also stored in the matching instance
# variable on the lines not shown.

# Marks the input format as explicitly chosen. fi defaults to true.
577 def set_fasta_input( fi = true )
579 @input_format_set = true
# Marks the input format as explicitly chosen. pi defaults to true.
581 def set_phylip_input( pi = true )
583 @input_format_set = true
# Marks the maximal name length as explicitly chosen.
585 def set_name_length( i )
587 @name_length_set = true
# Output-format setters. Every one of them marks the output format as
# explicitly chosen, regardless of the value passed in. For the first two,
# the assignment of the argument is presumably on a line not visible here.
592 def set_fasta_output( fo = true )
594 @output_format_set = true
596 def set_pi_output( pso = true )
598 @output_format_set = true
# Stores the nexus flag and marks the output format as explicitly chosen.
600 def set_nexus_output( nexus = true )
601 @nexus_output = nexus
602 @output_format_set = true
# Processing-option setters. For most of them only the signature is visible
# here; presumably each stores its argument in the instance variable of the
# same name -- confirm against the full file.
604 def set_clean( c = true )
607 def set_remove_gap_columns( rgc = true )
610 def set_remove_gap_only_columns( rgoc = true )
613 def set_remove_gaps( rg = true )
616 def set_remove_gap_ratio( rgr )
619 def set_remove_seqs_gap_ratio( rsgr )
622 def set_remove_seqs_min_non_gap_length( rsl )
# set_remove_seqs and set_keep_seqs write to the same @seqs_name_file, so the
# one called last determines which file is used.
625 def set_remove_seqs( file )
626 @seqs_name_file = file
630 def set_keep_seqs( file )
631 @seqs_name_file = file
635 def set_trim( first, last )
# Substring filters on sequence names, used by the name-matching step.
640 def set_remove_matching( remove )
641 @remove_matching = remove
643 def set_keep_matching( keep )
644 @keep_matching = keep
646 def set_rem_red( rr )
# The three assignments below reset options to their inactive values. They
# belong to a method whose def line is not visible here.
# NOTE(review): presumably the split setter, since split is documented as
# incompatible with other options -- confirm.
655 @clean = false # phylip only
658 @rg = false # fasta only
662 @seqs_name_file = nil
# Transfers the options found in `cla` into instance state by calling the
# setters. For every option that carries a value, an ArgumentError raised
# while reading the value is passed to Util.fatal_error with STDOUT as the
# third argument. Several conditions and setter calls are on lines not
# visible here.
671 def analyze_command_line( cla )
# Input type: "f" selects fasta. The first branch's condition is not visible;
# per the help text the other accepted value is "p" (phylip).
672 if ( cla.is_option_set?( INPUT_TYPE_OPTION ) )
674 type = cla.get_option_value( INPUT_TYPE_OPTION )
676 set_phylip_input( true )
677 set_fasta_input( false )
678 elsif ( type == "f" )
679 set_fasta_input( true )
680 set_phylip_input( false )
682 rescue ArgumentError => e
683 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Output type: "f" selects fasta, "n" nexus; the first branch (condition not
# visible) selects the phylip output. Exactly one format flag is passed true.
686 if ( cla.is_option_set?( OUTPUT_TYPE_OPTION ) )
688 type = cla.get_option_value( OUTPUT_TYPE_OPTION )
690 set_pi_output( true )
691 set_fasta_output( false )
692 set_nexus_output( false )
693 elsif ( type == "f" )
694 set_pi_output( false )
695 set_fasta_output( true )
696 set_nexus_output( false )
697 elsif ( type == "n" )
698 set_pi_output( false )
699 set_fasta_output( false )
700 set_nexus_output( true )
702 rescue ArgumentError => e
703 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Maximal name length (integer); the setter call is on a line not visible.
706 if ( cla.is_option_set?( MAXIMAL_NAME_LENGTH_OPTION ) )
708 l = cla.get_option_value_as_int( MAXIMAL_NAME_LENGTH_OPTION )
710 rescue ArgumentError => e
711 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Line width (integer); the setter call is on a line not visible.
714 if ( cla.is_option_set?( WIDTH_OPTION ) )
716 w = cla.get_option_value_as_int( WIDTH_OPTION )
718 rescue ArgumentError => e
719 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Flag-style options without a value.
722 if ( cla.is_option_set?( CLEAN_UP_SEQ_OPTION ) )
725 if ( cla.is_option_set?( REMOVE_GAP_COLUMNS_OPTION ) )
726 set_remove_gap_columns( true )
728 if ( cla.is_option_set?( REM_RED_OPTION ) )
731 if ( cla.is_option_set?( REMOVE_GAP_ONLY_COLUMNS ) )
732 set_remove_gap_only_columns( true )
734 if ( cla.is_option_set?( REMOVE_ALL_GAP_CHARACTERS_OPTION ) )
735 set_remove_gaps( true )
# Column gap-ratio threshold (float).
737 if ( cla.is_option_set?( REMOVE_COLUMNS_GAP_RATIO_OPTION ) )
739 f = cla.get_option_value_as_float( REMOVE_COLUMNS_GAP_RATIO_OPTION )
740 set_remove_gap_ratio( f )
741 rescue ArgumentError => e
742 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Files listing sequence names to remove / to keep; the setter calls are on
# lines not visible here.
745 if ( cla.is_option_set?( REMOVE_ALL_SEQUENCES_LISTED_OPTION ) )
747 s = cla.get_option_value( REMOVE_ALL_SEQUENCES_LISTED_OPTION )
749 rescue ArgumentError => e
750 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
753 if ( cla.is_option_set?( KEEP_ONLY_SEQUENCES_LISTED_OPTION ) )
755 s = cla.get_option_value( KEEP_ONLY_SEQUENCES_LISTED_OPTION )
757 rescue ArgumentError => e
758 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Trim range given as "<first>-<last>"; both captures are converted to
# integers.
# NOTE(review): the pattern is not anchored, so it also matches when the
# digits-dash-digits sequence is only part of the value.
# NOTE(review): a non-matching value only prints "illegal argument" here;
# what follows is on lines not visible -- confirm that processing stops.
761 if ( cla.is_option_set?( TRIM_OPTION ) )
763 s = cla.get_option_value( TRIM_OPTION )
764 if ( s =~ /(\d+)-(\d+)/ )
765 set_trim( $1.to_i(), $2.to_i() )
767 puts( "illegal argument" )
771 rescue ArgumentError => e
772 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Sequence gap-ratio threshold (float).
775 if ( cla.is_option_set?( REMOVE_SEQS_GAP_RATIO_OPTION ) )
777 f = cla.get_option_value_as_float( REMOVE_SEQS_GAP_RATIO_OPTION )
778 set_remove_seqs_gap_ratio( f )
779 rescue ArgumentError => e
780 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Minimal non-gap length; despite its name, the local `f` holds an integer.
783 if ( cla.is_option_set?( REMOVE_SEQS_NON_GAP_LENGTH_OPTION ) )
785 f = cla.get_option_value_as_int( REMOVE_SEQS_NON_GAP_LENGTH_OPTION )
786 set_remove_seqs_min_non_gap_length( f )
787 rescue ArgumentError => e
788 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Split count (integer); the setter call is on a line not visible here.
791 if ( cla.is_option_set?( SPLIT ) )
793 s = cla.get_option_value_as_int( SPLIT )
795 rescue ArgumentError => e
796 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Substring filters on sequence names.
800 if ( cla.is_option_set?( REMOVE_MATCHING_SEQUENCES_OPTION ) )
802 s = cla.get_option_value( REMOVE_MATCHING_SEQUENCES_OPTION )
803 set_remove_matching( s )
804 rescue ArgumentError => e
805 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
808 if ( cla.is_option_set?( KEEP_MATCHING_SEQUENCES_OPTION ) )
810 s = cla.get_option_value( KEEP_MATCHING_SEQUENCES_OPTION )
811 set_keep_matching( s )
812 rescue ArgumentError => e
813 Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
# Written directly to the instance variable; no setter is used for this flag.
816 if ( cla.is_option_set?( DIE_IF_NAME_TOO_LONG ) )
817 @die_if_name_too_long = true
# Usage and option summary printed to stdout (the def line of the enclosing
# method is not visible here). Each option line is built from the same
# constant that is used to parse the option, so the keys shown cannot drift
# from the keys accepted.
827 puts( " " + PRG_NAME + ".rb [options] <input alignment> <output>" )
829 puts( " options: -" + INPUT_TYPE_OPTION + "=<input type>: f for fasta, p for phylip selex type" )
830 puts( " -" + OUTPUT_TYPE_OPTION + "=<output type>: f for fasta, n for nexus, p for phylip sequential (default)" )
831 puts( " -" + MAXIMAL_NAME_LENGTH_OPTION + "=<n>: n=maximal name length (default for phylip 10, for fasta: unlimited )" )
832 puts( " -" + DIE_IF_NAME_TOO_LONG + ": die if sequence name too long" )
833 puts( " -" + WIDTH_OPTION + "=<n>: n=width (fasta output only, default is 60)" )
834 puts( " -" + CLEAN_UP_SEQ_OPTION + ": clean up sequences" )
835 puts( " -" + REMOVE_GAP_COLUMNS_OPTION + ": remove gap columns" )
836 puts( " -" + REMOVE_GAP_ONLY_COLUMNS + ": remove gap-only columns" )
837 puts( " -" + REMOVE_COLUMNS_GAP_RATIO_OPTION + "=<n>: remove columns for which ( seqs with gap / number of sequences > n )" )
838 puts( " -" + REMOVE_ALL_GAP_CHARACTERS_OPTION + ": remove all gap characters (destroys alignment, fasta output only)" )
839 puts( " -" + REMOVE_ALL_SEQUENCES_LISTED_OPTION + "=<file>: remove all sequences listed in file" )
840 puts( " -" + KEEP_ONLY_SEQUENCES_LISTED_OPTION + "=<file>: keep only sequences listed in file" )
841 puts( " -" + TRIM_OPTION + "=<first>-<last>: remove columns before first and after last" )
842 puts( " -" + REMOVE_SEQS_GAP_RATIO_OPTION + "=<n>: remove sequences for which the gap ratio > n (after column operations)" )
843 puts( " -" + REMOVE_SEQS_NON_GAP_LENGTH_OPTION + "=<n> remove sequences with less than n non-gap characters (after column operations)" )
844 puts( " -" + REMOVE_MATCHING_SEQUENCES_OPTION + "=<s> remove all sequences with names containing s" )
845 puts( " -" + KEEP_MATCHING_SEQUENCES_OPTION + "=<s> keep only sequences with names containing s" )
# NOTE(review): "expect for" in the next message looks like a typo for
# "except for"; the runtime string is left untouched here.
846 puts( " -" + SPLIT + "=<n> split a fasta file into n files of equal number of sequences (expect for " )
847 puts( " last one), cannot be used with other options" )
848 puts( " -" + REM_RED_OPTION + ": remove redundant sequences" )
856 end # class MsaProcessor