2 # = lib/evo/io/parser/general_msa_parser - GeneralMsaParser class
4 # Copyright:: Copyright (C) 2017 Christian M. Zmasek
5 # License:: GNU Lesser General Public License (LGPL)
7 # Last modified: 2017/02/07
9 require 'lib/evo/io/parser/msa_parser'
10 require 'lib/evo/msa/msa'
# GeneralMsaParser: an MsaParser subclass that reads a block-formatted
# alignment file line by line and appends sequence fragments to an msa object.
# NOTE(review): this view of the file is an excerpt. Several original lines
# (the `def parse( path )` header, the creation of `msa`, the assignments of
# `is_first`, `name` and `current_name`, and most `else`/`end` lines) are not
# visible, so the comments below describe only what the visible statements do.
16 class GeneralMsaParser < MsaParser
# --- body of parse( path ); closed by the `end # def parse( path )` line ---
# Ask Util to check that the file at `path` is readable before opening it.
22 Util.check_file_for_readability( path )
# Counter for the position of the current sequence line within a block;
# starts at -1 and is incremented before use (see below).
24 current_seq_index_per_block = -1
# UTF-8 -> UTF-8 converter; presumably the '//IGNORE' suffix is there to drop
# byte sequences that are not valid UTF-8 -- TODO confirm.
# NOTE(review): Iconv is not part of the standard library in Ruby >= 2.0
# (it needs the separate iconv gem there); String#encode or String#scrub are
# the built-in alternatives.
29 ic = Iconv.new( 'UTF-8//IGNORE', 'UTF-8' )
30 File.open( path ) do | file |
# Read the file one line at a time until file.gets returns nil.
31 while line = file.gets
# Pass every line through the converter before it is inspected.
32 line = ic.iconv( line )
# Lines accepted by can_ignore? take this branch (its body is not visible here).
33 if can_ignore?( line )
# A program-name header is only recognized while is_first is truthy.
35 elsif ( is_first && is_program_name_line?( line ) )
# Three accepted line shapes: token + whitespace + text, leading whitespace
# + text, or a single whitespace-free token.
36 elsif( line =~ /^\S+\s+.+\s*$/ || line =~ /^\s+.+\s*$/ || line =~ /^\S+\s*$/ )
# Reset of the per-block counter; the guarding condition is not visible
# (presumably the start of a new block -- TODO confirm).
39 current_seq_index_per_block = -1
42 current_seq_index_per_block += 1
# Case 1: a first token followed by sequence text ($2).
43 if ( line =~ /^(\S+)\s+(.+?)\s*$/ )
# Any whitespace inside the sequence text is replaced by '.'.
45 seq = $2.gsub( /\s/, '.' )
# Look up `name` in msa (presumably name is $1; its assignment and the
# meaning of the two `false` flags are not visible here).
46 a = msa.find_by_name( name, false, false )
# Exactly one match: append this fragment to that sequence.
49 elsif ( a.length == 1 )
50 msa.get_sequence( a[ 0 ] ).append!( seq )
# Remaining case (its `else` is not visible): reported as an IOError.
52 error_msg = "Unexpected error at line: " + line
53 raise IOError, error_msg
# Case 2: the line starts with whitespace, so it carries sequence text only.
56 elsif ( line =~ /^\s+(.+?)\s*$/ )
57 seq = $1.gsub( /\s/, '.' )
# The fragment is attributed to the sequence found under current_name.
58 a = msa.find_by_name( current_name, false, false )
# Raised under a condition that is not visible in this excerpt.
60 error_msg = "Unexpected error at line: " + line
61 raise IOError, error_msg
63 msa.get_sequence( a[ 0 ] ).append!( seq )
# Case 3: the line is a single whitespace-free token.
66 elsif ( line =~ /^(\S+)\s*$/ )
# Raised under a condition that is not visible in this excerpt.
69 error_msg = "First block cannot contain unnamed sequences"
70 raise IOError, error_msg
# The sequence is addressed by its position within the block; the
# assignment of `seq` for this branch is not visible here.
72 msa.get_sequence( current_seq_index_per_block ).append!( seq )
# Reported for a line that fits none of the handled shapes (the enclosing
# `else` is not visible).
77 error_msg = "Unexpected line: " + line
78 raise IOError, error_msg
86 end # def parse( path )
# Decides whether a line can be skipped. The visible alternatives are truthy
# when the line contains none of the characters A-Z, a-z, '-', '?', '*', '_',
# '.', or when it starts with whitespace followed by '*', '.' or ':'.
# NOTE(review): the `||` chain continues on lines that are not visible here.
90 def can_ignore?( line )
91 return ( line !~ /[A-Za-z\-?\*_\.]/ ||
92 line =~ /^\s+[*\.:]/ ||
# Tests whether a line begins with "CLUSTAL" + whitespace, "MUSCLE" +
# whitespace + "(", or "PROBCONS" + whitespace.
# Returns the match index (0, which is truthy in Ruby) on a match, nil otherwise.
99 def is_program_name_line?( line )
100 return ( line =~ /^CLUSTAL\s/ ||
101 line =~ /^MUSCLE\s\(/ ||
102 line =~ /^PROBCONS\s/ )
104 end # class GeneralMsaParser