2 # = lib/evo/io/parser/general_msa_parser - GeneralMsaParser class
4 # Copyright:: Copyright (C) 2017 Christian M. Zmasek
5 # License:: GNU Lesser General Public License (LGPL)
7 # Last modified: 2017/02/07
9 require 'lib/evo/io/parser/msa_parser'
10 require 'lib/evo/msa/msa'
13 class GeneralMsaParser < MsaParser
# Body of parse( path ): reads the alignment file at `path` line by line and
# appends the sequence text found on each line to a sequence held in `msa`.
# Raises IOError for lines (or states) it cannot interpret.
# Presumably this raises if `path` cannot be read -- confirm in Util.
18 Util.check_file_for_readability( path )
# Position of the current sequence within the current block; -1 means that no
# sequence line has been consumed yet in this block.
20 current_seq_index_per_block = -1
26 File.open( path ) do | file |
27 while line = file.gets
# Re-encode the line in place as UTF-8, substituting "?" for invalid or
# undefined characters.
# NOTE(review): when the line is already tagged as UTF-8 some Ruby versions
# treat this as a no-op -- verify that invalid bytes are really replaced.
28 line.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "?")
# Lines accepted by can_ignore? carry no sequence data.
29 if can_ignore?( line )
# A program banner (see is_program_name_line?) is only recognised while
# is_first is set.
31 elsif ( is_first && is_program_name_line?( line ) )
# Candidate data line: "name sequence", an indented sequence-only line, or a
# single token with no further text.
32 elsif( line =~ /^\S+\s+.+\s*$/ || line =~ /^\s+.+\s*$/ || line =~ /^\S+\s*$/ )
# Restart the per-block counter (presumably at the start of a new block; the
# governing condition is not shown here -- TODO confirm).
35 current_seq_index_per_block = -1
38 current_seq_index_per_block += 1
# Case 1: a name token followed by sequence text.
39 if ( line =~ /^(\S+)\s+(.+?)\s*$/ )
# Every whitespace character inside the sequence text becomes '.'.
41 seq = $2.gsub( /\s/, '.' )
# `a` is used below as a list of indices into msa; the meaning of the two
# `false` flags is defined by Msa#find_by_name -- TODO confirm.
42 a = msa.find_by_name( name, false, false )
# Exactly one existing sequence with this name: extend it.
45 elsif ( a.length == 1 )
46 msa.get_sequence( a[ 0 ] ).append!( seq )
48 error_msg = "Unexpected error at line: " + line
49 raise IOError, error_msg
# Case 2: indented line with sequence text only; it is attributed to the
# sequence found under current_name.
52 elsif ( line =~ /^\s+(.+?)\s*$/ )
53 seq = $1.gsub( /\s/, '.' )
54 a = msa.find_by_name( current_name, false, false )
56 error_msg = "Unexpected error at line: " + line
57 raise IOError, error_msg
59 msa.get_sequence( a[ 0 ] ).append!( seq )
# Case 3: a single token on the line. Per the error message below this is
# rejected in the first block; otherwise the text is appended to the sequence
# at the current position within the block.
62 elsif ( line =~ /^(\S+)\s*$/ )
65 error_msg = "First block cannot contain unnamed sequences"
66 raise IOError, error_msg
68 msa.get_sequence( current_seq_index_per_block ).append!( seq )
# Fallback for a line that cannot be interpreted.
73 error_msg = "Unexpected line: " + line
74 raise IOError, error_msg
82 end # def parse( path )
# Decides whether `line` carries no sequence data and can be skipped by the
# parser. The conditions visible here are:
#  - the line contains none of the characters A-Z, a-z, '-', '?', '*', '_', '.'
#  - the line starts with whitespace that is followed by '*', '.' or ':'
#    (presumably a conservation/consensus row -- TODO confirm)
86 def can_ignore?( line )
87 return ( line !~ /[A-Za-z\-?\*_\.]/ ||
88 line =~ /^\s+[*\.:]/ ||
# Tells whether +line+ is the banner that an alignment program writes at the
# top of its output.
#
# line - String holding one line of the alignment file (nil is tolerated).
#
# Returns true when the line begins with "CLUSTAL" or "PROBCONS" followed by
# a whitespace character, or with "MUSCLE" followed by a whitespace character
# and "("; returns false otherwise.
def is_program_name_line?( line )
  # One alternation instead of three separate matches; `=~` yields nil on a
  # miss (also for a nil receiver), so the result is normalised to a boolean.
  !( line =~ /^(?:CLUSTAL\s|MUSCLE\s\(|PROBCONS\s)/ ).nil?
end
100 end # class GeneralMsaParser