2 # = lib/evo/io/parser/general_msa_parser - GeneralMsaParser class
4 # Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
5 # License:: GNU Lesser General Public License (LGPL)
7 # $Id: general_msa_parser.rb,v 1.8 2009/10/08 22:44:54 cmzmasek Exp $
9 # last modified: 2009/10/08
11 require 'lib/evo/io/parser/msa_parser'
12 require 'lib/evo/msa/msa'
# NOTE(review): this listing appears to be a partial extract of the original
# file -- every line carries a leading number and several original lines
# (including the `def parse( path )` header and most `else`/`end` lines) are
# not visible here. The comments below describe only what the visible lines
# show; anything depending on a missing line is marked as an assumption.
#
# MsaParser subclass that reads a file line by line and appends sequence
# fragments to an `msa` object (created on a line not visible here).
18 class GeneralMsaParser < MsaParser
# --- parse( path ) ----------------------------------------------------------
# Reads the file at `path` and fills `msa` with named sequences.
# Raises IOError for lines it cannot interpret (see the raise sites below).
# NOTE(review): the return value is not visible in this extract.
#
# Readability check is delegated to a Util helper defined elsewhere; what it
# does on failure cannot be seen from here.
24 Util.check_file_for_readability( path )
# Position of the current sequence inside the current block. It is reset to
# -1 and incremented before each use, so the first sequence of a block gets
# index 0.
26 current_seq_index_per_block = -1
# UTF-8 -> UTF-8 converter with //IGNORE, i.e. characters that cannot be
# converted are discarded instead of raising.
# NOTE(review): Iconv was removed from Ruby's standard library in 2.0 and no
# `require 'iconv'` is visible in this extract -- confirm the target Ruby
# version or consider String#encode / String#scrub.
31 ic = Iconv.new( 'UTF-8//IGNORE', 'UTF-8' )
32 File.open( path ) do | file |
# `file.gets` returns nil at end of file, which terminates the loop.
33 while line = file.gets
34 line = ic.iconv( line )
# Lines matched by can_ignore? get their own branch; its body is not visible
# (presumably they are skipped -- TODO confirm).
35 if can_ignore?( line )
# A program banner (CLUSTAL / MUSCLE / PROBCONS) is only special-cased while
# `is_first` is set; `is_first` is maintained on lines not visible here.
37 elsif ( is_first && is_program_name_line?( line ) )
# Accept three line shapes: "name data", "<leading whitespace>data", and a
# single token with no further data. Each is handled separately below.
38 elsif( line =~ /^\S+\s+.+\s*$/ || line =~ /^\s+.+\s*$/ || line =~ /^\S+\s*$/ )
# NOTE(review): the condition guarding this reset is not visible; presumably
# it fires at the start of a new block.
41 current_seq_index_per_block = -1
44 current_seq_index_per_block += 1
# Shape 1: a non-whitespace name followed by sequence data.
45 if ( line =~ /^(\S+)\s+(.+?)\s*$/ )
# Every whitespace character inside the sequence text becomes '.'.
# `name` is assigned on a line not visible here (presumably from $1).
47 seq = $2.gsub( /\s/, '.' )
# Look up existing sequences with this name; the meaning of the two `false`
# flags is defined by the msa class, not visible here.
48 a = msa.find_by_name( name, false, false )
# Exactly one existing match: this line continues that sequence.
# NOTE(review): the preceding branch is not visible; presumably it adds a new
# sequence when there is no match.
51 elsif ( a.length == 1 )
52 msa.get_sequence( a[ 0 ] ).append!( seq )
# Any other lookup result is treated as an error.
54 error_msg = "Unexpected error at line: " + line
55 raise IOError, error_msg
# Shape 2: the line starts with whitespace, so it carries no name of its own;
# the data is attached to `current_name` (set on a line not visible here).
58 elsif ( line =~ /^\s+(.+?)\s*$/ )
59 seq = $1.gsub( /\s/, '.' )
60 a = msa.find_by_name( current_name, false, false )
# NOTE(review): the condition that triggers this error is not visible.
62 error_msg = "Unexpected error at line: " + line
63 raise IOError, error_msg
65 msa.get_sequence( a[ 0 ] ).append!( seq )
# Shape 3: a single token with no whitespace-separated second field.
68 elsif ( line =~ /^(\S+)\s*$/ )
# NOTE(review): the guard for this error and the assignment of `seq` for this
# branch are on lines not visible here.
71 error_msg = "First block cannot contain unnamed sequences"
72 raise IOError, error_msg
# Unnamed data is matched to a sequence by its position within the block.
74 msa.get_sequence( current_seq_index_per_block ).append!( seq )
# Reached for lines that fit none of the handled cases.
79 error_msg = "Unexpected line: " + line
80 raise IOError, error_msg
88 end # def parse( path )
# --- can_ignore?( line ) ----------------------------------------------------
# Returns a truthy value when `line` carries no sequence data. Visible
# conditions:
#   * the line contains none of: ASCII letters, '-', '?', '*', '_', '.'
#   * the line starts with whitespace followed by '*', '.' or ':'
#     (presumably a conservation/consensus marker line -- TODO confirm)
# NOTE(review): the expression continues after the last visible `||`; further
# conditions exist on lines not shown here.
92 def can_ignore?( line )
93 return ( line !~ /[A-Za-z\-?\*_\.]/ ||
94 line =~ /^\s+[*\.:]/ ||
# --- is_program_name_line?( line ) ------------------------------------------
# Returns a truthy value (match index) when `line` begins with one of the
# recognised banners: "CLUSTAL" + whitespace, "MUSCLE" + whitespace + "(",
# or "PROBCONS" + whitespace; nil otherwise.
101 def is_program_name_line?( line )
102 return ( line =~ /^CLUSTAL\s/ ||
103 line =~ /^MUSCLE\s\(/ ||
104 line =~ /^PROBCONS\s/ )
106 end # class GeneralMsaParser