X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fevoruby%2Flib%2Fevo%2Fio%2Fparser%2Ffasta_parser.rb;h=f1da2a970c4fbd41dca94357af22808379a304fb;hb=a6a716807f0c2a45c54a025252587f8904d170e6;hp=dfc3a7969981fc670a59fde387253aff6d895519;hpb=2fd8b4253e465b0de7f15e54b8f1d9a1229831ee;p=jalview.git diff --git a/forester/ruby/evoruby/lib/evo/io/parser/fasta_parser.rb b/forester/ruby/evoruby/lib/evo/io/parser/fasta_parser.rb index dfc3a79..f1da2a9 100644 --- a/forester/ruby/evoruby/lib/evo/io/parser/fasta_parser.rb +++ b/forester/ruby/evoruby/lib/evo/io/parser/fasta_parser.rb @@ -1,77 +1,70 @@ # # = lib/evo/io/parser/fasta_parser - FastaParser class # -# Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek -# License:: GNU Lesser General Public License (LGPL) +# Copyright:: Copyright (C) 2017 Christian M. Zmasek +# License:: GNU Lesser General Public License (LGPL) # -# $Id: fasta_parser.rb,v 1.11 2010/10/08 22:04:17 cmzmasek Exp $ -# -# last modified: 05/17/2007 +# Last modified: 2017/02/07 require 'lib/evo/io/parser/msa_parser' require 'lib/evo/msa/msa' -require 'iconv' - module Evoruby - class FastaParser < MsaParser + class FastaParser < MsaParser - def initialize - end + def initialize + end - def parse( path ) - Util.check_file_for_readability( path ) - msa = Msa.new - current_seq = String.new() - name = String.new() - saw_first_seq = false - ic = Iconv.new( 'UTF-8//IGNORE', 'UTF-8' ) - File.open( path ) do | file | - while line = file.gets - line = ic.iconv( line ) - if can_ignore?( line, saw_first_seq ) + def parse( path ) + Util.check_file_for_readability( path ) + msa = Msa.new + current_seq = String.new() + name = String.new() + saw_first_seq = false + File.open( path ) do | file | + while line = file.gets + + line.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "?") + if can_ignore?( line, saw_first_seq ) - elsif line =~ /^\s*>\s*(.+)/ - saw_first_seq = true - add_seq( name, current_seq, msa ) - name = $1 - current_seq = String.new() - elsif line =~ /^\s*(.+)/ - if name.length < 1 - error_msg = "format error at: " + line - raise IOError, error_msg - end - # was: seq = $1.rstrip - seq = $1.gsub(/\s+/, '') - current_seq << seq - else - error_msg = "Unexpected line: " + line - raise IOError, error_msg - end - end - end + elsif line =~ /^\s*>\s*(.+)/ + saw_first_seq = true add_seq( name, current_seq, msa ) - return msa + name = $1 + current_seq = String.new() + elsif line =~ /^\s*(.+)/ + if name.length < 1 + error_msg = "format error at: " + line + raise IOError, error_msg + end + # was: seq = $1.rstrip + seq = $1.gsub(/\s+/, '') + current_seq << seq + else + error_msg = "Unexpected line: " + line + raise IOError, error_msg + end end + end + add_seq( name, current_seq, msa ) + return msa + end - private + private - def add_seq( name, seq, msa ) - if name.length > 0 && seq.length > 0 - msa.add( name, seq ) - end - end + def add_seq( name, seq, msa ) + if name.length > 0 && seq.length > 0 + msa.add( name, seq ) + end + end - def can_ignore?( line, saw_first_seq ) - return ( line !~ /\S/ || - line =~ /^\s*#/ || - line =~ /^\s*%/ || - line =~ /^\s*\/\// || - line =~ /^\s*!!/ || - ( !saw_first_seq && line =~/^\s*[^>]/ ) ) - end + def can_ignore?( line, saw_first_seq ) + return ( line !~ /\S/ || + line =~ /^\s*#/ || + ( !saw_first_seq && line =~/^\s*[^>]/ ) ) + end - end # class FastaParser + end # class FastaParser end # module Evoruby