1 #!/usr/local/bin/ruby -w
5 # Copyright:: Copyright (C) 2008-2009 Christian M. Zmasek. All rights reserved.
6 # License:: GNU Lesser General Public License (LGPL)
8 # $Id: pfam_summarize.rb,v 1.2 2008/08/28 17:09:07 cmzmasek Exp $
10 # This extracts ID, AC, DE, TP, and DR values from Pfam data files.
12 # Created 2008-06-25 in San Diego, CA, USA by CMZ
14 # Usage: pfam_summarize.rb <infile: Pfam data file such as Pfam-A.full> <outfile>
# Namespace wrapping the pfam_summarize script. The statements below run at
# load time: they check the interpreter version and the command-line
# arguments, then scan a Pfam data file line by line for "#=GF" annotation
# lines and write output to a second file.
18 module ForesterScripts
# Print a notice when RUBY_VERSION does not match /1.9/.
# NOTE(review): the "." in /1.9/ is unescaped and the pattern is unanchored,
# so it matches any string containing "1", any character, "9" -- confirm
# whether a stricter check such as /\A1\.9/ was intended.
19 if RUBY_VERSION !~ /1.9/
20 puts( "Your ruby version is #{RUBY_VERSION}, expected 1.9.x " )
# Exactly two command-line arguments are required; otherwise print the usage
# text.
26 if ( ARGV == nil || ARGV.length != 2 )
27 puts( "usage: pfam_summarize.rb <infile: Pfam data file such as Pfam-A.full> <outfile>" )
# The input file must already exist.
# NOTE(review): the assignments of pfamfile and outfile are not visible here;
# presumably they are taken from ARGV -- confirm.
# NOTE(review): File.exists? is a deprecated alias of File.exist? and was
# removed in Ruby 3.2.
34 if ( !File.exists?( pfamfile ) )
35 puts( "Pfam data file [" + pfamfile + "] does not exist" )
# An outfile that already exists is reported rather than silently reused.
38 if ( File.exists?( outfile ) )
39 puts( "outfile [" + outfile + "] already exists" )
# UTF-8 to UTF-8 converter with the //IGNORE flag, applied to each input line
# below; presumably used to drop byte sequences that are not valid UTF-8.
# NOTE(review): Iconv is not part of the standard library from Ruby 2.0 on.
43 ic = Iconv.new( 'UTF-8//IGNORE', 'UTF-8' )
# Open the output file for writing.
53 out = File.open( outfile, 'w' )
# Read the Pfam data file one line at a time; the block form closes the
# input file when the block finishes.
55 File.open( pfamfile ) do | file |
56 while line = file.gets
# Run the line through the converter before any pattern matching.
59 line = ic.iconv( line )
# Dispatch on the annotation tag. In every pattern, capture group 1 is the
# text that follows the tag and its separating whitespace.
61 if ( line =~ /#=GF ID\s+(.+)/ )
# NOTE(review): the condition guarding this format-error message is not
# visible here -- confirm when an ID line is treated as an error.
63 puts( "Pfam data file [" + pfamfile + "] format error [line: " + line + "]" )
67 elsif ( line =~ /#=GF AC\s+(.+)/ )
69 elsif ( line =~ /#=GF DE\s+(.+)/ )
71 elsif ( line =~ /#=GF TP\s+(.+)/ )
73 elsif ( line =~ /#=GF DR\s+(.+)/ )
# A line starting with "//" is handled here; presumably it terminates one
# Pfam entry -- confirm against the Pfam file format.
75 elsif ( line =~ /^\/\// )
# Reaching this point with id or ac still nil is reported as a format error.
76 if ( id == nil || ac == nil )
77 puts( "Pfam data file [" + pfamfile + "] format error [line: " + line + "]" )
# Write LINE_DELIMITER to the output file.
# NOTE(review): LINE_DELIMITER is defined outside the visible lines.
96 out.write( LINE_DELIMITER )
# Final report: the value of count and the name of the output file.
111 puts( "Summarized data for " + count.to_s + " individual Pfams to " + outfile )
115 end # module ForesterScripts