1 #!/usr/local/bin/ruby -w
5 # Copyright:: Copyright (C) 2008-2009 Christian M. Zmasek. All rights reserved.
6 # License:: GNU Lesser General Public License (LGPL)
8 # $Id: pfam_to_scop.rb,v 1.2 2008/08/28 17:09:07 cmzmasek Exp $
10 # This extracts ID and SCOP fa (or fa and sf) from Pfam data files.
12 # Created 2008-06-25 in San Diego, CA, USA by CMZ
14 # Usage: pfam_to_scop.rb <infile: Pfam data file such as Pfam-A.full> <outfile>
# Script body, namespaced under ForesterScripts. It reads a Pfam data file
# line by line and writes extracted identifiers to an output file.
# NOTE(review): this view is an excerpt -- statements between the visible
# lines (closing `end`s, assignments, counters, any `exit` calls) are not shown.
18 module ForesterScripts
# Interpreter version check; prints a message when RUBY_VERSION does not match.
# NOTE(review): the pattern is unanchored and its '.' is unescaped, so it
# matches '1', any character, '9' anywhere in the string (e.g. "2.1.9").
# /\A1\.9/ is probably what was intended -- confirm.
20 if RUBY_VERSION !~ /1.9/
21 puts( "Your ruby version is #{RUBY_VERSION}, expected 1.9.x " )
# Argument check: exactly two command-line arguments are expected, otherwise
# the usage line is printed. ARGV is always an Array, so the nil comparison
# is redundant.
30 if ( ARGV == nil || ARGV.length != 2 )
31 puts( "usage: pfam_to_scop.rb <infile: Pfam data file such as Pfam-A.full> <outfile>" )
# The input file must already exist. `pfamfile` is assigned outside this view
# (presumably from ARGV -- confirm).
# NOTE(review): File.exists? is a deprecated alias of File.exist? and was
# removed in Ruby 3.2.
38 if ( !File.exists?( pfamfile ) )
39 puts( "Pfam data file [" + pfamfile + "] does not exist" )
# The output file must NOT already exist. `outfile` is assigned outside this
# view (presumably from ARGV -- confirm).
42 if ( File.exists?( outfile ) )
43 puts( "outfile [" + outfile + "] already exists" )
# UTF-8 -> UTF-8 converter (Iconv.new takes the target encoding first); the
# //IGNORE suffix makes it drop byte sequences that are not valid UTF-8.
# NOTE(review): Iconv was removed from the standard library in Ruby 2.0;
# String#encode with :invalid => :replace (or String#scrub) is the replacement.
47 ic = Iconv.new( 'UTF-8//IGNORE', 'UTF-8' )
# Output handle opened for writing without a block.
# NOTE(review): no matching close is visible in this excerpt -- confirm it exists.
55 out = File.open( outfile, 'w' )
# Read the Pfam file one line at a time; the block form closes it afterwards.
57 File.open( pfamfile ) do | file |
58 while line = file.gets
# Strip invalid byte sequences so the regex matches below do not fail on them.
61 line = ic.iconv( line )
# "#=GF ID <name>" line: capture group 1 is the rest of the line after "ID".
63 if ( line =~ /#=GF ID\s+(.+)/ )
# Format-error message; the condition that triggers it is not visible here.
65 puts( "Pfam data file [" + pfamfile + "] format error [line: " + line + "]" )
# "#=GF DR SCOP; <id>; fa" line: capture group 1 is the SCOP identifier (\w+).
69 elsif ( line =~ /#=GF\s+DR\s+SCOP;\s+(\w+);\s+fa/ )
# Same for "sf" entries, only considered when the constant SF (defined outside
# this view) is truthy.
71 elsif ( SF && line =~ /#=GF\s+DR\s+SCOP;\s+(\w+);\s+sf/ )
# A line starting with "//" -- presumably the end-of-entry marker of the Pfam
# file format; confirm against the format specification.
73 elsif ( line =~ /^\/\// )
# Second format-error message; its guarding condition is not visible here.
75 puts( "Pfam data file [" + pfamfile + "] format error [line: " + line + "]" )
# Terminate the current output record; LINE_DELIMITER is defined outside this view.
82 out.write( LINE_DELIMITER )
# Summary messages; which one is printed depends on a condition not visible
# here (presumably SF -- confirm). `scop_count` and `count` are maintained
# outside this view. The `.to_s` inside the interpolation is redundant.
96 puts( "Extracted #{scop_count} scop fa and sf identifiers for #{count.to_s} individual Pfams to " + outfile )
98 puts( "Extracted #{scop_count} scop fa identifiers for #{count.to_s} individual Pfams to " + outfile )
103 end # module ForesterScripts