1 #!/usr/local/bin/ruby -w
5 # Copyright:: Copyright (C) 2006-2008 Christian M. Zmasek
6 # License:: GNU Lesser General Public License (LGPL)
8 # $Id: pfam2go_reformat.rb,v 1.4 2008/11/27 01:41:36 cmzmasek Exp $
10 # Reformat pfam2go to an "association" file suitable as input
11 # for microarray GO enrichment/overrepresentation-type analyses,
12 # and create a file listing all mapped Pfams as well.
# Script body: checks the Ruby version and the command line, derives three
# output file names from the base name given in ARGV[1], scans the input file
# for Pfam-to-GO mapping lines, and writes the collected text to the outputs.
#
# NOTE(review): in this listing every line carries a leading number and some
# statements are not visible (closing `end`s, and the assignments of `infile`,
# `pfams`, `pfam_id`, `pfam_name` and `go_id`). The comments below describe
# only what the visible lines show; anything else is marked as an assumption.
15 module ForesterScripts
# Version gate: prints a message when RUBY_VERSION does not match /1.9/.
# NOTE(review): the dots in /1.9/ are unescaped and the pattern is unanchored,
# so any version string containing "1", one arbitrary character, then "9"
# (e.g. "2.1.9") also passes this check — confirm whether that is intended.
19 if RUBY_VERSION !~ /1.9/
20 puts( "Your ruby version is #{RUBY_VERSION}, expected 1.9.x " )
# Exactly two command-line arguments are required; otherwise print the usage.
24 if ( ARGV == nil || ARGV.length != 2 )
25 puts( "usage: pfam2go_reformat.rb <pfam2go file> <outfiles base>" )
# The second argument is the common prefix of the three output file names.
30 outfilebase = ARGV[ 1 ]
31 outfile_sgd_style = outfilebase + "_sgd_style_associations"
32 outfile_simple_map = outfilebase + "_basic_associations"
33 outfile_all_pfams = outfilebase + "_all_associated_pfams"
# Report any output file that already exists.
# NOTE(review): File.exists? is deprecated and was removed in Ruby 3.2;
# File.exist? is the supported spelling.
37 if ( File.exists?( outfile_sgd_style ) )
38 puts( "outfile [" + outfile_sgd_style + "] already exists" )
41 if ( File.exists?( outfile_simple_map ) )
42 puts( "outfile [" + outfile_simple_map + "] already exists" )
45 if ( File.exists?( outfile_all_pfams ) )
46 puts( "outfile [" + outfile_all_pfams + "] already exists" )
# Report a missing input file.
# NOTE(review): `infile` is not assigned in the visible lines — presumably
# ARGV[ 0 ], per the usage message; confirm.
49 if ( !File.exists?( infile) )
50 puts( "infile [" + infile + "] does not exist" )
# Accumulators for the text of the two association files.
54 out_str_sgd = String.new
55 out_str_basic = String.new
# Read the input line by line.
57 File.open( infile ) do | file |
58 while line = file.gets
# A mapping line starts with "Pfam:PF" followed by digits (capture 1), then a
# whitespace-delimited token (capture 2), and ends with a "GO:<digits>" id
# (capture 3). Lines that do not match are not handled by any visible code.
59 if line =~ /^\s*Pfam:PF(\d+)\s+(\S+)\s.+(GO:\d+)\s*$/
# NOTE(review): pfam_id, pfam_name and go_id are presumably taken from
# captures 1-3 above; their assignments are not visible here — confirm.
# SGD-style record, tab-separated: "PFAM", the Pfam name twice, an empty
# field, the GO id, "PF:" + Pfam id, then a run of trailing tabs.
63 new_line = "PFAM" + "\t" + pfam_name + "\t" + pfam_name + "\t\t" + go_id + "\t" + "PF:" + pfam_id + "\t\t\t\t\t\t\t\t\t"
# NOTE(review): `+` builds a new String on every iteration; `<<` would append
# in place.
64 out_str_sgd = out_str_sgd + new_line + "\n"
# Basic record: Pfam name, a tab, the GO id.
65 out_str_basic = out_str_basic + pfam_name + "\t" + go_id + "\n"
# NOTE(review): `pfams` is not created in the visible lines; it responds to
# `add` and `size`, so it is presumably a Set — confirm.
66 pfams.add( pfam_name )
# Write the accumulated SGD-style records.
71 open( outfile_sgd_style, 'w' ) do |file|
72 file.write( out_str_sgd )
# Write the accumulated basic records.
74 open( outfile_simple_map, 'w' ) do |file|
75 file.write( out_str_basic )
# Open the third output file; the statements that write to it are not visible
# here.
77 open( outfile_all_pfams, 'w' ) do |file|
# Summary printed to standard output.
# NOTE(review): "assocations" in two of the messages below is misspelled; it
# is left untouched here because it is program output.
83 puts( "number of associated pfams : " + pfams.size.to_s )
84 puts( "wrote assocations in sgd style to : " + outfile_sgd_style )
85 puts( "wrote assocations in basic style to: " + outfile_simple_map )
86 puts( "wrote all associated pfams to : " + outfile_all_pfams )