1 #!/usr/local/bin/ruby -w
5 # Copyright:: Copyright (C) 2008-2009 Christian M. Zmasek.
7 # License:: GNU Lesser General Public License (LGPL)
9 # $Id: scoptastic.rb,v 1.3 2008/08/28 17:09:07 cmzmasek Exp $
11 # To create Pfam id to SCOP mappings, one for each of four levels of SCOP
14 # Created 2008-06-25 in San Diego, CA, USA by CMZ
16 # Usage: scoptastic.rb <Pfam id to ac map file, e.g.
17 # pfam_summarize.rb output> <Pfam ac to SCOP classification map file> <Pfam id
18 # to SCOP outfile root>
# Script body: combines a "Pfam id to accession" map with a "Pfam accession to
# SCOP classification" map and writes "Pfam id -> SCOP" mappings to four
# output files, one per SCOP level (class, fold, superfamily, family).
#
# NOTE(review): several names used below (outfile, count, SEP, LINE_DELIMITER,
# Iconv) have no visible definition or require in this section, and the
# terminators of the conditionals and loops are not visible either --
# presumably they live on lines not shown here; verify against the full file.
23 module ForesterScripts
# Print a notice when the interpreter version does not match the pattern.
# NOTE(review): the "." in /1.9/ is unescaped and the pattern is unanchored, so
# it matches any character between "1" and "9" and also matches a version such
# as "2.1.9"; /\A1\.9\./ would express "1.9.x" exactly.
25 if RUBY_VERSION !~ /1.9/
26 puts( "Your ruby version is #{RUBY_VERSION}, expected 1.9.x " )
# Suffixes appended to the outfile root; each names one of the four output files.
30 CLASS_LEVEL_SUFFIX = "_SCOP_2_CLASS"
31 FOLD_LEVEL_SUFFIX = "_SCOP_3_FOLD"
32 SUPERFAMILY_LEVEL_SUFFIX = "_SCOP_4_SUPERFAMILY"
33 FAMILY_LEVEL_SUFFIX = "_SCOP_5_FAMILY"
# Print the usage text unless exactly three command-line arguments were given.
# NOTE(review): ARGV is always an Array in Ruby, so the nil comparison looks
# redundant.
38 if ( ARGV == nil || ARGV.length != 3 )
39 puts( "usage: scoptastic.rb <Pfam id to ac map file, e.g. pfam_summarize.rb output> <Pfam ac to SCOP classification map file> <Pfam id to SCOP outfile root>" )
# First argument: path of the Pfam id to accession map file.
# Second argument: path of the Pfam accession to SCOP classification map file.
43 pfam_id_to_ac = ARGV[ 0 ]
44 pfam_ac_to_scop = ARGV[ 1 ]
# Report input files that are missing.
# NOTE(review): File.exists? is deprecated and was removed in Ruby 3.2;
# File.exist? is the supported spelling and is also available on Ruby 1.9.
47 if ( !File.exists?( pfam_id_to_ac ) )
48 puts( "Pfam id to ac map file [" + pfam_id_to_ac + "] does not exist" )
51 if ( !File.exists?( pfam_ac_to_scop ) )
52 puts( "Pfam ac to SCOP classification map file [" + pfam_ac_to_scop + "] does not exist" )
# Report each of the four output files that already exists on disk.
55 if ( File.exists?( outfile + CLASS_LEVEL_SUFFIX ) )
56 puts( "Outfile [" + outfile + CLASS_LEVEL_SUFFIX + "] already exists" )
59 if ( File.exists?( outfile + FOLD_LEVEL_SUFFIX ) )
60 puts( "Outfile [" + outfile + FOLD_LEVEL_SUFFIX + "] already exists" )
63 if ( File.exists?( outfile + SUPERFAMILY_LEVEL_SUFFIX ) )
64 puts( "Outfile [" + outfile + SUPERFAMILY_LEVEL_SUFFIX + "] already exists" )
67 if ( File.exists?( outfile + FAMILY_LEVEL_SUFFIX ) )
68 puts( "Outfile [" + outfile + FAMILY_LEVEL_SUFFIX + "] already exists" )
# UTF-8 to UTF-8 converter with the //IGNORE flag, applied to every input line
# below; presumably intended to drop invalid byte sequences -- confirm.
# NOTE(review): Iconv was removed from the standard library in Ruby 2.0;
# String#encode with invalid: :replace, or String#scrub, are the replacements.
72 ic = Iconv.new( 'UTF-8//IGNORE', 'UTF-8' )
# Pfam accession (PF<digits>) => Pfam id.
74 pfam_ac_to_id_map = Hash.new
# Pfam accession (PF<digits>) => full SCOP classification string.
76 pfam_ac_to_scop_map = Hash.new
# Read the id-to-accession file line by line. Lines starting with "#" and lines
# without any non-whitespace character are skipped. A data line must begin with
# a non-whitespace token (the id) followed by whitespace and an accession of
# the form PF<digits>; the map is keyed by the accession.
80 File.open( pfam_id_to_ac ) do | file |
81 while line = file.gets
82 line = ic.iconv( line )
83 if ( line !~ /^#/ && line =~ /\S/ )
84 if ( line =~ /^(\S+)\s+(PF\d+)/ )
85 pfam_ac_to_id_map[ $2 ] = $1
# Message for a data line that does not have the expected format (presumably
# the else branch of the match above -- the branch keyword is not visible).
88 puts( "Pfam id to ac map file [" + pfam_id_to_ac + "] format error [line: " + line + "]" )
95 puts( "Extracted #{count} Pfam id to ac mappings from file [#{pfam_id_to_ac}]" )
# Read the accession-to-SCOP file with the same skipping rules. A data line
# must begin with PF<digits>, optionally followed by "." and digits (matched
# but not captured), then whitespace and a SCOP string of the form
# <lowercase letter>.<digits>.<digits>.<digits>.
98 File.open( pfam_ac_to_scop ) do | file |
99 while line = file.gets
100 line = ic.iconv( line )
101 if ( line !~ /^#/ && line =~ /\S/ )
102 if ( line =~ /^(PF\d+)\.?\d*\s+([a-z]\.\d+\.\d+\.\d+)/ )
103 pfam_ac_to_scop_map[ $1 ] = $2
# Message for a data line that does not have the expected format (presumably
# the else branch of the match above -- the branch keyword is not visible).
106 puts( "Pfam ac to SCOP classification map file [" + pfam_ac_to_scop + "] format error [line: " + line + "]" )
113 puts( "Extracted #{count} Pfam ac to SCOP classification mappings from file [#{pfam_ac_to_scop}]" )
# Open the four per-level output files for writing.
115 out_class_level = File.open( outfile + CLASS_LEVEL_SUFFIX, 'w' )
116 out_fold_level = File.open( outfile + FOLD_LEVEL_SUFFIX , 'w' )
117 out_superfamily_level = File.open( outfile + SUPERFAMILY_LEVEL_SUFFIX, 'w' )
118 out_family_level = File.open( outfile + FAMILY_LEVEL_SUFFIX, 'w' )
# For every accession that has a SCOP classification and a known Pfam id, write
# one record to each output file: the id, SEP, the SCOP string truncated to
# that file's level, and LINE_DELIMITER.
121 pfam_ac_to_scop_map.each { | pfam_ac,scop |
122 if ( pfam_ac_to_id_map.has_key?( pfam_ac ) )
123 pfam_id = pfam_ac_to_id_map[ pfam_ac ]
# "\." inside a double-quoted string is just ".", and String#split with a
# String argument splits on that literal text, so this yields the four
# dot-separated components of the SCOP string.
124 scop_split = scop.split( "\." )
126 out_class_level.write( pfam_id )
127 out_fold_level.write( pfam_id )
128 out_superfamily_level.write( pfam_id )
129 out_family_level.write( pfam_id )
131 out_class_level.write( SEP )
132 out_fold_level.write( SEP )
133 out_superfamily_level.write( SEP )
134 out_family_level.write( SEP )
# Class level keeps the first component, fold level the first two,
# superfamily level the first three, family level the complete string.
136 out_class_level.write( scop_split[ 0 ] )
137 out_fold_level.write( scop_split[ 0 ] + "." + scop_split[ 1 ] )
138 out_superfamily_level.write( scop_split[ 0 ] + "." + scop_split[ 1 ] + "." + scop_split[ 2 ] )
139 out_family_level.write( scop )
141 out_class_level.write( LINE_DELIMITER )
142 out_fold_level.write( LINE_DELIMITER )
143 out_superfamily_level.write( LINE_DELIMITER )
144 out_family_level.write( LINE_DELIMITER )
# Message for an accession that has a SCOP classification but no entry in the
# id-to-accession map (presumably the else branch of the has_key? test above).
147 puts( "Pfam ac #{pfam_ac} not found in Pfam id to ac map file [" + pfam_id_to_ac + "]" )
# Close the output files.
# NOTE(review): no close call for out_fold_level is visible in this section --
# confirm that it is closed as well.
152 out_class_level.close
154 out_superfamily_level.close
155 out_family_level.close
# Final summary naming all four output files.
158 puts( "Wrote #{count} Pfam id to SCOP mappings to files '#{outfile + CLASS_LEVEL_SUFFIX}', '#{outfile + FOLD_LEVEL_SUFFIX}', '#{outfile + SUPERFAMILY_LEVEL_SUFFIX}', and '#{ outfile + FAMILY_LEVEL_SUFFIX}'" )
162 end # module ForesterScripts