4 mafftpath = "/usr/local/bin/mafft"
6 def cleartempfiles( filenames )
8 system( "rm -rf #{f}" )
17 reorderoption = "--reorder"
18 pickoptions = " --retree 1 "
19 coreoptions = " --globalpair --maxiterate 100 "
22 directionoptions = " --retree 0 --pileup "
28 params = ARGV.getopts('m:s:n:p:i:C:L:A:o:MhuD:')
31 STDERR.puts "See #{$0} -h"
37 mafftpath = params["m"] if params["m"]
38 seed = params["s"].to_i if params["s"]
39 scand = params["n"].to_s if params["n"]
40 npick = params["p"].to_i if params["p"]
41 infn = params["i"] if params["i"]
42 #pickoptions += params["P"] if params["P"]
43 coreoptions += params["C"] if params["C"] # tsuikagaki!
44 corelastarg += params["L"] if params["L"] # tsuikagaki!
45 addoptions += params["A"] if params["A"]
46 directionoptions += params["D"] if params["D"] # tsuikagaki
47 markcore = "*" if params["M"]
48 #randompickup = false if params["S"]
49 reorderoption = "" if params["o"] =~ /^i/
50 outnum = true if params["u"]
53 STDERR.puts "Usage: #{$0} -i inputfile [options]"
54 STDERR.puts "Options:"
55 STDERR.puts " -i string Input file."
56 STDERR.puts " -m string Mafft command. Default: mafft"
57 STDERR.puts " -s int Seed. Default:0"
58 STDERR.puts " -n int Number of candidates for core sequences. Default: upper 50% in length"
59 STDERR.puts " -p int Number of core sequences. Default: 500"
60 # STDERR.puts " -P \"string\" Mafft options for the PICKUP stage."
61 # STDERR.puts " Default: \"--retree 1\""
62 # STDERR.puts " -S Tree-based pickup. Default: off"
63 STDERR.puts " -C \"string\" Mafft options for the CORE stage."
64 STDERR.puts " Default: \"--globalpair --maxiterate 100\""
65 STDERR.puts " -A \"string\" Mafft options for the ADD stage."
66 STDERR.puts " Default: \"\""
67 STDERR.puts " -D \"string\" Mafft options for inferring the direction of nucleotide sequences."
68 STDERR.puts " Default: \"\""
69 STDERR.puts " -o r or i r: Reorder the sequences based on similarity. Default"
70 STDERR.puts " i: Same as input."
75 STDERR.puts "Give input file with -i."
82 tmpdir = ENV["TMPDIR"]
83 tmpdir = "/tmp" if tmpdir == nil
85 tempfiles.push( temp_pf = tmpdir + "/_pf" + pid )
86 tempfiles.push( temp_nf = tmpdir + "/_nf" + pid )
87 tempfiles.push( temp_cf = tmpdir + "/_cf" + pid )
88 tempfiles.push( temp_of = tmpdir + "/_of" + pid )
90 Signal.trap(:INT){cleartempfiles( tempfiles ); exit 1}
91 at_exit{ cleartempfiles( tempfiles )}
93 system "#{mafftpath} --version > #{temp_of} 2>&1"
95 fp = File.open( temp_of, "r" )
100 versionnum = line.split(' ')[0].sub(/v/,"").to_f
102 if versionnum < 7.210 then
104 STDERR.puts "Please use mafft version >= 7.210\n"
111 def readfasta( fp, name, seq )
116 name.push( $_.sub(/>/,"").chop )
117 seq.push( tmpseq ) if nseq > 0
131 infp = File.open( infn, "r" )
138 if directionoptions =~ /--adjustdirection/ then
139 system( mafftpath + "#{directionoptions} #{infn} > #{temp_of}" )
141 system( "cp #{infn} #{temp_of}" )
146 infp = File.open( temp_of, "r" )
147 tin = readfasta( infp, tname, tseq )
153 tname[i] = "_numo_s_#{i}_numo_e_" + tname[i]
157 npick = 0 if npick == 1
158 npick = tin if npick > tin
161 if scand =~ /%$/ then
162 ncand = (tin * scand.to_f * 0.01 ).to_i
167 if ncand < 0 || ncand > tin then
168 STDERR.puts "Error. -n #{scand}?"
172 ncand = npick if ncand < npick
173 ncand = tin if ncand > tin
175 STDERR.puts "ncand = #{ncand}, npick = #{npick}"
180 lenhash[i] = tseq[i].gsub(/-/,"").length
184 sorted = lenhash.sort_by{|key, value| [-value, i+=1]}
185 #for i in 0..(ncand-1)
186 # sai[sorted[i][0]] = 1
188 #for i in ncand..(tin-1)
189 # sai[sorted[i][0]] = 0
196 if tname[cand] =~ /^_focus_/ then
199 elsif ncandres < ncand then
200 unless tname[cand] =~ /^_tsukawanai_/ then
211 if ncandres+ntsukau < npick
212 STDERR.puts "ncandres = #{ncandres}"
213 STDERR.puts "ncand = #{ncand}"
214 STDERR.puts "ntsukau = #{ntsukau}"
215 STDERR.puts "npick = #{npick}"
216 STDERR.puts "Too many _tsukawanai_ sequences."
221 STDERR.puts "ntsukau = #{ntsukau}"
222 STDERR.puts "npick = #{npick}"
223 STDERR.puts "Too many _focus_ sequences."
229 # puts sai[i].to_s + " " + tname[i]
232 npickrand = npick - ntsukau
236 for i in 0..(npickrand-1)
239 for i in npickrand..(ncandres-1)
242 pick2 = pick.sort_by{rand}
250 if pick[ipick] == 0 then
259 if tname[i] =~ /^_focus_/ then
265 pfp = File.open( temp_pf, 'w' )
266 nfp = File.open( temp_nf, 'w' )
271 pfp.puts ">" + i.to_s + " " + ">" + markcore + tname[i]
274 nfp.puts ">" + i.to_s + " " + ">" + tname[i]
284 STDERR.puts "Not supported in this version"
290 system( mafftpath + " #{coreoptions} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # add de sort
292 system( mafftpath + " #{coreoptions} #{reorderoption} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # ima sort
294 res = ( File::stat(temp_cf).size == 0 )
296 system( "cat /dev/null > #{temp_cf}" )
301 STDERR.puts "\n\nError in the core alignment stage.\n\n"
307 system( mafftpath + " #{addoptions} #{reorderoption} --add #{temp_nf} #{temp_cf} > #{temp_of}" )
308 res = ( File::stat(temp_of).size == 0 )
310 system( "cp #{temp_cf} #{temp_of}" )
315 STDERR.puts "\n\nError in the add stage.\n\n"
321 resfp = File.open( temp_of, "r" )
322 nres = readfasta( resfp, resname, resseq )
325 if reorderoption =~ /--reorder/ then
327 puts ">" + resname[i].sub(/^[0-9]* >/,"")
337 if resname[i] =~ /^[0-9]* >/
338 key = resname[i].split(' ')[0]
339 seqhash[key] = resseq[i]
340 namehash[key] = resname[i]
342 seqlast.push( resseq[i] )
343 namelast.push( resname[i] )
347 for i in 0..(nlast-1)
348 puts ">" + namelast[i]
351 for i in 0..(nres-nlast-1)
353 puts ">" + namehash[key].sub(/^[0-9]* >/,"")