--- /dev/null
+#! /usr/bin/env ruby
+require 'optparse'
+
+mafftpath = "_BINDIR/mafft"
+
+def cleartempfiles( filenames )
+ for f in filenames
+ system( "rm -rf #{f}" )
+ end
+end
+
+
+seed = 0
+scand = "50%"
+npick = 500
+infn = ""
+reorderoption = "--reorder"
+pickoptions = " --retree 1 "
+coreoptions = " --globalpair --maxiterate 100 "
+corelastarg = " "
+addoptions = " "
+directionoptions = " --retree 0 --pileup "
+markcore = ""
+randompickup = true
+outnum = false
+
+begin
+ params = ARGV.getopts('m:s:n:p:i:C:L:A:o:MhuD:')
+rescue => e
+ STDERR.puts e
+ STDERR.puts "See #{$0} -h"
+ exit 1
+end
+
+#p params
+
+mafftpath = params["m"] if params["m"]
+seed = params["s"].to_i if params["s"]
+scand = params["n"].to_s if params["n"]
+npick = params["p"].to_i if params["p"]
+infn = params["i"] if params["i"]
+#pickoptions += params["P"] if params["P"]
+coreoptions += params["C"] if params["C"] # tsuikagaki!
+corelastarg += params["L"] if params["L"] # tsuikagaki!
+addoptions += params["A"] if params["A"]
+directionoptions += params["D"] if params["D"] # tsuikagaki
+markcore = "*" if params["M"]
+#randompickup = false if params["S"]
+reorderoption = "" if params["o"] =~ /^i/
+outnum = true if params["u"]
+
+if params["h"] then
+ STDERR.puts "Usage: #{$0} -i inputfile [options]"
+ STDERR.puts "Options:"
+ STDERR.puts " -i string Input file."
+ STDERR.puts " -m string Mafft command. Default: mafft"
+ STDERR.puts " -s int Seed. Default:0"
+ STDERR.puts " -n int Number of candidates for core sequences. Default: upper 50% in length"
+ STDERR.puts " -p int Number of core sequences. Default: 500"
+# STDERR.puts " -P \"string\" Mafft options for the PICKUP stage."
+# STDERR.puts " Default: \"--retree 1\""
+# STDERR.puts " -S Tree-based pickup. Default: off"
+ STDERR.puts " -C \"string\" Mafft options for the CORE stage."
+ STDERR.puts " Default: \"--globalpair --maxiterate 100\""
+ STDERR.puts " -A \"string\" Mafft options for the ADD stage."
+ STDERR.puts " Default: \"\""
+ STDERR.puts " -D \"string\" Mafft options for inferring the direction of nucleotide sequences."
+ STDERR.puts " Default: \"\""
+ STDERR.puts " -o r or i r: Reorder the sequences based on similarity. Default"
+ STDERR.puts " i: Same as input."
+ exit 1
+end
+
+if infn == "" then
+ STDERR.puts "Give input file with -i."
+ exit 1
+end
+
+
+
+pid = $$.to_s
+tmpdir = ENV["TMPDIR"]
+tmpdir = "/tmp" if tmpdir == nil
+tempfiles = []
+tempfiles.push( temp_pf = tmpdir + "/_pf" + pid )
+tempfiles.push( temp_nf = tmpdir + "/_nf" + pid )
+tempfiles.push( temp_cf = tmpdir + "/_cf" + pid )
+tempfiles.push( temp_of = tmpdir + "/_of" + pid )
+
+Signal.trap(:INT){cleartempfiles( tempfiles ); exit 1}
+at_exit{ cleartempfiles( tempfiles )}
+
+system "#{mafftpath} --version > #{temp_of} 2>&1"
+
+fp = File.open( temp_of, "r" )
+ line = fp.gets
+fp.close
+
+
+versionnum = line.split(' ')[0].sub(/v/,"").to_f
+
+if versionnum < 7.210 then
+ STDERR.puts "\n"
+ STDERR.puts "Please use mafft version >= 7.210\n"
+ STDERR.puts "\n"
+ exit
+end
+
+srand( seed )
+
+def readfasta( fp, name, seq )
+ nseq = 0
+ tmpseq = ""
+ while fp.gets
+ if $_ =~ /^>/ then
+ name.push( $_.sub(/>/,"").chop )
+ seq.push( tmpseq ) if nseq > 0
+ nseq += 1
+ tmpseq = ""
+ else
+ tmpseq += $_.strip
+ end
+ end
+ seq.push( tmpseq )
+ return nseq
+end
+
+
+
+begin
+ infp = File.open( infn, "r" )
+rescue => e
+ STDERR.puts e
+ exit 1
+end
+infp.close
+
+if directionoptions =~ /--adjustdirection/ then
+ system( mafftpath + "#{directionoptions} #{infn} > #{temp_of}" )
+else
+ system( "cp #{infn} #{temp_of}" )
+end
+
+tname = []
+tseq = []
+infp = File.open( temp_of, "r" )
+tin = readfasta( infp, tname, tseq )
+infp.close
+lenhash = {}
+
+if outnum then
+ for i in 0..(tin-1)
+ tname[i] = "_numo_s_#{i}_numo_e_" + tname[i]
+ end
+end
+
+npick = 0 if npick == 1
+npick = tin if npick > tin
+
+
+if scand =~ /%$/ then
+ ncand = (tin * scand.to_f * 0.01 ).to_i
+else
+ ncand = scand.to_i
+end
+
+if ncand < 0 || ncand > tin then
+ STDERR.puts "Error. -n #{scand}?"
+ exit 1
+end
+
+ncand = npick if ncand < npick
+ncand = tin if ncand > tin
+
+STDERR.puts "ncand = #{ncand}, npick = #{npick}"
+
+
+sai = []
+for i in 0..(tin-1)
+ lenhash[i] = tseq[i].gsub(/-/,"").length
+end
+
+i = 0
+sorted = lenhash.sort_by{|key, value| [-value, i+=1]}
+#for i in 0..(ncand-1)
+# sai[sorted[i][0]] = 1
+#end
+#for i in ncand..(tin-1)
+# sai[sorted[i][0]] = 0
+#end
+
+ncandres = 0
+ntsukau = 0
+for i in 0..(tin-1)
+ cand = sorted[i][0]
+ if tname[cand] =~ /^_focus_/ then
+ sai[cand] = 0
+ ntsukau += 1
+ elsif ncandres < ncand then
+ unless tname[cand] =~ /^_tsukawanai_/ then
+ sai[cand] = 1
+ ncandres += 1
+ else
+ sai[cand] = 0
+ end
+ else
+ sai[cand] = 0
+ end
+end
+
+if ncandres+ntsukau < npick
+ STDERR.puts "ncandres = #{ncandres}"
+ STDERR.puts "ncand = #{ncand}"
+ STDERR.puts "ntsukau = #{ntsukau}"
+ STDERR.puts "npick = #{npick}"
+ STDERR.puts "Too many _tsukawanai_ sequences."
+ exit 1
+end
+
+if ntsukau > npick
+ STDERR.puts "ntsukau = #{ntsukau}"
+ STDERR.puts "npick = #{npick}"
+ STDERR.puts "Too many _focus_ sequences."
+ exit 1
+end
+
+#p sai
+#for i in 0..(tin-1)
+# puts sai[i].to_s + " " + tname[i]
+#end
+
+npickrand = npick - ntsukau
+
+if randompickup
+ pick = []
+ for i in 0..(npickrand-1)
+ pick[i] = 1
+ end
+ for i in npickrand..(ncandres-1)
+ pick[i] = 0
+ end
+ pick2 = pick.sort_by{rand}
+ pick = pick2
+# p pick
+# p sai
+
+ ipick = 0
+ for i in 0..(tin-1)
+ if sai[i] == 1 then
+ if pick[ipick] == 0 then
+ sai[i] = 0
+ end
+ ipick += 1
+ end
+ end
+# p sai
+
+ for i in 0..(tin-1)
+ if tname[i] =~ /^_focus_/ then
+ sai[i] = 1
+ end
+ end
+# p sai
+
+ pfp = File.open( temp_pf, 'w' )
+ nfp = File.open( temp_nf, 'w' )
+
+ i = 0
+ while i < tin
+ if sai[i] == 1 then
+ pfp.puts ">" + i.to_s + " " + ">" + markcore + tname[i]
+ pfp.puts tseq[i]
+ else
+ nfp.puts ">" + i.to_s + " " + ">" + tname[i]
+ nfp.puts tseq[i]
+ end
+ i += 1
+ end
+
+ nfp.close
+ pfp.close
+
+else # yamerukamo
+ STDERR.puts "Not supported in this version"
+ exit 1
+end
+
+if npick > 1 then
+ if npick < tin then
+ system( mafftpath + " #{coreoptions} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # add de sort
+ else
+ system( mafftpath + " #{coreoptions} #{reorderoption} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # ima sort
+ end
+ res = ( File::stat(temp_cf).size == 0 )
+else
+ system( "cat /dev/null > #{temp_cf}" )
+ res = false
+end
+
+if res == true then
+ STDERR.puts "\n\nError in the core alignment stage.\n\n"
+ exit 1
+end
+
+
+if npick < tin
+ system( mafftpath + " #{addoptions} #{reorderoption} --add #{temp_nf} #{temp_cf} > #{temp_of}" )
+ res = ( File::stat(temp_of).size == 0 )
+else
+ system( "cp #{temp_cf} #{temp_of}" )
+ res = false
+end
+
+if res == true then
+ STDERR.puts "\n\nError in the add stage.\n\n"
+ exit 1
+end
+
+resname = []
+resseq = []
+resfp = File.open( temp_of, "r" )
+nres = readfasta( resfp, resname, resseq )
+resfp.close
+
+if reorderoption =~ /--reorder/ then
+ for i in 0..(nres-1)
+ puts ">" + resname[i].sub(/^[0-9]* >/,"")
+ puts resseq[i]
+ end
+else
+ seqhash = {}
+ namehash = {}
+ seqlast = []
+ namelast = []
+ nlast = 0
+ for i in 0..(nres-1)
+ if resname[i] =~ /^[0-9]* >/
+ key = resname[i].split(' ')[0]
+ seqhash[key] = resseq[i]
+ namehash[key] = resname[i]
+ else
+ seqlast.push( resseq[i] )
+ namelast.push( resname[i] )
+ nlast += 1
+ end
+ end
+ for i in 0..(nlast-1)
+ puts ">" + namelast[i]
+ puts seqlast[i]
+ end
+ for i in 0..(nres-nlast-1)
+ key = i.to_s
+ puts ">" + namehash[key].sub(/^[0-9]* >/,"")
+ puts seqhash[key]
+ end
+end
+
+