JWS-112 Bumping version of Mafft to version 7.310.
[jabaws.git] / binaries / src / mafft / core / mafft-sparsecore.tmpl
diff --git a/binaries/src/mafft/core/mafft-sparsecore.tmpl b/binaries/src/mafft/core/mafft-sparsecore.tmpl
new file mode 100644 (file)
index 0000000..625e9b8
--- /dev/null
@@ -0,0 +1,358 @@
+#! /usr/bin/env ruby
+require 'optparse'
+
+mafftpath = "_BINDIR/mafft"
+
+def cleartempfiles( filenames )
+       for f in filenames
+               system( "rm -rf #{f}" )
+       end
+end
+
+
+seed = 0
+scand = "50%"
+npick = 500
+infn = ""
+reorderoption = "--reorder"
+pickoptions = " --retree 1 "
+coreoptions = " --globalpair --maxiterate 100 "
+corelastarg = " "
+addoptions = " "
+directionoptions = " --retree 0 --pileup "
+markcore = ""
+randompickup = true
+outnum = false
+
+begin
+       params = ARGV.getopts('m:s:n:p:i:C:L:A:o:MhuD:')
+rescue => e
+    STDERR.puts e
+       STDERR.puts "See #{$0} -h"
+    exit 1
+end
+
+#p params
+
+mafftpath = params["m"]         if params["m"]
+seed = params["s"].to_i         if params["s"]
+scand = params["n"].to_s        if params["n"]
+npick = params["p"].to_i        if params["p"]
+infn = params["i"]              if params["i"]
+#pickoptions += params["P"]     if params["P"]
+coreoptions += params["C"]      if params["C"] # tsuikagaki!
+corelastarg += params["L"]      if params["L"] # tsuikagaki!
+addoptions  += params["A"]      if params["A"]
+directionoptions += params["D"] if params["D"] # tsuikagaki
+markcore = "*"                  if params["M"]
+#randompickup = false           if params["S"]
+reorderoption = ""              if params["o"] =~ /^i/
+outnum = true                   if params["u"]
+
+if params["h"] then
+       STDERR.puts "Usage: #{$0} -i inputfile [options]"
+       STDERR.puts "Options:"
+       STDERR.puts "   -i string     Input file."
+       STDERR.puts "   -m string     Mafft command.  Default: mafft"
+       STDERR.puts "   -s int        Seed.  Default:0"
+       STDERR.puts "   -n int        Number of candidates for core sequences.  Default: upper 50% in length"
+       STDERR.puts "   -p int        Number of core sequences.  Default: 500"
+#      STDERR.puts "   -P \"string\"   Mafft options for the PICKUP stage."
+#      STDERR.puts "                 Default: \"--retree 1\""
+#      STDERR.puts "   -S            Tree-based pickup.  Default: off"
+       STDERR.puts "   -C \"string\"   Mafft options for the CORE stage."
+       STDERR.puts "                 Default: \"--globalpair --maxiterate 100\""
+       STDERR.puts "   -A \"string\"   Mafft options for the ADD stage."
+       STDERR.puts "                 Default: \"\""
+       STDERR.puts "   -D \"string\"   Mafft options for inferring the direction of nucleotide sequences."
+       STDERR.puts "                 Default: \"\""
+       STDERR.puts "   -o r or i     r: Reorder the sequences based on similarity.  Default"
+       STDERR.puts "                 i: Same as input."
+       exit 1
+end
+
+if infn == "" then
+       STDERR.puts "Give input file with -i."
+       exit 1
+end
+
+
+
+pid = $$.to_s
+tmpdir = ENV["TMPDIR"]
+tmpdir = "/tmp" if tmpdir == nil
+tempfiles = []
+tempfiles.push( temp_pf = tmpdir + "/_pf" + pid )
+tempfiles.push( temp_nf = tmpdir + "/_nf" + pid )
+tempfiles.push( temp_cf = tmpdir + "/_cf" + pid )
+tempfiles.push( temp_of = tmpdir + "/_of" + pid )
+
+Signal.trap(:INT){cleartempfiles( tempfiles ); exit 1}
+at_exit{ cleartempfiles( tempfiles )}
+
+system "#{mafftpath} --version > #{temp_of} 2>&1"
+
+fp = File.open( temp_of, "r" )
+       line = fp.gets
+fp.close
+
+
+versionnum = line.split(' ')[0].sub(/v/,"").to_f
+
+if versionnum < 7.210 then
+       STDERR.puts "\n"
+       STDERR.puts "Please use mafft version >= 7.210\n"
+       STDERR.puts "\n"
+       exit
+end
+
+srand( seed )
+
+def readfasta( fp, name, seq )
+        nseq = 0
+        tmpseq = ""
+        while fp.gets
+                if $_ =~ /^>/ then
+                        name.push( $_.sub(/>/,"").chop )
+                        seq.push( tmpseq ) if nseq > 0
+                        nseq += 1
+                        tmpseq = ""
+                else
+                        tmpseq += $_.strip
+                end
+        end
+        seq.push( tmpseq )
+        return nseq
+end
+
+
+
+begin
+       infp = File.open( infn, "r" )
+rescue => e
+    STDERR.puts e
+    exit 1
+end
+infp.close
+
+if directionoptions =~ /--adjustdirection/ then
+       system( mafftpath + "#{directionoptions} #{infn} > #{temp_of}" )
+else
+       system( "cp #{infn} #{temp_of}" )
+end
+
+tname = []
+tseq = []
+infp = File.open( temp_of, "r" )
+tin = readfasta( infp, tname, tseq )
+infp.close
+lenhash = {}
+
+if outnum then
+       for i in 0..(tin-1)
+               tname[i] = "_numo_s_#{i}_numo_e_" + tname[i]
+       end
+end
+
+npick = 0 if npick == 1
+npick = tin if npick > tin
+
+
+if scand =~ /%$/ then
+       ncand = (tin * scand.to_f * 0.01 ).to_i
+else
+       ncand = scand.to_i
+end
+
+if ncand < 0 || ncand > tin then
+       STDERR.puts "Error.  -n #{scand}?"
+       exit 1
+end
+
+ncand = npick if ncand < npick
+ncand = tin if ncand > tin
+
+STDERR.puts "ncand = #{ncand}, npick = #{npick}"
+
+
+sai = []
+for i in 0..(tin-1)
+       lenhash[i] = tseq[i].gsub(/-/,"").length
+end
+
+i = 0
+sorted = lenhash.sort_by{|key, value| [-value, i+=1]}
+#for i in 0..(ncand-1)
+#      sai[sorted[i][0]] = 1
+#end
+#for i in ncand..(tin-1)
+#      sai[sorted[i][0]] = 0
+#end
+
+ncandres = 0
+ntsukau = 0
+for i in 0..(tin-1)
+       cand = sorted[i][0]
+       if tname[cand] =~ /^_focus_/ then
+               sai[cand] = 0
+               ntsukau += 1
+       elsif ncandres < ncand  then
+               unless  tname[cand] =~ /^_tsukawanai_/ then
+                       sai[cand] = 1
+                       ncandres += 1
+               else
+                       sai[cand] = 0
+               end
+       else
+               sai[cand] = 0
+       end
+end
+
+if ncandres+ntsukau < npick
+       STDERR.puts "ncandres = #{ncandres}"
+       STDERR.puts "ncand = #{ncand}"
+       STDERR.puts "ntsukau = #{ntsukau}"
+       STDERR.puts "npick = #{npick}"
+       STDERR.puts "Too many _tsukawanai_ sequences."
+       exit 1
+end
+
+if ntsukau > npick
+       STDERR.puts "ntsukau = #{ntsukau}"
+       STDERR.puts "npick = #{npick}"
+       STDERR.puts "Too many _focus_ sequences."
+       exit 1
+end
+
+#p sai
+#for i in 0..(tin-1)
+#      puts sai[i].to_s + " " + tname[i]
+#end
+
+npickrand = npick - ntsukau
+
+if randompickup
+       pick = []
+       for i in 0..(npickrand-1)
+               pick[i] = 1
+       end
+       for i in npickrand..(ncandres-1)
+               pick[i] = 0
+       end
+       pick2 = pick.sort_by{rand}
+       pick = pick2
+#      p pick
+#      p sai
+
+       ipick = 0
+       for i in 0..(tin-1)
+               if sai[i] == 1 then
+                       if pick[ipick] == 0 then
+                               sai[i] = 0
+                       end
+                       ipick += 1
+               end
+       end
+#      p sai
+
+       for i in 0..(tin-1)
+               if tname[i] =~ /^_focus_/ then
+                       sai[i] = 1
+               end
+       end
+#      p sai
+
+       pfp = File.open( temp_pf, 'w' )
+       nfp = File.open( temp_nf, 'w' )
+
+       i = 0
+       while i < tin
+               if sai[i] == 1 then
+                       pfp.puts ">" + i.to_s + " " + ">" + markcore + tname[i]
+                       pfp.puts tseq[i]
+               else
+                       nfp.puts ">" + i.to_s + " " + ">" + tname[i]
+                       nfp.puts tseq[i]
+               end
+               i += 1
+       end
+
+       nfp.close
+       pfp.close
+
+else   # yamerukamo
+       STDERR.puts "Not supported in this version"
+       exit 1
+end
+
+if npick > 1 then
+       if npick < tin then
+               system( mafftpath + " #{coreoptions} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # add de sort
+       else
+               system( mafftpath + " #{coreoptions} #{reorderoption} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # ima sort
+       end
+       res = ( File::stat(temp_cf).size == 0 ) 
+else
+       system( "cat /dev/null > #{temp_cf}" )
+       res = false
+end
+
+if res == true then
+       STDERR.puts "\n\nError in the core alignment stage.\n\n"
+       exit 1
+end
+
+
+if npick < tin 
+       system( mafftpath + " #{addoptions} #{reorderoption} --add #{temp_nf} #{temp_cf} > #{temp_of}" )
+       res = ( File::stat(temp_of).size == 0 )
+else
+       system( "cp #{temp_cf} #{temp_of}" )
+       res = false
+end
+
+if res == true then
+       STDERR.puts "\n\nError in the add stage.\n\n"
+       exit 1
+end
+
+resname = []
+resseq = []
+resfp = File.open( temp_of, "r" )
+nres = readfasta( resfp, resname, resseq )
+resfp.close
+
+if reorderoption =~ /--reorder/ then
+       for i in 0..(nres-1)
+               puts ">" + resname[i].sub(/^[0-9]* >/,"")
+               puts resseq[i]
+       end
+else
+       seqhash = {}
+       namehash = {}
+       seqlast = []
+       namelast = []
+       nlast = 0
+       for i in 0..(nres-1)
+               if resname[i] =~ /^[0-9]* >/
+                       key = resname[i].split(' ')[0]
+                       seqhash[key] = resseq[i]
+                       namehash[key] = resname[i]
+               else
+                       seqlast.push( resseq[i] )
+                       namelast.push( resname[i] )
+                       nlast += 1
+               end
+       end
+       for i in 0..(nlast-1)
+               puts ">" + namelast[i]
+               puts seqlast[i]
+       end
+       for i in 0..(nres-nlast-1)
+               key = i.to_s
+               puts ">" + namehash[key].sub(/^[0-9]* >/,"")
+               puts seqhash[key]
+       end
+end
+
+