# frozen_string_literal: true

# Downloads the peptide "all" FASTA archives (*.all.fa.gz) for every species
# directory found under PUB_RELEASE_DIR on the Ensembl FTP site. Files are
# written to the current working directory under their remote base name.
#
# Provenance: forester/ruby/scripts/ensembl_ftp.rb, introduced by commit
# 0ef2f6921e85266313c2a0404fd8f4efd00b449d ("to download from ensembl ftp site").

require 'net/ftp'

EMAIL = 'czmasek@burnham.org'
PUB_RELEASE_DIR = '/pub/release-64/fasta'
PEP_DIR = '/pep'

# Returns the last whitespace-separated field of one FTP LIST line (the entry
# name), or nil when the line is blank.
def entry_name(listing_line)
  listing_line.split.last
end

# True when +name+ is an "all.fa.gz" peptide archive (and not, for example,
# an "abinitio" one). A literal suffix test is used so that the dots are not
# treated as regular-expression wildcards.
def all_peptide_file?(name)
  name.to_s.end_with?('.all.fa.gz')
end

# Downloads every "all.fa.gz" file from the peptide directory of +species+.
# Yields the name of each file after it has been downloaded, so the caller
# can keep an accurate count even if a later file in the same directory fails.
# Raises whatever Net::FTP raises (e.g. when the species has no PEP_DIR).
def download_all_peptide_files(ftp, species)
  # Absolute path: no need to change back to the release directory afterwards.
  ftp.chdir("#{PUB_RELEASE_DIR}/#{species}#{PEP_DIR}")
  ftp.list.each do |line|
    pepfile = entry_name(line)
    next unless all_peptide_file?(pepfile)

    ftp.getbinaryfile(pepfile)
    puts "downloaded \"#{pepfile}\""
    yield pepfile
  end
end

count = 0
# The block form of Net::FTP.open closes the connection even when an error
# escapes the block.
Net::FTP.open('ftp.ensembl.org', 'anonymous', EMAIL) do |ftp|
  ftp.passive = true # To avoid "No route to host" error.
  ftp.chdir(PUB_RELEASE_DIR)
  # To only look at entries with an underscore (the species directories).
  species_names = ftp.list('*_*').map { |line| entry_name(line) }.compact
  species_names.each do |species|
    begin
      download_all_peptide_files(ftp, species) { count += 1 }
    rescue StandardError
      # Best effort: skip species that cannot be processed. StandardError
      # (rather than Exception) lets Ctrl-C and exit requests through.
      puts "ignoring \"#{species}\""
    end
  end
end
puts "done (downloaded #{count} files)"