# frozen_string_literal: true

# Downloads the peptide "all.fa.gz" FASTA archives of every species listed
# under PUB_RELEASE_DIR on the Ensembl FTP site. Files are written to the
# current working directory under their remote file names.
#
# Repository path of this script: forester/ruby/scripts/ensembl_ftp.rb

require 'net/ftp'

HOST            = 'ftp.ensembl.org'
EMAIL           = 'czmasek@burnham.org' # Sent as the anonymous-login password.
PUB_RELEASE_DIR = '/pub/release-64/fasta'
PEP_DIR         = '/pep'
PEP_SUFFIX      = 'all.fa.gz' # The "abinitio" archives are deliberately skipped.

# Reduces raw FTP LIST lines to entry names.
#
# The name is taken to be the last whitespace-separated field of each line;
# blank lines (which have no fields) are dropped.
#
# @param listing [Array<String>] lines as returned by Net::FTP#list
# @return [Array<String>] one name per non-blank line
def entry_names(listing)
  listing.map { |line| line.split.last }.compact
end

# Downloads every "all.fa.gz" archive found in the pep directory of +species+.
#
# Any StandardError (missing directory, transfer failure, ...) makes the rest
# of this species be skipped with a message; files fetched before the failure
# still count. Interrupt and SystemExit are NOT rescued, so Ctrl-C aborts the
# whole run.
#
# @param ftp [Net::FTP] a connected, logged-in session
# @param species [String] directory name below PUB_RELEASE_DIR
# @return [Integer] number of files downloaded for this species
def download_pep_archives(ftp, species)
  downloaded = 0
  # Absolute path, so the session's previous working directory is irrelevant.
  ftp.chdir("#{PUB_RELEASE_DIR}/#{species}#{PEP_DIR}")
  entry_names(ftp.list).each do |pepfile|
    next unless pepfile.end_with?(PEP_SUFFIX)

    ftp.getbinaryfile(pepfile)
    puts %(downloaded "#{pepfile}")
    downloaded += 1
  end
  downloaded
rescue StandardError => e
  puts %(ignoring "#{species}": #{e.message.strip})
  downloaded
end

count = 0
# The block form closes the connection even when an error escapes the block.
Net::FTP.open(HOST, 'anonymous', EMAIL) do |ftp|
  ftp.passive = true # To avoid "No route to host" error.
  ftp.chdir(PUB_RELEASE_DIR)
  # Only look at entries whose name contains an underscore.
  entry_names(ftp.list('*_*')).each do |species|
    count += download_pep_archives(ftp, species)
  end
end
puts "done (downloaded #{count} files)"