From 084976de0175cf4ea2f55ca263cae9962880c7fd Mon Sep 17 00:00:00 2001
From: glaszig
Date: Sat, 4 Jul 2015 19:19:19 +0200
Subject: [PATCH 1/4] uploading files to S3 in parallel using threads

---
 lib/octopress-deploy/s3.rb | 108 ++++++++++++++++++++++---------------
 1 file changed, 66 insertions(+), 42 deletions(-)

diff --git a/lib/octopress-deploy/s3.rb b/lib/octopress-deploy/s3.rb
index a46a16f..ad7e605 100644
--- a/lib/octopress-deploy/s3.rb
+++ b/lib/octopress-deploy/s3.rb
@@ -25,6 +25,8 @@ def initialize(options)
         @headers = options[:headers] || []
         @remote_path = @remote_path.sub(/^\//,'') # remove leading slash
         @pull_dir = options[:dir]
+        @bust_cache_files = []
+        @thread_pool = []
         connect
       end
 
@@ -78,64 +80,61 @@ def connect
       # Write site files to the selected bucket
       #
       def write_files
-        puts "Writing #{pluralize('file', site_files.size)}:" if @verbose
-        files_to_invalidate = []
+        puts "Writing #{pluralize('file', site_files.size)}#{" (sequential mode)" unless parallel_upload?}:" if @verbose
+        @bust_cache_files = []
+
         site_files.each do |file|
-          s3_filename = remote_path(file)
-          o = @bucket.objects[s3_filename]
-          file_with_options = get_file_with_metadata(file, s3_filename);
-
-          begin
-            s3sum = o.etag.tr('"','') if o.exists?
-          rescue AWS::S3::Errors::NoSuchKey
-            s3sum = ""
+          if parallel_upload?
+            threaded { write_file file }
+          else
+            write_file file
           end
+        end
 
-          if @incremental && (s3sum == Digest::MD5.file(file).hexdigest)
-            if @verbose
-              puts "= #{remote_path(file)}"
-            else
-              progress('=')
-            end
-          else
-            o.write(file_with_options)
-            files_to_invalidate.push(file)
-            if @verbose
-              puts "+ #{remote_path(file)}"
-            else
-              progress('+')
-            end
-          end
+        @thread_pool.each(&:join)
+        bust_cloudfront_cache
+      end
+
+      def write_file file
+        if write_file? file
+          s3_upload_file file
+          @bust_cache_files << file
+          @verbose ? puts("+ #{remote_path(file)}") : progress('+')
+        else
+          @verbose ? puts("= #{remote_path(file)}") : progress('=')
         end
+      end
 
-        invalidate_cache(files_to_invalidate) unless @distro_id.nil?
+      def s3_upload_file file
+        s3_object(file).write File.open(file), s3_object_options(file)
       end
 
-      def invalidate_cache(files)
+      def bust_cloudfront_cache
+        return if @distro_id.nil?
+        puts "Invalidating cache for #{pluralize('file', @bust_cache_files.size)}" if @verbose
         @cloudfront.create_invalidation(
           distribution_id: @distro_id,
           invalidation_batch:{
             paths:{
-              quantity: files.size,
-              items: files.map{|file| "/" + remote_path(file)}
-            },
+              quantity: @bust_cache_files.size,
+              items: @bust_cache_files.map{|file| "/" + remote_path(file)}
+            },
             # String of 8 random chars to uniquely id this invalidation
             caller_reference: (0...8).map { ('a'..'z').to_a[rand(26)] }.join
           }
-        ) unless files.empty?
+        ) unless @bust_cache_files.empty?
+        @bust_cache_files = []
       end
 
-      def get_file_with_metadata(file, s3_filename)
-        file_with_options = {
-          :file => file,
-          :acl => :public_read
-        }
+      def s3_object_options(file)
+        s3_filename = remote_path file
+        s3_options = { :acl => :public_read }
 
         @headers.each do |conf|
           if conf.has_key? 'filename' and s3_filename.match(conf['filename'])
             if @verbose
-              puts "+ #{remote_path(file)} matched pattern #{conf['filename']}"
+              puts "+ #{s3_filename} matched pattern #{conf['filename']}"
            end
 
             if conf.has_key? 'expires'
@@ -151,24 +150,24 @@ def get_file_with_metadata(file, s3_filename)
               expireDate = (Time.now + (60 * 60 * 24 * relative_days[1].to_i)).httpdate
             end
 
-            file_with_options[:expires] = expireDate
+            s3_options[:expires] = expireDate
           end
 
           if conf.has_key? 'content_type'
-            file_with_options[:content_type] = conf['content_type']
+            s3_options[:content_type] = conf['content_type']
           end
 
           if conf.has_key? 'cache_control'
-            file_with_options[:cache_control] = conf['cache_control']
+            s3_options[:cache_control] = conf['cache_control']
           end
 
           if conf.has_key? 'content_encoding'
-            file_with_options[:content_encoding] = conf['content_encoding']
+            s3_options[:content_encoding] = conf['content_encoding']
           end
         end
       end
 
-        return file_with_options
+        s3_options
       end
 
       # Delete files from the bucket, to ensure a 1:1 match with site files
@@ -273,9 +272,34 @@ def self.default_config(options={})
 #{"verbose: #{options[:verbose] || 'false'}".ljust(40)} # Print out all file operations.
 #{"incremental: #{options[:incremental] || 'false'}".ljust(40)} # Only upload new/changed files
 #{"delete: #{options[:delete] || 'false'}".ljust(40)} # Remove files from destination which do not match source files.
+#{"parallel: #{options[:parallel] || 'true'}".ljust(40)} # Speed up deployment by uploading files in parallel.
 CONFIG
       end
 
+      protected
+
+      def write_file? file
+        file_digest = Digest::MD5.file(file).hexdigest
+        o = s3_object file
+        s3sum = o.etag.tr('"','') if o.exists?
+        @incremental == false || s3sum.to_s != file_digest
+      end
+
+      def s3_object file
+        s3_filename = remote_path file
+        @bucket.objects[s3_filename]
+      end
+
+      def parallel_upload?
+        @options[:parallel]
+      end
+
+      def threaded &blk
+        @thread_pool << Thread.new(blk) do |operation|
+          operation.call
+        end
+      end
+
     end
   end
 end
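A note on the threading model in PATCH 1/4: `threaded` spawns one Thread per uploaded file, so a site with thousands of files opens thousands of threads and S3 connections at once. If that ever needs bounding, a fixed-size worker pool draining a queue is the usual alternative. The following is only an illustrative sketch, not part of the series; it reuses `site_files`, `write_file`, and `bust_cloudfront_cache` from the patch above, and the pool size of 8 and the method name are arbitrary assumptions.

    require 'thread' # Queue lives in the stdlib (implicit on modern Rubies)

    # Hypothetical bounded variant of write_files: a fixed number of worker
    # threads drain a pre-filled queue instead of one thread per file.
    def write_files_with_pool(pool_size = 8)
      queue = Queue.new
      site_files.each { |file| queue << file }

      workers = Array.new(pool_size) do
        Thread.new do
          begin
            # pop(true) is non-blocking and raises ThreadError once the
            # queue is empty, which signals that this worker is done.
            loop { write_file queue.pop(true) }
          rescue ThreadError
          end
        end
      end

      workers.each(&:join)
      bust_cloudfront_cache
    end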
From 3e2a5802ae24d4ef924ef6994dcb13cc32082d8b Mon Sep 17 00:00:00 2001
From: glaszig
Date: Sun, 5 Jul 2015 18:27:41 +0200
Subject: [PATCH 2/4] S3: skip MD5 hashing if incremental uploads aren't enabled

---
 lib/octopress-deploy/s3.rb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/octopress-deploy/s3.rb b/lib/octopress-deploy/s3.rb
index ad7e605..014b988 100644
--- a/lib/octopress-deploy/s3.rb
+++ b/lib/octopress-deploy/s3.rb
@@ -279,10 +279,12 @@ def self.default_config(options={})
       protected
 
       def write_file? file
+        return true unless @incremental
+
         file_digest = Digest::MD5.file(file).hexdigest
         o = s3_object file
         s3sum = o.etag.tr('"','') if o.exists?
-        @incremental == false || s3sum.to_s != file_digest
+        s3sum.to_s != file_digest
       end
 
       def s3_object file

From d4106ef0392b638f5a03a120d1d5759a41d60a1e Mon Sep 17 00:00:00 2001
From: glaszig
Date: Sun, 5 Jul 2015 18:46:03 +0200
Subject: [PATCH 3/4] adding notes about parallel S3 uploads to changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0bf29d6..688efd7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # Changelog
 
+### unreleased
+- New: Files are now uploaded to S3 in parallel. This can be disabled by setting `parallel: false` in `_deploy.yml`. [#63](https://github.com/octopress/deploy/pull/63)
+
 ### 1.3.0 - 2015-07-05
 - New: Now `_deploy.yml` is processed through ERB, which means you can load configuration from ENV vars pretty easily. [#62](https://github.com/octopress/deploy/pull/62)
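Before PATCH 4/4, one caveat on the incremental check from PATCH 2/4: it compares the S3 object's ETag against a local MD5. That equality only holds for single-part, unencrypted uploads; S3 assigns multipart uploads a different ETag format, so very large files can look permanently changed and re-upload on every run. Restated as a stand-alone predicate (illustrative only; `needs_upload?` is a hypothetical name, and `s3sum` stands in for the de-quoted ETag):

    require 'digest'

    # Mirrors write_file? as of PATCH 2/4: skip hashing entirely unless
    # incremental mode is on, then upload only when the checksums differ.
    def needs_upload?(path, s3sum, incremental)
      return true unless incremental
      s3sum.to_s != Digest::MD5.file(path).hexdigest
    end

    needs_upload?('index.html', nil, true)    # => true, object not on S3 yet
    needs_upload?('index.html', 'abc', false) # => true, incremental disabled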
From 8f54c0ceb51467ce9465f9e7b1d2dbe4247c4d6f Mon Sep 17 00:00:00 2001
From: glaszig
Date: Mon, 11 Jan 2016 18:54:48 +0100
Subject: [PATCH 4/4] s3: enabling incremental upload by default

---
 lib/octopress-deploy/s3.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/octopress-deploy/s3.rb b/lib/octopress-deploy/s3.rb
index 014b988..6ea2b5c 100644
--- a/lib/octopress-deploy/s3.rb
+++ b/lib/octopress-deploy/s3.rb
@@ -20,7 +20,7 @@ def initialize(options)
         @distro_id = options[:distribution_id] || ENV['AWS_DISTRIBUTION_ID']
         @remote_path = (options[:remote_path] || '/').sub(/^\//,'')
         @verbose = options[:verbose]
-        @incremental = options[:incremental]
+        @incremental = options.fetch(:incremental, true)
         @delete = options[:delete]
         @headers = options[:headers] || []
         @remote_path = @remote_path.sub(/^\//,'') # remove leading slash
@@ -270,7 +270,7 @@ def self.default_config(options={})
 #{"remote_path: #{options[:remote_path] || '/'}".ljust(40)} # relative path on bucket where files should be copied.
 #{"region: #{options[:remote_path] || 'us-east-1'}".ljust(40)} # Region where your bucket is located.
 #{"verbose: #{options[:verbose] || 'false'}".ljust(40)} # Print out all file operations.
-#{"incremental: #{options[:incremental] || 'false'}".ljust(40)} # Only upload new/changed files
+#{"incremental: #{options[:incremental] || 'true'}".ljust(40)} # Only upload new/changed files
 #{"delete: #{options[:delete] || 'false'}".ljust(40)} # Remove files from destination which do not match source files.
 #{"parallel: #{options[:parallel] || 'true'}".ljust(40)} # Speed up deployment by uploading files in parallel.
 CONFIG
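The `Hash#fetch` in PATCH 4/4 is what makes the new default safe to override. The tempting one-liner `options[:incremental] || true` would silently discard an explicit `incremental: false`, leaving no way to turn incremental mode off; `fetch` applies the default only when the key is absent. An irb-style illustration:

    { incremental: false }[:incremental] || true     # => true, false is clobbered
    { incremental: false }.fetch(:incremental, true) # => false, setting respected
    {}.fetch(:incremental, true)                     # => true, default when absent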