diff --git a/README.md b/README.md
index bbaa445..f833e8c 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,31 @@ last-modified-at:
     date-format: '%d-%b-%y'
 ```
 
+For sites with many documents that use `last_modified_at`, rendering may be
+faster with the git cache enabled:
+
+```yml
+plugins:
+  - jekyll-last-modified-at
+
+last-modified-at:
+    use-git-cache: true
+```
+
+If `use-git-cache` is `false` (the default), every committed file using
+`last_modified_at` spawns its own process to read that file's commit time from
+the git log. So if you have 10 documents, this will result in 10 spawned calls.
+
+If `use-git-cache` is `true`, a single process is spawned that reads the entire
+git log history and caches the time data. The rest of the site generation
+process then reads from this cache. So if you have 10 (or 1000) documents, this
+will result in 1 spawned call. The cache is flushed on site reset, so a
+long-lived server correctly reflects the `last_modified_at` of files modified
+and committed while it has been running.
+
+Note: there may be performance issues for repositories with very large
+histories, in which case the default behavior is likely preferred.
+
 ## Usage
 
 There are a few ways to use this gem.
diff --git a/lib/jekyll-last-modified-at.rb b/lib/jekyll-last-modified-at.rb
index 2482d86..5ce996d 100644
--- a/lib/jekyll-last-modified-at.rb
+++ b/lib/jekyll-last-modified-at.rb
@@ -9,8 +9,5 @@ module LastModifiedAt
     autoload :Executor, 'jekyll-last-modified-at/executor'
     autoload :Determinator, 'jekyll-last-modified-at/determinator'
     autoload :Git, 'jekyll-last-modified-at/git'
-
-    PATH_CACHE = {} # rubocop:disable Style/MutableConstant
-    REPO_CACHE = {} # rubocop:disable Style/MutableConstant
   end
 end
diff --git a/lib/jekyll-last-modified-at/determinator.rb b/lib/jekyll-last-modified-at/determinator.rb
index 8c5138b..789e531 100644
--- a/lib/jekyll-last-modified-at/determinator.rb
+++ b/lib/jekyll-last-modified-at/determinator.rb
@@ -3,49 +3,50 @@
 module Jekyll
   module LastModifiedAt
     class Determinator
-      attr_reader :site_source, :page_path
+      # Caches shared by every Determinator, held as class-level instance
+      # variables: repo_cache maps site_source => Git, path_cache maps
+      # page_path => Time.
+      @repo_cache = {}
+      @path_cache = {}
+      class << self
+        # attr_accessor so we can flush externally
+        attr_accessor :repo_cache
+        attr_accessor :path_cache
+      end
+
+      attr_reader :site_source, :page_path, :use_git_cache
       attr_accessor :format
 
-      def initialize(site_source, page_path, format = nil)
-        @site_source = site_source
-        @page_path   = page_path
-        @format      = format || '%d-%b-%y'
+      def initialize(site_source, page_path, format = nil, use_git_cache = false) # rubocop:disable Style/OptionalBooleanParameter
+        @site_source   = site_source
+        @page_path     = page_path
+        @format        = format || '%d-%b-%y'
+        @use_git_cache = use_git_cache
       end
 
       def git
-        return REPO_CACHE[site_source] unless REPO_CACHE[site_source].nil?
+        return self.class.repo_cache[site_source] unless self.class.repo_cache[site_source].nil?
 
-        REPO_CACHE[site_source] = Git.new(site_source)
-        REPO_CACHE[site_source]
+        self.class.repo_cache[site_source] = Git.new(site_source)
+        self.class.repo_cache[site_source]
       end
 
       def formatted_last_modified_date
-        return PATH_CACHE[page_path] unless PATH_CACHE[page_path].nil?
-
-        last_modified = last_modified_at_time.strftime(@format)
-        PATH_CACHE[page_path] = last_modified
-        last_modified
+        last_modified_at_time.strftime(@format)
       end
 
       def last_modified_at_time
+        return self.class.path_cache[page_path] unless self.class.path_cache[page_path].nil?
+
         raise Errno::ENOENT, "#{absolute_path_to_article} does not exist!" unless File.exist? absolute_path_to_article
 
-        Time.at(last_modified_at_unix.to_i)
+        self.class.path_cache[page_path] = Time.at(last_modified_at_unix.to_i)
+        self.class.path_cache[page_path]
       end
 
       def last_modified_at_unix
         if git.git_repo?
-          last_commit_date = Executor.sh(
-            'git',
-            '--git-dir',
-            git.top_level_directory,
-            'log',
-            '-n',
-            '1',
-            '--format="%ct"',
-            '--',
-            relative_path_from_git_dir
-          )[/\d+/]
+          last_commit_date = git.last_commit_date(relative_path_from_git_dir, use_git_cache)
           # last_commit_date can be nil iff the file was not committed.
           last_commit_date.nil? || last_commit_date.empty? ? mtime(absolute_path_to_article) : last_commit_date
         else
diff --git a/lib/jekyll-last-modified-at/git.rb b/lib/jekyll-last-modified-at/git.rb
index 22c4f30..c730f6f 100644
--- a/lib/jekyll-last-modified-at/git.rb
+++ b/lib/jekyll-last-modified-at/git.rb
@@ -8,6 +8,7 @@ class Git
       def initialize(site_source)
         @site_source = site_source
         @is_git_repo = nil
+        @lcd_cache = {}
       end
 
       def top_level_directory
@@ -33,6 +34,79 @@ def git_repo?
           false
         end
       end
+
+      # Returns the unix timestamp (a String of digits) of the most recent
+      # commit touching +path+, or nil when no timestamp is found for it.
+      #
+      # With +use_git_cache+ the answer is looked up in a table built by a
+      # single `git log` over the whole history; otherwise one `git log -n 1`
+      # is spawned for this path.
+      def last_commit_date(path, use_git_cache = false) # rubocop:disable Style/OptionalBooleanParameter
+        if use_git_cache
+          build_lcd_cache if @lcd_cache.empty?
+          @lcd_cache[path]
+        else
+          Executor.sh(
+            'git',
+            '--git-dir',
+            top_level_directory,
+            'log',
+            '-n',
+            '1',
+            '--format="%ct"',
+            '--',
+            path
+          )[/\d+/]
+        end
+      end
+
+      private
+
+      # generates hash of `path => unix time stamp (string)`
+      #
+      # git log lists commits newest-first, so the first timestamp seen for a
+      # path is its most recent commit; later (older) entries are skipped.
+      def build_lcd_cache
+        # example output:
+        #
+        # %jekyll-last-modified-at:1621042992
+        #
+        # Dockerfile.production
+        # %jekyll-last-modified-at:1621041929
+        #
+        # assets/css/style.52513a5600efd4015668ccb9b702256e.css
+        # assets/css/style.52513a5600efd4015668ccb9b702256e.css.gz
+        lines = Executor.sh(
+          'git',
+          # print non-ASCII paths verbatim; by default git quotes and
+          # octal-escapes them, so they would never match the lookup key
+          '-c',
+          'core.quotepath=false',
+          '--git-dir',
+          top_level_directory,
+          'log',
+          '--name-only',
+          # %ct is always a unix timestamp, so no --date option is needed
+          '--pretty=%%jekyll-last-modified-at:%ct'
+        )
+
+        lcd = nil
+        lines.split("\n").each do |line|
+          next if line.empty?
+
+          if line.start_with?('%jekyll-last-modified-at:')
+            # new record
+            lcd = line.split(':')[1]
+            next
+          end
+
+          # we already have it
+          next if @lcd_cache[line]
+
+          # we don't have it
+          @lcd_cache[line] = lcd
+        end
+      end
     end
   end
 end
diff --git a/lib/jekyll-last-modified-at/hook.rb b/lib/jekyll-last-modified-at/hook.rb
index 127aaee..85f0306 100644
--- a/lib/jekyll-last-modified-at/hook.rb
+++ b/lib/jekyll-last-modified-at/hook.rb
@@ -6,11 +6,21 @@ module Hook
       def self.add_determinator_proc
         proc { |item|
           format = item.site.config.dig('last-modified-at', 'date-format')
-          item.data['last_modified_at'] = Determinator.new(item.site.source, item.path,
-                                                           format)
+          use_git_cache = item.site.config.dig('last-modified-at', 'use-git-cache')
+          item.data['last_modified_at'] = Determinator.new(item.site.source, item.relative_path,
+                                                           format, use_git_cache)
         }
       end
 
+      Jekyll::Hooks.register :site, :after_reset do |site|
+        use_git_cache = site.config.dig('last-modified-at', 'use-git-cache')
+        if use_git_cache
+          # flush the caches so we can detect commits while server is running
+          Determinator.repo_cache = {}
+          Determinator.path_cache = {}
+        end
+      end
+
       Jekyll::Hooks.register :posts, :post_init, &Hook.add_determinator_proc
       Jekyll::Hooks.register :pages, :post_init, &Hook.add_determinator_proc
       Jekyll::Hooks.register :documents, :post_init, &Hook.add_determinator_proc