From f67c7ef585929ffa378d6e3640328dd68ae9f234 Mon Sep 17 00:00:00 2001 From: Adam Saponara Date: Mon, 8 May 2023 16:44:34 -0400 Subject: [PATCH] Memoize git shell commands We have a shell script that invokes `chef install` and `chef export` against a setup with ~30 cookbooks. It was annoyingly slow so we dug into why that was the case. We found that these commands call `ChefCLI::CookbookProfiler::Git` in a loop which ends up shelling out the same handful of git commands repeatedly. For example, one run produced the following duplicate git commands (sorted by dupe count): ``` 74 execve("/usr/bin/git", ["git", "rev-parse", "HEAD"], 46 execve("/usr/bin/git", ["git", "rev-parse", "HEAD"], 41 execve("/usr/bin/git", ["git", "rev-parse", "--abbrev-ref", "HEAD"], 39 execve("/usr/bin/git", ["git", "diff-files", "--quiet"], 25 execve("/usr/bin/git", ["git", "branch", "-r", "--contains", ""], 23 execve("/usr/lib/git-core/git", ["/usr/lib/git-core/git", "status", "--porcelain=2", "-uno"], 23 execve("/usr/bin/git", ["git", "config", "--get", "branch.HEAD.remote"], 21 execve("/usr/bin/git", ["git", "diff-files", "--quiet"], 19 execve("/usr/bin/git", ["git", "rev-parse", "--abbrev-ref", "HEAD"], 17 execve("/usr/bin/git", ["git", "config", "--get", "branch..remote"], 16 execve("/usr/bin/git", ["git", "config", "--get", "remote.origin.url"], 14 execve("/usr/bin/git", ["git", "branch", "-r", "--contains", ""], 14 execve("/usr/bin/git", ["git", "branch", "-r", "--contains", ""], 12 execve("/usr/bin/git", ["git", "config", "--get", "remote.origin.url"], 11 execve("/usr/bin/git", ["git", "config", "--get", "branch..remote"], 9 execve("/usr/bin/git", ["git", "config", "--get", "branch.HEAD.remote"], 7 execve("/usr/bin/git", ["git", "branch", "-r", "--contains", ""], 5 execve("/usr/lib/git-core/git", ["/usr/lib/git-core/git", "status", "--porcelain=2", "-uno"], ``` Adding this memoization brings our run from ~45s down to ~8s. The memo cache is keyed by repo path (`rev-parse --show-toplevel`) which should support both cookbook-mono-repo and repo-per-cookbook. Signed-off-by: Adam Saponara --- lib/chef-cli/cookbook_profiler/git.rb | 27 +++++++++++++++++++++++-- spec/unit/cookbook_profiler/git_spec.rb | 3 ++- spec/unit/policyfile_lock_build_spec.rb | 4 ++++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/lib/chef-cli/cookbook_profiler/git.rb b/lib/chef-cli/cookbook_profiler/git.rb index ba0a52f2..449fea9a 100644 --- a/lib/chef-cli/cookbook_profiler/git.rb +++ b/lib/chef-cli/cookbook_profiler/git.rb @@ -23,10 +23,17 @@ class Git include Helpers + @@git_memo = {} + attr_reader :cookbook_path + def self.uncache + @@git_memo = {} + end + def initialize(cookbook_path) @cookbook_path = cookbook_path + @repo_path = nil @unborn_branch = nil @unborn_branch_ref = nil end @@ -111,8 +118,24 @@ def git!(subcommand, options = {}) end def git(subcommand, options = {}) - options = { cwd: cookbook_path }.merge(options) - system_command("git #{subcommand}", options) + # memoize commands per-repo + repo_path = get_repo_path + memo_key = [repo_path, subcommand, options] + if @@git_memo.key?(memo_key) + rv = @@git_memo[memo_key] + else + options = { cwd: cookbook_path }.merge(options) + rv = system_command("git #{subcommand}", options) + @@git_memo[memo_key] = rv + end + rv + end + + def get_repo_path + unless @repo_path + @repo_path = system_command("git rev-parse --show-toplevel", { cwd: cookbook_path }).stdout.strip + end + @repo_path end def detect_current_branch diff --git a/spec/unit/cookbook_profiler/git_spec.rb b/spec/unit/cookbook_profiler/git_spec.rb index 1a45f6b3..9a960b00 100644 --- a/spec/unit/cookbook_profiler/git_spec.rb +++ b/spec/unit/cookbook_profiler/git_spec.rb @@ -25,7 +25,8 @@ include ChefCLI::Helpers - let(:git_profiler) do + let!(:git_profiler) do + ChefCLI::CookbookProfiler::Git.uncache ChefCLI::CookbookProfiler::Git.new(cookbook_path) end diff --git a/spec/unit/policyfile_lock_build_spec.rb b/spec/unit/policyfile_lock_build_spec.rb index cd008ee8..a500cb6e 100644 --- a/spec/unit/policyfile_lock_build_spec.rb +++ b/spec/unit/policyfile_lock_build_spec.rb @@ -50,6 +50,10 @@ def expect_hash_equal(actual, expected) ChefCLI::Policyfile::StorageConfig.new( cache_path: cache_path, relative_paths_root: relative_paths_root ) end + let!(:git_memo) do + ChefCLI::CookbookProfiler::Git.uncache + end + context "when a cached cookbook omits the cache key" do let(:policyfile_lock) do