From 907bf35dbd306137c115d647f053e3d0abf53432 Mon Sep 17 00:00:00 2001 From: Daniel Goldman Date: Wed, 29 Jan 2025 10:45:14 -0500 Subject: [PATCH] Fix concurrency issue of Terraform provider cache (#21805) The Terraform provider cache is not concurrency safe. The worst concurrency bug with the Terraform provider cache is a race condition when nonatomic moves result in another process capturing incorrect hashes in the lockfile which gets pulled into the Pants cache and then poisons it. After [lots of debugging and help from the community](https://chat.pantsbuild.org/t/26730983/i-m-finding-terraform-init-may-not-cache-stably-from-2-23-it#84153d7a-cedd-4600-838f-bd6ef99f6cc9), we've determined that the best way forward is to isolate each module into its own Terraform cache. This prevents concurrent access to the Terraform cache. One possible solution would have been to not cache during cache-unsafe operations. This is harder than it seems, because envvars can override user cache settings in Terraform RC files but they cannot _unset_ it. fixes #21804 --- .../backend/terraform/goals/deploy_test.py | 13 +++++- src/python/pants/backend/terraform/tool.py | 40 ++++++++++++++----- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/src/python/pants/backend/terraform/goals/deploy_test.py b/src/python/pants/backend/terraform/goals/deploy_test.py index 8c7c460b397..2a6d92b7f1b 100644 --- a/src/python/pants/backend/terraform/goals/deploy_test.py +++ b/src/python/pants/backend/terraform/goals/deploy_test.py @@ -3,6 +3,7 @@ from __future__ import annotations import json +import shlex import pytest @@ -16,6 +17,14 @@ standard_deployment = standard_deployment +def _extract_terraform_argv_from_script(script_argv) -> tuple[str, ...]: + """Extract the argv of the invocation of Terraform from its launcher script.""" + script = script_argv[2] + tf_invocation = script.split("&&") + argv = shlex.split(tf_invocation[-1]) + return tuple(argv) + + def test_run_terraform_deploy(rule_runner: RuleRunner, standard_deployment, tmpdir) -> None: """Test end-to-end running of a deployment.""" rule_runner.write_files(standard_deployment.files) @@ -44,7 +53,7 @@ def test_deploy_terraform_forwards_args(rule_runner: RuleRunner, standard_deploy deploy_process = rule_runner.request(DeployProcess, [field_set]) assert deploy_process.process - argv = deploy_process.process.process.argv + argv = _extract_terraform_argv_from_script(deploy_process.process.process.argv) assert "-chdir=src/tf" in argv, "Did not find expected -chdir" assert "-var-file=stg.tfvars" in argv, "Did not find expected -var-file" @@ -70,7 +79,7 @@ def test_deploy_terraform_adheres_to_dry_run_flag( deploy_process = rule_runner.request(DeployProcess, [field_set]) assert deploy_process.process - argv = deploy_process.process.process.argv + argv = _extract_terraform_argv_from_script(deploy_process.process.process.argv) assert action in argv, f"Expected {action} in argv" assert not_action not in argv, f"Did not expect {not_action} in argv" diff --git a/src/python/pants/backend/terraform/tool.py b/src/python/pants/backend/terraform/tool.py index e312ce05dcd..bc7ddc64ac0 100644 --- a/src/python/pants/backend/terraform/tool.py +++ b/src/python/pants/backend/terraform/tool.py @@ -20,7 +20,7 @@ import shlex from dataclasses import dataclass from pathlib import Path -from typing import Tuple +from typing import Iterable, Tuple from pants.core.goals.resolves import ExportableTool from pants.core.util_rules import external_tool @@ -29,6 +29,7 @@ ExternalToolRequest, TemplatedExternalTool, ) +from pants.core.util_rules.system_binaries import BashBinary, MkdirBinary from pants.engine.env_vars import EnvironmentVars, EnvironmentVarsRequest from pants.engine.fs import EMPTY_DIGEST, Digest from pants.engine.internals.selectors import Get @@ -415,10 +416,18 @@ class TerraformProcess: chdir: str = "." # directory for terraform's `-chdir` argument +def _make_launcher_script(bash: BashBinary, commands: Iterable[Iterable[str]]) -> tuple[str, ...]: + """Assemble several command invocations into an inline launcher script, suitable for passing as + `Process(argv=(bash.path, "-c", script), ...)`""" + return (bash.path, "-c", " && ".join([shlex.join(command) for command in commands])) + + @rule async def setup_terraform_process( request: TerraformProcess, terraform: TerraformTool, + bash: BashBinary, + mkdir: MkdirBinary, platform: Platform, ) -> Process: downloaded_terraform = await Get( @@ -428,18 +437,18 @@ async def setup_terraform_process( ) env = await Get(EnvironmentVars, EnvironmentVarsRequest(terraform.extra_env_vars)) + extra_env_vars = {} + path = [] user_path = env.get("PATH") if user_path: path.append(user_path) + extra_env_vars["PATH"] = os.pathsep.join(path) - env = EnvironmentVars( - { - **env, - "PATH": ":".join(path), - "TF_PLUGIN_CACHE_DIR": (os.path.join("{chroot}", terraform.plugin_cache_dir)), - } - ) + tf_plugin_cache_dir = os.path.join(terraform.plugin_cache_dir, request.chdir) + extra_env_vars["TF_PLUGIN_CACHE_DIR"] = os.path.join("{chroot}", tf_plugin_cache_dir) + + env = EnvironmentVars({**env, **extra_env_vars}) immutable_input_digests = { "__terraform": downloaded_terraform.digest, @@ -448,8 +457,21 @@ async def setup_terraform_process( def prepend_paths(paths: Tuple[str, ...]) -> Tuple[str, ...]: return tuple((Path(request.chdir) / path).as_posix() for path in paths) + # Initialise the Terraform provider cache, since Terraform expects the directory to already exist. + initialize_provider_cache_cmd = (mkdir.path, "-p", tf_plugin_cache_dir) + run_terraform_cmd = ( + "__terraform/terraform", + f"-chdir={shlex.quote(request.chdir)}", + ) + request.args + return Process( - argv=("__terraform/terraform", f"-chdir={shlex.quote(request.chdir)}") + request.args, + argv=_make_launcher_script( + bash, + ( + initialize_provider_cache_cmd, + run_terraform_cmd, + ), + ), input_digest=request.input_digest, immutable_input_digests=immutable_input_digests, output_files=prepend_paths(request.output_files),