diff --git a/src/python/pants/core/util_rules/system_binaries.py b/src/python/pants/core/util_rules/system_binaries.py index f59c13e8326..01a92a97376 100644 --- a/src/python/pants/core/util_rules/system_binaries.py +++ b/src/python/pants/core/util_rules/system_binaries.py @@ -11,6 +11,7 @@ import subprocess from dataclasses import dataclass from enum import Enum +from itertools import groupby from textwrap import dedent # noqa: PNT20 from typing import Iterable, Mapping, Sequence @@ -243,6 +244,21 @@ def for_paths( *paths: BinaryPath, rationale: str, ) -> BinaryShimsRequest: + # Remove any duplicates (which may result if the caller merges `BinaryPath` instances from multiple sources) + # and also sort to ensure a stable order for better caching. + paths = tuple(sorted(set(paths), key=lambda bp: bp.path)) + + # Then ensure that there are no duplicate paths with mismatched content. + duplicate_paths = set() + for path, group in groupby(paths, key=lambda x: x.path): + if len(list(group)) > 1: + duplicate_paths.add(path) + if duplicate_paths: + raise ValueError( + "Detected duplicate paths with mismatched content at paths: " + f"{', '.join(sorted(duplicate_paths))}" + ) + return cls( paths=paths, rationale=rationale, diff --git a/src/python/pants/core/util_rules/system_binaries_test.py b/src/python/pants/core/util_rules/system_binaries_test.py index e7d5d04542d..1d8d8e19667 100644 --- a/src/python/pants/core/util_rules/system_binaries_test.py +++ b/src/python/pants/core/util_rules/system_binaries_test.py @@ -176,3 +176,37 @@ def test_binary_shims_paths(rule_runner: RuleRunner, tmp_path: Path) -> None: ), binary_shim.content.decode(), ) + + +def test_merge_and_detection_of_duplicate_binary_paths() -> None: + # Test merge of duplicate paths where content hash is the same. 
+ shims_request_1 = BinaryShimsRequest.for_paths( + BinaryPath("/foo/bar", "abc123"), + BinaryPath("/abc/def/123", "def456"), + BinaryPath("/foo/bar", "abc123"), + rationale="awesomeness", + ) + assert shims_request_1.paths == ( + BinaryPath("/abc/def/123", "def456"), + BinaryPath("/foo/bar", "abc123"), + ) + + # Test detection of duplicate paths with differing content hashes. A ValueError should be raised. + with pytest.raises(ValueError, match="Detected duplicate paths with mismatched content"): + _ = BinaryShimsRequest.for_paths( + BinaryPath("/foo/bar", "abc123"), + BinaryPath("/abc/def/123", "def456"), + BinaryPath("/foo/bar", "xyz789"), + rationale="awesomeness", + ) + + # Test paths with no duplication; the result should still be sorted by path. + shims_request_2 = BinaryShimsRequest.for_paths( + BinaryPath("/foo/bar", "abc123"), + BinaryPath("/abc/def/123", "def456"), + rationale="awesomeness", + ) + assert shims_request_2.paths == ( + BinaryPath("/abc/def/123", "def456"), + BinaryPath("/foo/bar", "abc123"), + )