Skip to content

Commit

Permalink
fix issue with duplicated `BinaryPath` instances passed to `BinaryShimsRequest` (Cherry pick of #21745) (#21759)
Browse files Browse the repository at this point in the history

As reported in #21709, a
Docker tool appearing in both the `--docker-tools` and
`--docker-optional-tools` options was causing an error with the
`create_digest` intrinsic: `Snapshots must be constructed from unique
path stats; got duplicates in [Some("docker-credential-ecr-login"),
Some("getent"), Some("sw_vers")]`

The root cause is that duplicate `BinaryPath` instances were being
passed to `BinaryShimsRequest.for_paths` which was then translated
eventually into a `create_digest` call with the duplicate paths (which
is not permitted).

Solution: De-duplicate paths in `BinaryShimsRequest.for_paths` (and sort
into stable order for good measure to ensure better cacheability).

Closes #21709.

---------

Co-authored-by: Tom Dyas <[email protected]>
  • Loading branch information
tdyas and Tom Dyas authored Dec 14, 2024
1 parent eecafd5 commit 7901814
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 0 deletions.
16 changes: 16 additions & 0 deletions src/python/pants/core/util_rules/system_binaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import subprocess
from dataclasses import dataclass
from enum import Enum
from itertools import groupby
from textwrap import dedent # noqa: PNT20
from typing import Iterable, Mapping, Sequence

Expand Down Expand Up @@ -243,6 +244,21 @@ def for_paths(
*paths: BinaryPath,
rationale: str,
) -> BinaryShimsRequest:
# Remove any duplicates (which may result if the caller merges `BinaryPath` instances from multiple sources)
# and also sort to ensure a stable order for better caching.
paths = tuple(sorted(set(paths), key=lambda bp: bp.path))

# Then ensure that there are no duplicate paths with mismatched content.
duplicate_paths = set()
for path, group in groupby(paths, key=lambda x: x.path):
if len(list(group)) > 1:
duplicate_paths.add(path)
if duplicate_paths:
raise ValueError(
"Detected duplicate paths with mismatched content at paths: "
f"{', '.join(sorted(duplicate_paths))}"
)

return cls(
paths=paths,
rationale=rationale,
Expand Down
34 changes: 34 additions & 0 deletions src/python/pants/core/util_rules/system_binaries_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,37 @@ def test_binary_shims_paths(rule_runner: RuleRunner, tmp_path: Path) -> None:
),
binary_shim.content.decode(),
)


def test_merge_and_detection_of_duplicate_binary_paths() -> None:
    """Verify the de-duplication behavior of `BinaryShimsRequest.for_paths`.

    Exact duplicates (same path and fingerprint) must be merged and the result
    sorted by path; the same path with differing fingerprints must raise.
    """
    expected_paths = (
        BinaryPath("/abc/def/123", "def456"),
        BinaryPath("/foo/bar", "abc123"),
    )

    # Exact duplicates (identical path and content hash) are merged, and the
    # surviving entries come back sorted by path.
    merged_request = BinaryShimsRequest.for_paths(
        BinaryPath("/foo/bar", "abc123"),
        BinaryPath("/abc/def/123", "def456"),
        BinaryPath("/foo/bar", "abc123"),
        rationale="awesomeness",
    )
    assert merged_request.paths == expected_paths

    # The same path appearing with differing content hashes is ambiguous and
    # must raise a ValueError.
    with pytest.raises(ValueError, match="Detected duplicate paths with mismatched content"):
        _ = BinaryShimsRequest.for_paths(
            BinaryPath("/foo/bar", "abc123"),
            BinaryPath("/abc/def/123", "def456"),
            BinaryPath("/foo/bar", "xyz789"),
            rationale="awesomeness",
        )

    # Duplicate-free input passes through untouched apart from sorting by path.
    plain_request = BinaryShimsRequest.for_paths(
        BinaryPath("/foo/bar", "abc123"),
        BinaryPath("/abc/def/123", "def456"),
        rationale="awesomeness",
    )
    assert plain_request.paths == expected_paths

0 comments on commit 7901814

Please sign in to comment.