Skip to content

Commit

Permalink
Use mksquashfs pseudofile definitions for AppImage build (#228)
Browse files Browse the repository at this point in the history
This changes the way that appimage squashfs files are built with
rules_appimage. Instead of taring up all the files and then providing
that to mksquashfs we build a pseudo file definitions list that tells
mksquashfs where to get all the files from. This avoids the expensive
tar operation when inputs change (building the pseudofile defs is pretty
cheap).
The mksquashfs action does take longer now (likely because it has to do
thousands of `sh -c "cat {file}"` invocations) but overall the build
times are lower for large appimages and the cachability remote execution
ability should be better.

There was also an issue with the most recent MkAppDir implementation
where the tar was populated in a ThreadPool which was not deterministic,
so file order inside the .tar and .sqfs may change, leading to changed
artifact hashes. This is resolved with this.
  • Loading branch information
lalten authored Sep 27, 2024
1 parent f77fb87 commit 5c2db08
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 218 deletions.
49 changes: 28 additions & 21 deletions appimage/appimage.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -31,37 +31,44 @@ def _appimage_impl(ctx):
apprun = make_apprun(ctx)
inputs = depset(direct = [manifest_file, apprun] + runfile_info.files)

# Create the AppDir tar
appdirtar = ctx.actions.declare_file(ctx.attr.name + ".tar")
# Create the AppDir mksquashfs pseudofile definitions
pseudofile_defs = ctx.actions.declare_file(ctx.attr.name + ".pseudofile_defs.txt")
args = ctx.actions.args()
args.add("--manifest").add(manifest_file.path)
args.add("--apprun").add(apprun.path)
args.add(appdirtar.path)
args.add(pseudofile_defs.path)
ctx.actions.run(
mnemonic = "MkAppDir",
inputs = inputs,
executable = ctx.executable._mkappdir,
arguments = [args],
outputs = [appdirtar],
execution_requirements = {"no-remote": "1"},
outputs = [pseudofile_defs],
execution_requirements = {"no-remote-exec": "1"},
)

# Create an empty directory that we can point mksquashfs at so all the added files come from the pseudofile defs
emptydir = ctx.actions.declare_directory(ctx.attr.name + ".emptydir")
ctx.actions.run_shell(
mnemonic = "MkEmptyDir",
outputs = [emptydir],
command = "mkdir -p %s" % emptydir.path,
)

# Create the AppDir squashfs
mksquashfs_args = list(MKSQUASHFS_ARGS)
mksquashfs_args.extend(["-processors", str(MKSQUASHFS_NUM_PROCS)])
mksquashfs_args.extend(["-mem", "{}M".format(MKSQUASHFS_MEM_MB)])
mksquashfs_args.extend(ctx.attr.build_args)
mksquashfs_args.append("-tar")
appdirsqfs = ctx.actions.declare_file(ctx.attr.name + ".sqfs")
ctx.actions.run_shell(
mnemonic = "Sqfstar",
inputs = [appdirtar],
command = "{exe} - {dst} {args} <{src}".format(
exe = ctx.executable._mksquashfs.path,
dst = appdirsqfs.path,
args = " ".join(mksquashfs_args),
src = appdirtar.path,
),
mksquashfs_args = ctx.actions.args()
mksquashfs_args.add(emptydir.path + "/")
mksquashfs_args.add(appdirsqfs.path)
mksquashfs_args.add_all(MKSQUASHFS_ARGS)
mksquashfs_args.add("-processors").add(MKSQUASHFS_NUM_PROCS)
mksquashfs_args.add("-mem").add("%sM" % MKSQUASHFS_MEM_MB)
mksquashfs_args.add_all(ctx.attr.build_args)
mksquashfs_args.add("-pf").add(pseudofile_defs.path)
ctx.actions.run(
mnemonic = "MkSquashfs",
inputs = depset(direct = [pseudofile_defs, apprun, emptydir] + runfile_info.files),
executable = ctx.executable._mksquashfs,
arguments = [mksquashfs_args],
tools = [ctx.executable._mksquashfs],
outputs = [appdirsqfs],
resource_set = _resources,
Expand All @@ -79,7 +86,7 @@ def _appimage_impl(ctx):
ctx.outputs.executable.path,
],
outputs = [ctx.outputs.executable],
execution_requirements = {"no-remote": "1"},
execution_requirements = {"no-remote-exec": "1"},
)

# Take the `binary` env and add the appimage target's env on top of it
Expand All @@ -95,7 +102,7 @@ def _appimage_impl(ctx):
runfiles = ctx.runfiles(files = [ctx.outputs.executable]),
),
RunEnvironmentInfo(env),
OutputGroupInfo(appimage_debug = depset([manifest_file, appdirtar, appdirsqfs])),
OutputGroupInfo(appimage_debug = depset([manifest_file, pseudofile_defs, appdirsqfs])),
]

_ATTRS = {
Expand Down
153 changes: 82 additions & 71 deletions appimage/private/mkappdir.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Prepare and build an AppImage AppDir tarball."""
"""Prepare and build an AppImage AppDir Mksquashfs pseudo-file definitions file."""

from __future__ import annotations

Expand All @@ -9,11 +9,8 @@
import os
import shutil
import sys
import tarfile
import tempfile
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import TYPE_CHECKING, Callable, NamedTuple
from typing import TYPE_CHECKING, NamedTuple

if TYPE_CHECKING:
from collections.abc import Sequence
Expand Down Expand Up @@ -101,43 +98,68 @@ def is_inside_bazel_cache(path: Path) -> bool:
return os.fspath(path).startswith(get_output_base())


def copy_file_or_dir(src: Path, dst: Path, preserve_symlinks: bool) -> None:
"""Copy a file or dir from src to dst.
def get_all_parent_dirs(path: Path | str) -> list[Path]:
path = Path(path)
parts = path.parts if path.is_dir() else path.parts[:-1]
return [Path(*parts[: i + 1]) for i in range(len(parts))]

We use copy2 because it preserves metadata like permissions.
If preserve_symlinks is True we do not want to set follow_symlinks in order to keep
symlink targets preserved.

def to_pseudofile_def_lines(src: Path, dst: Path, preserve_symlinks: bool) -> dict[str, str]:
"""Return a pseudo-file definition line for a file or directory.
See https://github.com/plougher/squashfs-tools/blob/e7b6490/examples/pseudo-file.example
Pseudo file definition format:
"filename d mode uid gid" create a directory
"filename m mode uid gid" modify filename
"filename f mode uid gid command" create file from stdout of command
"filename s mode uid gid symlink" create a symbolic link
(...)
"""
dst.parent.mkdir(parents=True, exist_ok=True)
operations = {dir.as_posix(): "d 755 0 0" for dir in get_all_parent_dirs(dst)}
if (
src.is_symlink()
and (preserve_symlinks or not src.readlink().exists())
and not is_inside_bazel_cache(src.readlink())
):
operations[dst.as_posix()] = f"s {src.lstat().st_mode & 0o777:o} 0 0 {src.readlink()}"
elif src.is_file():
operations[dst.as_posix()] = f"f {src.lstat().st_mode & 0o777:o} 0 0 cat {src}"
elif src.is_dir():
operations[dst.as_posix()] = f"d {src.lstat().st_mode & 0o777:o} 0 0"
elif not src.exists():
raise FileNotFoundError(f"{src=} does not exist")
else:
raise NotImplementedError(f"Cannot handle {src}")

return operations


def copy_file_or_dir(src: Path, dst: Path, preserve_symlinks: bool) -> dict[str, str]:
"""Copy a file or dir from src to dst."""
if not src.is_dir():
shutil.copy2(src, dst, follow_symlinks=not preserve_symlinks)
return
return to_pseudofile_def_lines(src, dst, preserve_symlinks)

# Scan and recreate dangling symlinks
dangling: set[Path] = set()
for link in src.rglob("*"):
if not link.is_symlink() or link.exists():
continue
new_link = dst / link.relative_to(src)
new_link.parent.mkdir(parents=True, exist_ok=True)
new_link.symlink_to(link.readlink())
dangling.add(link)
operations: dict[str, str] = {}

copy_function = functools.partial(shutil.copy2, follow_symlinks=not preserve_symlinks)
# Scan and recreate symlinks manually
links = {link for link in src.rglob("*") if link.is_symlink()}
for link in links:
operations.update(to_pseudofile_def_lines(link, dst / link.relative_to(src), preserve_symlinks))

copies: dict[str, str] = {}
shutil.copytree(
src,
dst,
symlinks=preserve_symlinks,
ignore=lambda dir, names: [f for f in names if Path(dir) / f in dangling],
copy_function=copy_function,
ignore=lambda dir, names: [f for f in names if Path(dir) / f in links],
copy_function=lambda src, dst: copies.setdefault(dst, src),
ignore_dangling_symlinks=False,
dirs_exist_ok=True,
)
for dst_, src_ in copies.items():
operations.update(to_pseudofile_def_lines(Path(src_), Path(dst_), preserve_symlinks))


def _copy_file_or_dir_later(src: Path, dst: Path, preserve_symlinks: bool) -> Callable[[], None]:
"""Return a function that copies a file or dir from src to dst."""
return lambda: copy_file_or_dir(src, dst, preserve_symlinks)
return operations


def _move_relative_symlinks_in_files_to_their_own_section(manifest_data: _ManifestData) -> _ManifestData:
Expand Down Expand Up @@ -198,7 +220,7 @@ def _prevent_duplicate_dsts_with_diverging_srcs(manifest_data: _ManifestData) ->
return new_manifest_data


def populate_appdir(appdir: Path, manifest: Path) -> None:
def make_appdir_pseudofile_defs(manifest: Path) -> dict[str, str]:
"""Make the AppDir that will be squashfs'd into the AppImage.
Note that the [AppImage Type2 Spec][appimage-spec] specifies that the contained [AppDir][appdir-spec] may contain a
Expand All @@ -209,79 +231,68 @@ def populate_appdir(appdir: Path, manifest: Path) -> None:
[desktop-spec]: https://specifications.freedesktop.org/desktop-entry-spec/desktop-entry-spec-latest.html
[appimaged]: https://docs.appimage.org/user-guide/run-appimages.html#integrating-appimages-into-the-desktop
"""
appdir.mkdir(parents=True, exist_ok=True)
manifest_data = _ManifestData.from_json(manifest.read_text())
manifest_data = _move_relative_symlinks_in_files_to_their_own_section(manifest_data)
manifest_data = _prevent_duplicate_dsts_with_diverging_srcs(manifest_data)

actions: list[Callable[[], None]] = []
operations: dict[str, str] = {}

for empty_file in manifest_data.empty_files:
# example entry: "tests/test_py.runfiles/__init__.py"
(appdir / empty_file).parent.mkdir(parents=True, exist_ok=True)
(appdir / empty_file).touch()
for dir in get_all_parent_dirs(empty_file):
operations[dir.as_posix()] = "d 755 0 0"
operations[empty_file] = "f 755 0 0 true"

for file in manifest_data.files:
# example entry: {"dst": "tests/test_py.runfiles/_main/tests/data.txt", "src": "tests/data.txt"}
src = Path(file.src).resolve()
dst = Path(appdir / file.dst).resolve()
assert src.exists(), f"want to copy {src} to {dst}, but it does not exist"
actions.append(_copy_file_or_dir_later(src, dst, preserve_symlinks=True))
operations.update(copy_file_or_dir(Path(file.src), Path(file.dst), preserve_symlinks=True))

for link in manifest_data.symlinks:
# example entry: {"linkname": "tests/test_py", "target": "tests/test_py.runfiles/_main/tests/test_py"}
linkname = Path(link.linkname)
linkfile = (appdir / linkname).resolve()
linkfile.parent.mkdir(parents=True, exist_ok=True)
# example entry: {"linkname":
# "tests/test_py.runfiles/_main/../rules_python~0.27.1~python~python_3_11_x86_64-unknown-linux-gnu/bin/python3",
# "target": "python3.11"}
linkfile = Path(link.linkname)
for dir in get_all_parent_dirs(linkfile):
operations[dir.as_posix()] = "d 755 0 0"
target = Path(link.target)
if target.is_absolute():
# We keep absolute symlinks as is, but make no effort to copy the target into the runfiles as well.
# Note that not all Bazel remote cache implementations allow absolute symlinks!
pass
else:
# Adapt relative symlinks to point relative to the new linkfile location
target = relative_path(appdir / target, linkfile.parent)
linkfile.symlink_to(target)
target = relative_path(target, linkfile.parent)
operations[linkfile.as_posix()] = f"s 755 0 0 {target}"

for link in manifest_data.relative_symlinks:
# example entry: {"linkname":
# "tests/test_py.runfiles/_main/../rules_python~0.27.1~python~python_3_11_x86_64-unknown-linux-gnu/bin/python3",
# "target": "python3.11"}
linkfile = (appdir / link.linkname).resolve()
linkfile.parent.mkdir(parents=True, exist_ok=True)
target = Path(link.target)
assert not target.is_absolute(), f"symlink {linkfile} must be relative, but points at {target}"
linkfile.symlink_to(target)
for dir in get_all_parent_dirs(link.linkname):
operations[dir.as_posix()] = "d 755 0 0"
operations[link.linkname] = f"s 755 0 0 {link.target}"

for tree_artifact in manifest_data.tree_artifacts:
# example entry:
# {'dst': 'test.runfiles/_main/../rules_pycross~~lock_repos~pdm_deps/_lock/[email protected]',
# 'src': 'bazel-out/k8-fastbuild/bin/external/rules_pycross~~lock_repos~pdm_deps/_lock/[email protected]'}
src = Path(tree_artifact.src).resolve()
dst = Path(appdir / tree_artifact.dst).resolve()
assert src.exists(), f"want to copy {src} to {dst}, but it does not exist"
actions.append(_copy_file_or_dir_later(src, dst, preserve_symlinks=False))

with ThreadPoolExecutor() as executor:
futures = [executor.submit(action) for action in actions]
for future in futures:
future.result()
operations.update(copy_file_or_dir(Path(tree_artifact.src), Path(tree_artifact.dst), preserve_symlinks=False))

# Must not have `..` in file names: https://github.com/plougher/squashfs-tools/blob/4.6.1/squashfs-tools/unsquash-1.c#L377
operations = {os.path.normpath(f): v for f, v in operations.items()}

def _make_executable(ti: tarfile.TarInfo) -> tarfile.TarInfo:
ti.mode |= 0o111
return ti
return operations


def make_appdir_tar(manifest: Path, apprun: Path, output: Path) -> None:
"""Create an AppImage AppDir (uncompressed) tar ready to be sqfstar'ed."""
with tarfile.open(output, "w") as tar:
tar.add(apprun.resolve(), arcname="AppRun", filter=_make_executable)
with tempfile.TemporaryDirectory() as tmpdir:
appdir = Path(tmpdir)
populate_appdir(appdir, manifest)
for top_level in appdir.iterdir():
tar.add(top_level, arcname=top_level.name)
def write_appdir_pseudofile_defs(manifest: Path, apprun: Path, output: Path) -> None:
"""Write a mksquashfs pf file representing the AppDir."""
lines = [
f"AppRun f 777 0 0 cat {apprun}",
*sorted(f"{k} {v}" for k, v in make_appdir_pseudofile_defs(manifest).items()),
"",
]
output.write_text("\n".join(lines))


def parse_args(args: Sequence[str]) -> argparse.Namespace:
Expand All @@ -299,10 +310,10 @@ def parse_args(args: Sequence[str]) -> argparse.Namespace:
type=Path,
help="Path to AppRun script",
)
parser.add_argument("output", type=Path, help="Where to place output AppDir tar")
parser.add_argument("output", type=Path, help="Where to place output AppDir pseudo-file definition file")
return parser.parse_args(args)


if __name__ == "__main__":
args = parse_args(sys.argv[1:])
make_appdir_tar(args.manifest, args.apprun, args.output)
write_appdir_pseudofile_defs(args.manifest, args.apprun, args.output)
Loading

0 comments on commit 5c2db08

Please sign in to comment.