Skip to content

Commit

Permalink
Improve py_image layering (bazelbuild#324)
Browse files Browse the repository at this point in the history
Consider the following more complex python image that uses the pip
requirement feature of rules_python:

    load("@pip_deps//:requirements.bzl", "requirement")

    py_library(
        name = "py_image_library_with_external_deps",
        srcs = ["py_image_library.py"],
        deps = [requirement("tensorflow")],
    )

    py_library(
        name = "py_image_library",
        srcs = ["py_image_library.py"],
        deps = [":py_image_library_with_external_deps"],
    )

    py_image(
        name = "py_image_complex",
        srcs = ["py_image.py"],
        layers = [
            ":py_image_library_with_external_deps",
            ":py_image_library",
        ],
        main = "py_image.py",
    )

Currently, it will produce the following layers:

   301M bazel-bin/testdata/py_image_complex.0-layer.tar
   301M bazel-bin/testdata/py_image_complex.1-layer.tar
    15M bazel-bin/testdata/py_image_complex-layer.tar

This change improves the layering by fixing the following issues:

   1. The middle layer does not detect that all tensorflow files
      were already added by the first layer.

   2. The last layer is 15MB in size due to the huge symlink forest.
      Create a separate symlink layer for each dependency layer to
      avoid bloating the top layer, while still keeping the dependency
      layers image agnostic.

   3. Getting "tensorflow" to its own layer requires the author of the
      "py_image" rule to know that libraries transitively depend on
      "tensorflow".  This is not great, since library authors may add
      and remove dependencies without remembering to update the "layers"
      list.  To counter this, add a new py_layer rule that can be used
      to select from transitive dependencies and group them to layers,
      without explicitly naming the transitive dependencies like
      "tensorflow".

With these improvements, the above example can be rewritten as:

    load("@pip_deps//:requirements.bzl", "requirement")

    py_library(
        name = "py_image_library_with_external_deps",
        srcs = ["py_image_library.py"],
        deps = [requirement("tensorflow")],
    )

    py_library(
        name = "py_image_library",
        srcs = ["py_image_library.py"],
        deps = [":py_image_library_with_external_deps"],
    )

    py_layer(
        name = "external_deps",
        deps = [":py_image_library"],
        filter = "@",
    )

    py_image(
        name = "py_image_complex",
        srcs = ["py_image.py"],
        layers = [
            ":external_deps",
            ":py_image_library",
        ],
        main = "py_image.py",
    )

This will yield the following layer structure:

   310M bazel-bin/testdata/py_image_complex.0-layer.tar           (agnostic)
    15M bazel-bin/testdata/py_image_complex.0-symlinks-layer.tar  (non-agnostic)
    10K bazel-bin/testdata/py_image_complex.1-layer.tar           (agnostic)
    10K bazel-bin/testdata/py_image_complex.1-symlinks-layer.tar  (non-agnostic)
    20K bazel-bin/testdata/py_image_complex-layer.tar             (non-agnostic)
  • Loading branch information
scele authored and nlopezgi committed Oct 3, 2018
1 parent d288baf commit c9065d1
Show file tree
Hide file tree
Showing 27 changed files with 603 additions and 194 deletions.
45 changes: 45 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,51 @@ py_image(
)
```

You can also implement more complex fine layering strategies by using the
`py_layer` rule and its `filter` attribute. For example:
```python
# Suppose that we are synthesizing an image that depends on a complex set
# of libraries that we want to break into layers.
LIBS = [
"//pkg/complex_library",
# ...
]
# First, we extract all transitive dependencies of LIBS that are under //pkg/common.
py_layer(
name = "common_deps",
deps = LIBS,
filter = "//pkg/common",
)
# Then, we further extract all external dependencies of the deps under //pkg/common.
py_layer(
name = "common_external_deps",
deps = [":common_deps"],
filter = "@",
)
# We also extract all external dependencies of LIBS, which is a superset of
# ":common_external_deps".
py_layer(
name = "external_deps",
deps = LIBS,
filter = "@",
)
# Finally, we create the image, stacking the above filtered layers on top of one
# another in the "layers" attribute. The layers are applied in order, and any
# dependencies already added to the image will not be added again. Therefore,
# ":external_deps" will only add the external dependencies not present in
# ":common_external_deps".
py_image(
name = "image",
deps = LIBS,
layers = [
":common_external_deps",
":common_deps",
":external_deps",
],
# ...
)
```

### py3_image

To use a Python 3 runtime instead of the default of Python 2, use `py3_image`,
Expand Down
20 changes: 20 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,26 @@ load(

_py_image_repos()

http_archive(
name = "io_bazel_rules_python",
sha256 = "8b32d2dbb0b0dca02e0410da81499eef8ff051dad167d6931a92579e3b2a1d48",
strip_prefix = "rules_python-8b5d0683a7d878b28fffe464779c8a53659fc645",
urls = ["https://github.com/bazelbuild/rules_python/archive/8b5d0683a7d878b28fffe464779c8a53659fc645.tar.gz"],
)

load("@io_bazel_rules_python//python:pip.bzl", "pip_import", "pip_repositories")

pip_repositories()

pip_import(
name = "pip_deps",
requirements = "//testdata:requirements-pip.txt",
)

load("@pip_deps//:requirements.bzl", "pip_install")

pip_install()

load(
"//python3:image.bzl",
_py3_image_repos = "repositories",
Expand Down
7 changes: 2 additions & 5 deletions cc/image.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ The signature of this rule is compatible with cc_binary.
load(
"//lang:image.bzl",
"app_layer",
"dep_layer",
)
load(
"//container:container.bzl",
Expand Down Expand Up @@ -78,17 +77,15 @@ def cc_image(name, base = None, deps = [], layers = [], binary = None, **kwargs)

base = base or DEFAULT_BASE
for index, dep in enumerate(layers):
this_name = "%s.%d" % (name, index)
dep_layer(name = this_name, base = base, dep = dep)
base = this_name
base = app_layer(name = "%s.%d" % (name, index), base = base, dep = dep)
base = app_layer(name = "%s.%d-symlinks" % (name, index), base = base, dep = dep, binary = binary)

visibility = kwargs.get("visibility", None)
tags = kwargs.get("tags", None)
app_layer(
name = name,
base = base,
binary = binary,
lang_layers = layers,
visibility = visibility,
tags = tags,
args = kwargs.get("args"),
Expand Down
1 change: 1 addition & 0 deletions container/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ TEST_TARGETS = [
":nodejs_image",
":py3_image",
":py_image",
":py_image_complex",
":rust_image",
":scala_image",
":war_image",
Expand Down
9 changes: 9 additions & 0 deletions container/image.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,11 @@ def _impl(
unzipped_layers = parent_parts.get("unzipped_layer", []) + [layer.unzipped_layer for layer in layers]
layer_diff_ids = [layer.diff_id for layer in layers]
diff_ids = parent_parts.get("diff_id", []) + layer_diff_ids
new_files = [f for f in file_map or []]
new_emptyfiles = empty_files or []
new_symlinks = [f for f in symlinks or []]
parent_transitive_files = parent_parts.get("transitive_files", depset())
transitive_files = depset(new_files + new_emptyfiles + new_symlinks, transitive = [parent_transitive_files])

# Get the config for the base layer
config_file = _get_base_config(ctx, name, base)
Expand Down Expand Up @@ -393,6 +398,9 @@ def _impl(
# At the root of the chain, we support deriving from a tarball
# base image.
"legacy": parent_parts.get("legacy"),

# Keep track of all files/emptyfiles/symlinks that we have already added to the image layers.
"transitive_files": transitive_files,
}

# We support incrementally loading or assembling this single image
Expand Down Expand Up @@ -482,6 +490,7 @@ _attrs = dict(_layer.attrs.items() + {
_outputs = dict(_layer.outputs)

_outputs["out"] = "%{name}.tar"

_outputs["digest"] = "%{name}.digest"

image = struct(
Expand Down
158 changes: 154 additions & 4 deletions container/image_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def TestBundleImage(name, image_name):
class ImageTest(unittest.TestCase):

def assertTarballContains(self, tar, paths):
self.maxDiff = None
self.assertEqual(paths, tar.getnames())

def assertLayerNContains(self, img, n, paths):
Expand Down Expand Up @@ -468,23 +469,172 @@ def test_py_image(self):
# files to avoid this redundancy.
'/app',
'/app/testdata',
'/app/testdata/py_image.binary',
'/app/testdata/py_image.binary.runfiles',
'/app/testdata/py_image.binary.runfiles/io_bazel_rules_docker',
'/app/testdata/py_image.binary.runfiles/io_bazel_rules_docker/testdata',
'/app/testdata/py_image.binary.runfiles/io_bazel_rules_docker/testdata/py_image_library.py',
'/app/testdata/py_image.binary',
'/app/testdata/py_image.binary.runfiles/io_bazel_rules_docker/external',
])

# Check the library layer, which is one below our application layer.
# Below that, we have a layer that generates symlinks for the library layer.
self.assertLayerNContains(img, 1, [
'.',
'/app',
'/app/testdata',
'/app/testdata/py_image.binary.runfiles',
'/app/testdata/py_image.binary.runfiles/io_bazel_rules_docker',
'/app/testdata/py_image.binary.runfiles/io_bazel_rules_docker/testdata',
'/app/testdata/py_image.binary.runfiles/io_bazel_rules_docker/testdata/py_image_library.py',
])

# Check the library layer, which is two below our application layer.
self.assertLayerNContains(img, 2, [
'.',
'./app',
'./app/io_bazel_rules_docker',
'./app/io_bazel_rules_docker/testdata',
'./app/io_bazel_rules_docker/testdata/py_image_library.py',
])

def test_py_image_complex(self):
with TestImage('py_image_complex') as img:
# bazel-bin/testdata/py_image_complex-layer.tar
self.assertTopLayerContains(img, [
'.',
'./app',
'./app/testdata',
'./app/testdata/py_image_complex.binary.runfiles',
'./app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker',
'./app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata',
'./app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata/py_image_complex.py',
'./app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata/py_image_complex.binary',
'./app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata/test',
'./app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata/test/__init__.py',
'./app/testdata/py_image_complex.binary.runfiles/pypi__six_1_11_0',
'./app/testdata/py_image_complex.binary.runfiles/pypi__six_1_11_0/__init__.py',
'./app/testdata/py_image_complex.binary.runfiles/__init__.py',
'./app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2',
'./app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2/__init__.py',
'./app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata/__init__.py',
'/app',
'/app/testdata',
'/app/testdata/py_image_complex.binary',
'/app/testdata/py_image_complex.binary.runfiles',
'/app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker',
'/app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/external',
])

# bazel-bin/testdata/py_image_complex.3-symlinks-layer.tar
self.assertLayerNContains(img, 1, [
'.',
'/app',
'/app/testdata',
'/app/testdata/py_image_complex.binary.runfiles',
'/app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker',
'/app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata',
'/app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata/py_image_complex_library.py',
'/app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata/py_image_library_using_six.py',
])

# bazel-bin/testdata/py_image_complex.3-layer.tar
self.assertLayerNContains(img, 2, [
'.',
'./app',
'./app/io_bazel_rules_docker',
'./app/io_bazel_rules_docker/testdata',
'./app/io_bazel_rules_docker/testdata/py_image_complex_library.py',
'./app/io_bazel_rules_docker/testdata/py_image_library_using_six.py',
])

# bazel-bin/testdata/py_image_complex.2-symlinks-layer.tar
self.assertLayerNContains(img, 3, [
'.',
'/app',
'/app/testdata',
'/app/testdata/py_image_complex.binary.runfiles',
'/app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker',
'/app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata',
'/app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata/test',
'/app/testdata/py_image_complex.binary.runfiles/io_bazel_rules_docker/testdata/test/py_image_library_using_addict.py',
])

# bazel-bin/testdata/py_image_complex.2-layer.tar
self.assertLayerNContains(img, 4, [
'.',
'./app',
'./app/io_bazel_rules_docker',
'./app/io_bazel_rules_docker/testdata',
'./app/io_bazel_rules_docker/testdata/test',
'./app/io_bazel_rules_docker/testdata/test/py_image_library_using_addict.py',
])

# bazel-bin/testdata/py_image_complex.1-symlinks-layer.tar
self.assertLayerNContains(img, 5, [
'.',
'/app',
'/app/testdata',
'/app/testdata/py_image_complex.binary.runfiles',
'/app/testdata/py_image_complex.binary.runfiles/pypi__six_1_11_0',
'/app/testdata/py_image_complex.binary.runfiles/pypi__six_1_11_0/six.py',
'/app/testdata/py_image_complex.binary.runfiles/pypi__six_1_11_0/six-1.11.0.dist-info',
'/app/testdata/py_image_complex.binary.runfiles/pypi__six_1_11_0/six-1.11.0.dist-info/DESCRIPTION.rst',
'/app/testdata/py_image_complex.binary.runfiles/pypi__six_1_11_0/six-1.11.0.dist-info/METADATA',
'/app/testdata/py_image_complex.binary.runfiles/pypi__six_1_11_0/six-1.11.0.dist-info/RECORD',
'/app/testdata/py_image_complex.binary.runfiles/pypi__six_1_11_0/six-1.11.0.dist-info/WHEEL',
'/app/testdata/py_image_complex.binary.runfiles/pypi__six_1_11_0/six-1.11.0.dist-info/metadata.json',
'/app/testdata/py_image_complex.binary.runfiles/pypi__six_1_11_0/six-1.11.0.dist-info/top_level.txt',
])

# bazel-bin/testdata/py_image_complex.1-layer.tar
self.assertLayerNContains(img, 6, [
'.',
'./app',
'./app/pypi__six_1_11_0',
'./app/pypi__six_1_11_0/six.py',
'./app/pypi__six_1_11_0/six-1.11.0.dist-info',
'./app/pypi__six_1_11_0/six-1.11.0.dist-info/DESCRIPTION.rst',
'./app/pypi__six_1_11_0/six-1.11.0.dist-info/METADATA',
'./app/pypi__six_1_11_0/six-1.11.0.dist-info/RECORD',
'./app/pypi__six_1_11_0/six-1.11.0.dist-info/WHEEL',
'./app/pypi__six_1_11_0/six-1.11.0.dist-info/metadata.json',
'./app/pypi__six_1_11_0/six-1.11.0.dist-info/top_level.txt',
])


# bazel-bin/testdata/py_image_complex.0-symlinks-layer.tar
self.assertLayerNContains(img, 7, [
'.',
'/app',
'/app/testdata',
'/app/testdata/py_image_complex.binary.runfiles',
'/app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2',
'/app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2/addict',
'/app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2/addict/__init__.py',
'/app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2/addict/addict.py',
'/app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2/addict-2.1.2.dist-info',
'/app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2/addict-2.1.2.dist-info/DESCRIPTION.rst',
'/app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2/addict-2.1.2.dist-info/METADATA',
'/app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2/addict-2.1.2.dist-info/RECORD',
'/app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2/addict-2.1.2.dist-info/WHEEL',
'/app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2/addict-2.1.2.dist-info/metadata.json',
'/app/testdata/py_image_complex.binary.runfiles/pypi__addict_2_1_2/addict-2.1.2.dist-info/top_level.txt',
])

# bazel-bin/testdata/py_image_complex.0-layer.tar
self.assertLayerNContains(img, 8, [
'.',
'./app',
'./app/pypi__addict_2_1_2',
'./app/pypi__addict_2_1_2/addict',
'./app/pypi__addict_2_1_2/addict/__init__.py',
'./app/pypi__addict_2_1_2/addict/addict.py',
'./app/pypi__addict_2_1_2/addict-2.1.2.dist-info',
'./app/pypi__addict_2_1_2/addict-2.1.2.dist-info/DESCRIPTION.rst',
'./app/pypi__addict_2_1_2/addict-2.1.2.dist-info/METADATA',
'./app/pypi__addict_2_1_2/addict-2.1.2.dist-info/RECORD',
'./app/pypi__addict_2_1_2/addict-2.1.2.dist-info/WHEEL',
'./app/pypi__addict_2_1_2/addict-2.1.2.dist-info/metadata.json',
'./app/pypi__addict_2_1_2/addict-2.1.2.dist-info/top_level.txt',
])

def test_cc_image(self):
with TestImage('cc_image') as img:
Expand Down
10 changes: 8 additions & 2 deletions container/import.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,14 @@ def _container_import_impl(ctx):
container_import = rule(
attrs = dict({
"config": attr.label(allow_files = [".json"]),
"manifest": attr.label(allow_files = [".json"], mandatory = False),
"layers": attr.label_list(allow_files = tar_filetype + tgz_filetype, mandatory = True),
"manifest": attr.label(
allow_files = [".json"],
mandatory = False,
),
"layers": attr.label_list(
allow_files = tar_filetype + tgz_filetype,
mandatory = True,
),
"repository": attr.string(default = "bazel"),
}.items() + _hash_tools.items() + _layer_tools.items() + _zip_tools.items()),
executable = True,
Expand Down
5 changes: 4 additions & 1 deletion container/layer.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,10 @@ _layer_attrs = dict({
# Implicit/Undocumented dependencies.
"empty_files": attr.string_list(),
"empty_dirs": attr.string_list(),
"operating_system": attr.string(default = "linux", mandatory = False),
"operating_system": attr.string(
default = "linux",
mandatory = False,
),
"build_layer": attr.label(
default = Label("//container:build_tar"),
cfg = "host",
Expand Down
20 changes: 19 additions & 1 deletion container/providers.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
"""Provider definitions"""

# A provider containing information exposed by container_bundle rules
BundleInfo = provider(fields = ["container_images", "stamp"])
BundleInfo = provider(fields = [
"container_images",
"stamp",
])

# A provider containing information exposed by container_flatten rules
FlattenInfo = provider()
Expand Down Expand Up @@ -46,3 +49,18 @@ PushInfo = provider(fields = [
"stamp",
"stamp_inputs",
])

# A provider containing information exposed by filter_layer rules
FilterLayerInfo = provider(
fields = {
"runfiles": "filtered runfiles that should be installed from this layer",
"filtered_depset": "a filtered depset of struct(target=<target>, target_deps=<depset>)",
},
)

# A provider containing information exposed by filter_aspect
FilterAspectInfo = provider(
fields = {
"depset": "a depset of struct(target=<target>, target_deps=<depset>)",
},
)
Loading

0 comments on commit c9065d1

Please sign in to comment.