From bfec1035d3aa3134eae7c1010c0dbecd9d48494a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= Date: Wed, 22 May 2024 15:24:25 +0200 Subject: [PATCH 1/7] RHOAIENG-7525: Build opendatahub-io/notebooks in GitHub Action with caching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jiri Daněk (cherry picked from commit 5c4029b841fe9e0bc22e0980d8b62f5e17665d89) --- .../workflows/build-notebooks-TEMPLATE.yaml | 93 ++++ .github/workflows/build-notebooks-pr.yaml | 29 ++ .github/workflows/build-notebooks.yaml | 468 ++++++++++++++++++ .github/workflows/code-quality.yaml | 21 + Makefile | 4 +- ci/cached-builds/containers.conf | 22 + .../dev_null_container_registry.go | 14 + ci/cached-builds/gen_gha_matrix_jobs.py | 161 ++++++ .../insecure_localhost_registry.conf | 3 + ci/cached-builds/storage.conf | 11 + 10 files changed, 825 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/build-notebooks-TEMPLATE.yaml create mode 100644 .github/workflows/build-notebooks-pr.yaml create mode 100644 .github/workflows/build-notebooks.yaml create mode 100644 ci/cached-builds/containers.conf create mode 100644 ci/cached-builds/dev_null_container_registry.go create mode 100644 ci/cached-builds/gen_gha_matrix_jobs.py create mode 100644 ci/cached-builds/insecure_localhost_registry.conf create mode 100644 ci/cached-builds/storage.conf diff --git a/.github/workflows/build-notebooks-TEMPLATE.yaml b/.github/workflows/build-notebooks-TEMPLATE.yaml new file mode 100644 index 000000000..808e428d0 --- /dev/null +++ b/.github/workflows/build-notebooks-TEMPLATE.yaml @@ -0,0 +1,93 @@ +# inspired by +# https://github.com/thesuperzapper/kubeflow/blob/master/.github/workflows/example_notebook_servers_publish_TEMPLATE.yaml +--- +name: Build & Publish Notebook Servers (TEMPLATE) +"on": + workflow_call: + inputs: + # https://docs.github.com/en/actions/learn-github-actions/variables#default-environment-variables + # https://docs.github.com/en/actions/learn-github-actions/contexts + target: + required: true + description: "make target to build" + type: string + github: + required: true + description: "top workflow's `github`" + type: string + +jobs: + build: + runs-on: ubuntu-latest + env: + # GitHub image registry used for storing $(CONTAINER_ENGINE)'s cache + CACHE: "ghcr.io/${{ github.repository }}/workbench-images/build-cache" + + steps: + + - uses: actions/checkout@v4 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # https://github.com/containers/buildah/issues/2521#issuecomment-884779112 + - name: Workaround https://github.com/containers/podman/issues/22152#issuecomment-2027705598 + run: sudo apt-get -qq remove podman crun + + - uses: actions/cache@v4 + id: cached-linuxbrew + with: + path: /home/linuxbrew/.linuxbrew + key: linuxbrew + + - name: Install podman + if: steps.cached-linuxbrew.outputs.cache-hit != 'true' + run: | + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" + /home/linuxbrew/.linuxbrew/bin/brew install podman + + - name: Add linuxbrew to PATH + run: echo "/home/linuxbrew/.linuxbrew/bin/" >> $GITHUB_PATH + + - name: Configure Podman + run: | + mkdir -p $HOME/.config/containers/ + cp ci/cached-builds/containers.conf $HOME/.config/containers/containers.conf + cp ci/cached-builds/storage.conf $HOME/.config/containers/storage.conf + # should at least reset storage 
when touching storage.conf + sudo mkdir -p /mnt/containers/ + sudo chown -R $USER:$USER /mnt/containers + podman system reset --force + # podman bug? need to create this _after_ doing the reset + mkdir -p /mnt/containers/tmp + + # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#push + - name: "push: make ${{ inputs.target }}" + run: "make ${{ inputs.target }}" + if: "${{ fromJson(inputs.github).event_name == 'push' }}" + env: + IMAGE_TAG: "${{ github.ref_name }}_${{ github.sha }}" + IMAGE_REGISTRY: "ghcr.io/${{ github.repository }}/workbench-images" + CONTAINER_BUILD_CACHE_ARGS: "--cache-from ${{ env.CACHE }} --cache-to ${{ env.CACHE }}" + + # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request + - name: "pull_request: make ${{ inputs.target }}" + run: | + # start a black hole container registry as make target always does a push + mkdir -p $HOME/.config/containers/registries.conf.d/ + cp ci/cached-builds/insecure_localhost_registry.conf $HOME/.config/containers/registries.conf.d/insecure_localhost_registry.conf + go run ci/cached-builds/dev_null_container_registry.go & + # build and push the image + make ${{ inputs.target }} + if: "${{ fromJson(inputs.github).event_name == 'pull_request' }}" + env: + IMAGE_TAG: "${{ github.sha }}" + IMAGE_REGISTRY: "localhost:5000/workbench-images" + CONTAINER_BUILD_CACHE_ARGS: "--cache-from ${{ env.CACHE }}" + + - run: df -h + if: "${{ !cancelled() }}" diff --git a/.github/workflows/build-notebooks-pr.yaml b/.github/workflows/build-notebooks-pr.yaml new file mode 100644 index 000000000..d04d90a6c --- /dev/null +++ b/.github/workflows/build-notebooks-pr.yaml @@ -0,0 +1,29 @@ +--- +"name": "Build Notebooks" +"permissions": + "packages": "read" +"on": + "pull_request": + +jobs: + gen: + name: Generate job matrix + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.gen.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + - run: python3 ci/cached-builds/gen_gha_matrix_jobs.py + id: gen + + # base images + build: + needs: ["gen"] + strategy: + fail-fast: false + matrix: "${{ fromJson(needs.gen.outputs.matrix) }}" + uses: ./.github/workflows/build-notebooks-TEMPLATE.yaml + with: + target: "${{ matrix.target }}" + github: "${{ toJSON(github) }}" + secrets: inherit diff --git a/.github/workflows/build-notebooks.yaml b/.github/workflows/build-notebooks.yaml new file mode 100644 index 000000000..b76bd1fea --- /dev/null +++ b/.github/workflows/build-notebooks.yaml @@ -0,0 +1,468 @@ +--- +# This file is autogenerated by ci/cached-builds/gen_gha_matrix_jobs.py +{ + "name": "Build Notebooks", + "permissions": { + "packages": "write" + }, + "on": { + "push": {}, + "workflow_dispatch": {} + }, + "jobs": { + "base-ubi8-python-3_8": { + "needs": [], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "base-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "jupyter-minimal-ubi8-python-3_8": { + "needs": [ + "base-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "jupyter-minimal-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "jupyter-datascience-ubi8-python-3_8": { + "needs": [ + "jupyter-minimal-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "jupyter-datascience-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + 
"cuda-ubi8-python-3_8": { + "needs": [ + "base-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "cuda-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "cuda-jupyter-minimal-ubi8-python-3_8": { + "needs": [ + "cuda-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "cuda-jupyter-minimal-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "cuda-jupyter-datascience-ubi8-python-3_8": { + "needs": [ + "cuda-jupyter-minimal-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "cuda-jupyter-datascience-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "jupyter-trustyai-ubi8-python-3_8": { + "needs": [ + "jupyter-datascience-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "jupyter-trustyai-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "habana-jupyter-1_9_0-ubi8-python-3_8": { + "needs": [ + "jupyter-datascience-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "habana-jupyter-1.9.0-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "habana-jupyter-1_10_0-ubi8-python-3_8": { + "needs": [ + "jupyter-datascience-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "habana-jupyter-1.10.0-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "habana-jupyter-1_11_0-ubi8-python-3_8": { + "needs": [ + "jupyter-datascience-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "habana-jupyter-1.11.0-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "habana-jupyter-1_13_0-ubi8-python-3_8": { + "needs": [ + "jupyter-datascience-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "habana-jupyter-1.13.0-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "runtime-minimal-ubi8-python-3_8": { + "needs": [ + "base-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "runtime-minimal-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "runtime-datascience-ubi8-python-3_8": { + "needs": [ + "base-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "runtime-datascience-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "runtime-pytorch-ubi8-python-3_8": { + "needs": [ + "base-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "runtime-pytorch-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "runtime-cuda-tensorflow-ubi8-python-3_8": { + "needs": [ + "cuda-ubi8-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "runtime-cuda-tensorflow-ubi8-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "base-ubi9-python-3_9": { + "needs": [], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + 
"with": { + "target": "base-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "jupyter-minimal-ubi9-python-3_9": { + "needs": [ + "base-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "jupyter-minimal-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "jupyter-datascience-ubi9-python-3_9": { + "needs": [ + "jupyter-minimal-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "jupyter-datascience-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "cuda-ubi9-python-3_9": { + "needs": [ + "base-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "cuda-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "cuda-jupyter-minimal-ubi9-python-3_9": { + "needs": [ + "cuda-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "cuda-jupyter-minimal-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "cuda-jupyter-datascience-ubi9-python-3_9": { + "needs": [ + "cuda-jupyter-minimal-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "cuda-jupyter-datascience-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "cuda-jupyter-tensorflow-ubi9-python-3_9": { + "needs": [ + "cuda-jupyter-datascience-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "cuda-jupyter-tensorflow-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "jupyter-pytorch-ubi9-python-3_9": { + "needs": [ + "cuda-jupyter-datascience-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "jupyter-pytorch-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "jupyter-trustyai-ubi9-python-3_9": { + "needs": [ + "jupyter-datascience-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "jupyter-trustyai-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "runtime-minimal-ubi9-python-3_9": { + "needs": [ + "base-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "runtime-minimal-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "runtime-datascience-ubi9-python-3_9": { + "needs": [ + "base-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "runtime-datascience-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "runtime-pytorch-ubi9-python-3_9": { + "needs": [ + "base-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "runtime-pytorch-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "runtime-cuda-tensorflow-ubi9-python-3_9": { + "needs": [ + "cuda-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "runtime-cuda-tensorflow-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + 
"codeserver-ubi9-python-3_9": { + "needs": [ + "base-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "codeserver-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "intel-base-gpu-ubi9-python-3_9": { + "needs": [ + "base-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "intel-base-gpu-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "intel-runtime-tensorflow-ubi9-python-3_9": { + "needs": [ + "intel-base-gpu-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "intel-runtime-tensorflow-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "jupyter-intel-tensorflow-ubi9-python-3_9": { + "needs": [ + "intel-base-gpu-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "jupyter-intel-tensorflow-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "intel-runtime-pytorch-ubi9-python-3_9": { + "needs": [ + "intel-base-gpu-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "intel-runtime-pytorch-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "jupyter-intel-pytorch-ubi9-python-3_9": { + "needs": [ + "intel-base-gpu-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "jupyter-intel-pytorch-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "intel-runtime-ml-ubi9-python-3_9": { + "needs": [ + "base-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "intel-runtime-ml-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "jupyter-intel-ml-ubi9-python-3_9": { + "needs": [ + "base-ubi9-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "jupyter-intel-ml-ubi9-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "base-c9s-python-3_9": { + "needs": [], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "base-c9s-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "cuda-c9s-python-3_9": { + "needs": [ + "base-c9s-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "cuda-c9s-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "rstudio-c9s-python-3_9": { + "needs": [ + "base-c9s-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "rstudio-c9s-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "cuda-rstudio-c9s-python-3_9": { + "needs": [ + "cuda-c9s-python-3_9" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "cuda-rstudio-c9s-python-3.9", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "base-anaconda-python-3_8": { + "needs": [], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "base-anaconda-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + }, + "jupyter-datascience-anaconda-python-3_8": { + "needs": [ + 
"base-anaconda-python-3_8" + ], + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": "jupyter-datascience-anaconda-python-3.8", + "github": "${{ toJSON(github) }}" + }, + "secrets": "inherit" + } + } +} diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index c3cc98cc0..e1008aec2 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -9,6 +9,27 @@ permissions: contents: read jobs: + check-generated-code: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - name: Rerun all code generators we have + run: python3 ci/cached-builds/gen_gha_matrix_jobs.py + + - name: Check there aren't any modified files present + run: | + if [[ $(git ls-files . -d -m -o --exclude-standard --full-name -v | tee modified.log | wc -l) -gt 0 ]]; then + echo "There are changed files" + exit 1 + fi + + - name: Print modified files + if: ${{ failure() }} + run: | + cat modified.log + git diff + code-static-analysis: runs-on: ubuntu-latest steps: diff --git a/Makefile b/Makefile index 297e73684..5ee75f7c3 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,7 @@ IMAGE_REGISTRY ?= quay.io/opendatahub/workbench-images RELEASE ?= 2024a +# additional user-specified caching parameters for $(CONTAINER_ENGINE) build +CONTAINER_BUILD_CACHE_ARGS ?= --no-cache # OS dependant: Generate date, select appropriate cmd to locate container engine ifeq ($(OS), Windows_NT) @@ -41,7 +43,7 @@ define build_image $(eval BUILD_ARGS := --build-arg BASE_IMAGE=$(BASE_IMAGE_NAME)), $(eval BUILD_ARGS :=) ) - $(CONTAINER_ENGINE) build --no-cache -t $(IMAGE_NAME) $(BUILD_ARGS) $(2) + $(CONTAINER_ENGINE) build $(CONTAINER_BUILD_CACHE_ARGS) -t $(IMAGE_NAME) $(BUILD_ARGS) $(2) endef # Push function for the notebok image: diff --git a/ci/cached-builds/containers.conf b/ci/cached-builds/containers.conf new file mode 100644 index 000000000..6f9a8c43e --- /dev/null +++ b/ci/cached-builds/containers.conf @@ -0,0 +1,22 @@ +# https://github.com/containers/common/blob/main/docs/containers.conf.5.md + +[containers] + +[engine] +# needed for reliability +retry=100 +# supposedly these images are faster to pull +compression_format="zstd:chunked" +compression_level=6 +# defaults to /var/tmp, which is small +image_copy_tmp_dir="storage" + +[machine] + +[network] +# workaround for missing pasta binary in linuxbrew +default_rootless_network_cmd="slirp4netns" + +[secrets] + +[configmaps] diff --git a/ci/cached-builds/dev_null_container_registry.go b/ci/cached-builds/dev_null_container_registry.go new file mode 100644 index 000000000..bb3049fc0 --- /dev/null +++ b/ci/cached-builds/dev_null_container_registry.go @@ -0,0 +1,14 @@ +package main + +import ( + "log" + "net/http" +) + +func main() { + http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + log.Printf("%s %v", r.Method, r.URL) + }) + + log.Fatal(http.ListenAndServe(":5000", nil)) +} diff --git a/ci/cached-builds/gen_gha_matrix_jobs.py b/ci/cached-builds/gen_gha_matrix_jobs.py new file mode 100644 index 000000000..867577932 --- /dev/null +++ b/ci/cached-builds/gen_gha_matrix_jobs.py @@ -0,0 +1,161 @@ +import itertools +import json +import os +import pathlib +import re +import string +from typing import Iterable + +"""Trivial Makefile parser that extracts target dependencies so that we can build each Dockerfile image target in its +own GitHub Actions job and handle dependencies between them. 
+ +The parsing is not able to handle general Makefiles, it only works with the Makefile in this project. +Use https://pypi.org/project/py-make/ or https://github.com/JetBrains/intellij-plugins/tree/master/makefile/grammars if you look for general parser.""" + +project_dir = pathlib.Path(__file__).parent.parent.parent.absolute() + + +def read_makefile_lines(lines: Iterable[str]) -> list[str]: + """Processes line continuations lines and line comments + Note that this does not handle escaped backslash and escaped hash, or hash inside literals, ...""" + output = [] + current = "" + for line in lines: + # remove comment + if (i := line.find("#")) != -1: + line = line[:i] + + # line continuation + if line.endswith("\\\n"): + current += line[:-2] + else: + current += line[:-1] + output.append(current) + current = "" + if current: + output.append(current) + return output + + +def extract_target_dependencies(lines: Iterable[str]) -> dict[str, list[str]]: + tree = {} + for line in lines: + # not a target + if line.startswith("\t"): + continue + # .PHONY targets and such + if line.startswith("."): + continue + + r = re.compile(r""" + ^ # match from beginning + ([-A-Za-z0-9.]+)\s*: # target name + (?:\s* # any number of spaces between dependent targets + ([-A-Za-z0-9.]+) # dependent target name(s) + )* # ... + \s*$ # any whitespace at the end of the line + """, re.VERBOSE) + if m := re.match(r, line): + target, *deps = m.groups() + if deps == [None]: + deps = [] + tree[target] = deps + return tree + + +def write_github_workflow_file(tree: dict[str, list[str]], path: pathlib.Path) -> None: + jobs = {} + + # IDs may only contain alphanumeric characters, '_', and '-'. IDs must start with a letter or '_' and must be less than 100 characters. + allowed_github_chars = string.ascii_letters + string.digits + "_-" + + for task, deps in tree.items(): + # in level 0, we only want base images, not other utility tasks + if not deps: + if not task.startswith("base-"): + continue + + # we won't build rhel-based images because they need subscription + if "rhel" in task: + continue + + task_name = re.sub(r"[^-_0-9A-Za-z]", "_", task) + deps_names = [re.sub(r"[^-_0-9A-Za-z]", "_", dep) for dep in deps] + jobs[task_name] = { + "needs": deps_names, + "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml", + "with": { + "target": task, + "github": "${{ toJSON(github) }}", + }, + "secrets": "inherit", + } + + workflow = { + "name": "Build Notebooks", + # https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token + "permissions": { + "packages": "write", + }, + "on": { + "push": {}, + "workflow_dispatch": {}, + }, + "jobs": jobs, + } + + with open(path, "wt") as f: + print("---", file=f) + print("# This file is autogenerated by", pathlib.Path(__file__).relative_to(project_dir), file=f) + # every json file is a valid yaml file + json.dump(workflow, f, sort_keys=False, indent=4) + print(file=f) + + +def flatten(list_of_lists): + return list(itertools.chain.from_iterable(list_of_lists)) + +def compute_leafs_in_dependency_tree(tree: dict[str, list[str]]) -> list[str]: + key_set = set(tree.keys()) + value_set = set(flatten(tree.values())) + return [key for key in key_set if key not in value_set] + +def print_github_actions_pr_matrix(tree: dict[str, list[str]], leafs: list[str]) -> list[str]: + """Outputs GitHub matrix definition Json + """ + targets = [] + for leaf in leafs: + # in level 0, we only want base images, not other utility tasks + if not 
tree[leaf] and not leaf.startswith("base-"):
+            continue
+
+        # we won't build rhel-based images because they need a subscription
+        if "rhel" in leaf:
+            continue
+
+        targets.append(leaf)
+
+    matrix = {"target": targets}
+    return [f"matrix={json.dumps(matrix, separators=(',', ':'))}"]
+
+
+def main() -> None:
+    # https://www.gnu.org/software/make/manual/make.html#Reading-Makefiles
+    with open("Makefile", "rt") as makefile:
+        lines = read_makefile_lines(makefile)
+    tree = extract_target_dependencies(lines)
+
+    write_github_workflow_file(tree, project_dir / ".github" / "workflows" / "build-notebooks.yaml")
+
+    leafs = compute_leafs_in_dependency_tree(tree)
+    output = print_github_actions_pr_matrix(tree, leafs)
+
+    print("leafs", leafs)
+    print(*output, sep="\n")
+    with open(os.environ["GITHUB_OUTPUT"], "at") as f:
+        for line in output:
+            print(line, file=f)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/ci/cached-builds/insecure_localhost_registry.conf b/ci/cached-builds/insecure_localhost_registry.conf
new file mode 100644
index 000000000..cddc459e8
--- /dev/null
+++ b/ci/cached-builds/insecure_localhost_registry.conf
@@ -0,0 +1,3 @@
+[[registry]]
+location = "localhost:5000"
+insecure = true
diff --git a/ci/cached-builds/storage.conf b/ci/cached-builds/storage.conf
new file mode 100644
index 000000000..24a181ec0
--- /dev/null
+++ b/ci/cached-builds/storage.conf
@@ -0,0 +1,11 @@
+# https://github.com/containers/storage/blob/main/docs/containers-storage.conf.5.md
+
+[storage]
+driver="overlay"
+rootless_storage_path="/mnt/containers"
+
+[storage.options]
+# https://www.redhat.com/sysadmin/faster-container-image-pulls
+pull_options = {enable_partial_images = "true", use_hard_links = "true", ostree_repos=""}
+
+[storage.options.overlay]

From 8e17746917a0433b1bbb0c4e07f1bf325f71363d Mon Sep 17 00:00:00 2001
From: Jan Stourac
Date: Fri, 19 Jul 2024 14:24:42 +0200
Subject: [PATCH 2/7] fixup

---
 .github/workflows/build-notebooks.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build-notebooks.yaml b/.github/workflows/build-notebooks.yaml
index b76bd1fea..3b2b72655 100644
--- a/.github/workflows/build-notebooks.yaml
+++ b/.github/workflows/build-notebooks.yaml
@@ -349,7 +349,7 @@
         },
         "jupyter-intel-tensorflow-ubi9-python-3_9": {
             "needs": [
-                "intel-base-gpu-ubi9-python-3_9"
+                "intel-runtime-tensorflow-ubi9-python-3_9"
             ],
             "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml",
             "with": {
@@ -371,7 +371,7 @@
         },
         "jupyter-intel-pytorch-ubi9-python-3_9": {
             "needs": [
-                "intel-base-gpu-ubi9-python-3_9"
+                "intel-runtime-pytorch-ubi9-python-3_9"
             ],
             "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml",
             "with": {
@@ -393,7 +393,7 @@
         },
         "jupyter-intel-ml-ubi9-python-3_9": {
             "needs": [
-                "base-ubi9-python-3_9"
+                "intel-runtime-ml-ubi9-python-3_9"
             ],
             "uses": "./.github/workflows/build-notebooks-TEMPLATE.yaml",
             "with": {

From 8ffa41eedd4e34ac3fedf6bf774fa9d1d7fa2147 Mon Sep 17 00:00:00 2001
From: Jan Stourac
Date: Sat, 15 Jun 2024 20:09:00 +0200
Subject: [PATCH 3/7] [CI] let's run params-env workflow also on push

Let's run the params-env workflow that checks values in the params.env
and commit.env files also on the push and workflow_dispatch events.

(cherry picked from commit 23fb67c7c377c41522ae76fb34ecb2fa8a129155)
---
 .github/workflows/params-env.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/params-env.yaml b/.github/workflows/params-env.yaml
index 1d4da9402..e6ac1674a 100644
--- a/.github/workflows/params-env.yaml
+++ b/.github/workflows/params-env.yaml
@@ -1,11 +1,13 @@
 ---
 name: Validation of image references (image SHAs) in params.env and runtime images
 on:  # yamllint disable-line rule:truthy
+  push:
   pull_request:
     paths:
       - 'manifests/base/commit.env'
       - 'manifests/base/params.env'
       - 'ci/check-params-env.sh'
+  workflow_dispatch:
 
 permissions:
   contents: read

From 27e9c988de6c284f92bdb71b9c375987209df88e Mon Sep 17 00:00:00 2001
From: Jan Stourac
Date: Sat, 15 Jun 2024 20:11:50 +0200
Subject: [PATCH 4/7] [CI] enhance the check-params-env.sh to also check
 uniqueness of values

Up to now, it only checked that the variables used in the params.env
file are unique. This change also checks that the referenced images are
unique, as we don't expect any of the given variables to hold the same
reference.

(cherry picked from commit 7983f1a802f5ec65f1407515c16ddb12cb1190c7)
---
 ci/check-params-env.sh | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/ci/check-params-env.sh b/ci/check-params-env.sh
index 23f6d395e..3f020d992 100755
--- a/ci/check-params-env.sh
+++ b/ci/check-params-env.sh
@@ -31,6 +31,7 @@ EXPECTED_NUM_RECORDS=27
 
 function check_variables_uniq() {
     local env_file_path="${1}"
+    local allow_value_duplicity="${2:-false}"
     local ret_code=0
 
     echo "Checking that all variables in the file '${env_file_path}' are unique and expected"
@@ -45,10 +46,31 @@ function check_variables_uniq() {
     num_uniq_records=$(echo "${content}" | uniq | wc -l)
 
     test "${num_records}" -eq "${num_uniq_records}" || {
-        echo "Some of the records in the file aren't unique!"
+        echo "Some of the variables in the file aren't unique!"
         ret_code=1
     }
 
+    # ----
+    if test "${allow_value_duplicity}" = "false"; then
+        echo "Checking that all values assigned to variables in the file '${env_file_path}' are unique and expected"
+
+        content=$(sed 's#.*=\(.*\)#\1#' "${env_file_path}" | sort)
+
+        local num_values
+        num_values=$(echo "${content}" | wc -l)
+
+        local num_uniq_values
+        num_uniq_values=$(echo "${content}" | uniq | wc -l)
+
+        test "${num_values}" -eq "${num_uniq_values}" || {
+            echo "Some of the values in the file aren't unique!"
+            ret_code=1
+        }
+    fi
+
+    # ----
+    echo "Checking that there is the expected number of records in the file '${env_file_path}'"
+
     test "${num_records}" -eq "${EXPECTED_NUM_RECORDS}" || {
         echo "Number of records in the file is incorrect - expected '${EXPECTED_NUM_RECORDS}' but got '${num_records}'!"
         ret_code=1
@@ -314,13 +336,13 @@ ret_code=0
 echo "Starting check of image references in files: '${COMMIT_ENV_PATH}' and '${PARAMS_ENV_PATH}'"
 echo "---------------------------------------------"
 
-check_variables_uniq "${COMMIT_ENV_PATH}" || {
+check_variables_uniq "${COMMIT_ENV_PATH}" "true" || {
 	echo "ERROR: Variable names in the '${COMMIT_ENV_PATH}' file failed validation!"
 	echo "----------------------------------------------------"
 	ret_code=1
 }
 
-check_variables_uniq "${PARAMS_ENV_PATH}" || {
+check_variables_uniq "${PARAMS_ENV_PATH}" "false" || {
 	echo "ERROR: Variable names in the '${PARAMS_ENV_PATH}' file failed validation!"
echo "----------------------------------------------------" ret_code=1 From c2382f752df6afa6693a92e748ac98cb7c2d75bf Mon Sep 17 00:00:00 2001 From: Jan Stourac Date: Sat, 15 Jun 2024 20:13:33 +0200 Subject: [PATCH 5/7] [CI] check-params-env.sh prints also time of creation of the checked image (cherry picked from commit 7ce69e6a1368f7992f2fde74434a4614f5326c99) --- ci/check-params-env.sh | 6 ++++++ ci/check-runtime-images.sh | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/ci/check-params-env.sh b/ci/check-params-env.sh index 3f020d992..640ab2863 100755 --- a/ci/check-params-env.sh +++ b/ci/check-params-env.sh @@ -280,6 +280,7 @@ function check_image() { local image_name local image_commit_id local image_commitref + local image_created image_metadata="$(skopeo inspect --config "docker://${image_url}")" || { echo "Couldn't download image metadata with skopeo tool!" @@ -297,6 +298,10 @@ function check_image() { echo "Couldn't parse '.config.Labels."io.openshift.build.commit.ref"' from image metadata!" return 1 } + image_created=$(echo "${image_metadata}" | jq --raw-output '.created') || { + echo "Couldn't parse '.created' from image metadata!" + return 1 + } local config_env local build_name_raw @@ -321,6 +326,7 @@ function check_image() { } echo "Image name retrieved: '${image_name}'" + echo "Image created: '${image_created}'" check_image_variable_matches_name_and_commitref "${image_variable}" "${image_name}" "${image_commitref}" "${openshift_build_name}" || return 1 diff --git a/ci/check-runtime-images.sh b/ci/check-runtime-images.sh index 8908a9b6c..826ea2197 100755 --- a/ci/check-runtime-images.sh +++ b/ci/check-runtime-images.sh @@ -27,6 +27,7 @@ function check_image() { local img_tag local img_url local img_metadata + local img_created img_tag=$(jq -r '.metadata.tags[0]' "${runtime_image_file}") || { echo "ERROR: Couldn't parse image tags metadata for '${runtime_image_file}' runtime image file!" @@ -42,6 +43,11 @@ function check_image() { return 1 } + img_created=$(echo "${img_metadata}" | jq --raw-output '.created') || { + echo "Couldn't parse '.created' from image metadata!" + return 1 + } + local expected_string="runtime-${img_tag}-ubi" echo "Checking that '${expected_string}' is present in the image metadata" echo "${img_metadata}" | grep --quiet "${expected_string}" || { @@ -49,6 +55,8 @@ function check_image() { return 1 } + echo "Image created: '${img_created}'" + # TODO: we shall extend this check to check also Label "io.openshift.build.commit.ref" value (e.g. 
'2024a') or something similar } From 3f059f0ede2c2a6d1bf36da34043e316941a5b5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= Date: Wed, 26 Jun 2024 13:37:21 +0200 Subject: [PATCH 6/7] Limit PR checks to build only the modified images (#558) (cherry picked from commit 7bfbf321c73c2b4b7c6567541181099ee1d8a655) --- .github/workflows/build-notebooks-pr.yaml | 20 +++- Makefile | 1 + ci/cached-builds/gen_gha_matrix_jobs.py | 43 +++++++- ci/cached-builds/gha_pr_changed_files.py | 126 ++++++++++++++++++++++ 4 files changed, 185 insertions(+), 5 deletions(-) create mode 100644 ci/cached-builds/gha_pr_changed_files.py diff --git a/.github/workflows/build-notebooks-pr.yaml b/.github/workflows/build-notebooks-pr.yaml index d04d90a6c..fd93b545d 100644 --- a/.github/workflows/build-notebooks-pr.yaml +++ b/.github/workflows/build-notebooks-pr.yaml @@ -1,28 +1,40 @@ --- "name": "Build Notebooks" -"permissions": - "packages": "read" "on": "pull_request": +permissions: + contents: read + packages: read + pull-requests: read + jobs: gen: name: Generate job matrix runs-on: ubuntu-latest outputs: matrix: ${{ steps.gen.outputs.matrix }} + has_jobs: ${{ steps.gen.outputs.has_jobs }} steps: - uses: actions/checkout@v4 - - run: python3 ci/cached-builds/gen_gha_matrix_jobs.py + + - run: | + python3 ci/cached-builds/gen_gha_matrix_jobs.py \ + --owner=${{ github.repository_owner }} \ + --repo=${{ github.event.pull_request.base.repo.name }} \ + --pr-number=${{ github.event.pull_request.number }} \ + --skip-unchanged id: gen + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # base images build: needs: ["gen"] strategy: fail-fast: false matrix: "${{ fromJson(needs.gen.outputs.matrix) }}" uses: ./.github/workflows/build-notebooks-TEMPLATE.yaml + if: ${{ fromJson(needs.gen.outputs.has_jobs) }} with: target: "${{ matrix.target }}" github: "${{ toJSON(github) }}" diff --git a/Makefile b/Makefile index 5ee75f7c3..0eb3b8088 100644 --- a/Makefile +++ b/Makefile @@ -59,6 +59,7 @@ endef # ARG 2: Path of image context we want to build. # ARG 3: Base image tag name (optional). define image + $(info #*# Image build directory: <$(2)> #(MACHINE-PARSED LINE)#*#...) $(call build_image,$(1),$(2),$(3)) $(call push_image,$(1)) endef diff --git a/ci/cached-builds/gen_gha_matrix_jobs.py b/ci/cached-builds/gen_gha_matrix_jobs.py index 867577932..7a4746275 100644 --- a/ci/cached-builds/gen_gha_matrix_jobs.py +++ b/ci/cached-builds/gen_gha_matrix_jobs.py @@ -1,11 +1,17 @@ +import argparse import itertools import json +import logging import os import pathlib import re import string +import sys +import unittest from typing import Iterable +import gha_pr_changed_files + """Trivial Makefile parser that extracts target dependencies so that we can build each Dockerfile image target in its own GitHub Actions job and handle dependencies between them. 
@@ -115,11 +121,13 @@ def write_github_workflow_file(tree: dict[str, list[str]], path: pathlib.Path) - def flatten(list_of_lists): return list(itertools.chain.from_iterable(list_of_lists)) + def compute_leafs_in_dependency_tree(tree: dict[str, list[str]]) -> list[str]: key_set = set(tree.keys()) value_set = set(flatten(tree.values())) return [key for key in key_set if key not in value_set] + def print_github_actions_pr_matrix(tree: dict[str, list[str]], leafs: list[str]) -> list[str]: """Outputs GitHub matrix definition Json """ @@ -136,10 +144,24 @@ def print_github_actions_pr_matrix(tree: dict[str, list[str]], leafs: list[str]) targets.append(leaf) matrix = {"target": targets} - return [f"matrix={json.dumps(matrix, separators=(',', ':'))}"] + return [f"matrix={json.dumps(matrix, separators=(',', ':'))}", + f"has_jobs={json.dumps(len(leafs) > 0, separators=(',', ':'))}"] def main() -> None: + logging.basicConfig(level=logging.DEBUG, stream=sys.stderr) + + argparser = argparse.ArgumentParser() + argparser.add_argument("--owner", type=str, required=False, + help="GitHub repo owner/org (for the --skip-unchanged feature)") + argparser.add_argument("--repo", type=str, required=False, + help="GitHub repo name (for the --skip-unchanged feature)") + argparser.add_argument("--pr-number", type=int, required=False, + help="PR number under owner/repo (for the --skip-unchanged feature)") + argparser.add_argument("--skip-unchanged", type=bool, required=False, default=False, + action=argparse.BooleanOptionalAction) + args = argparser.parse_args() + # https://www.gnu.org/software/make/manual/make.html#Reading-Makefiles with open("Makefile", "rt") as makefile: lines = read_makefile_lines(makefile) @@ -148,6 +170,10 @@ def main() -> None: write_github_workflow_file(tree, project_dir / ".github" / "workflows" / "build-notebooks.yaml") leafs = compute_leafs_in_dependency_tree(tree) + if args.skip_unchanged: + logging.info(f"Skipping targets not modified in PR #{args.pr_number}") + changed_files = gha_pr_changed_files.list_changed_files(args.owner, args.repo, args.pr_number) + leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files) output = print_github_actions_pr_matrix(tree, leafs) print("leafs", leafs) @@ -159,3 +185,18 @@ def main() -> None: if __name__ == '__main__': main() + + +class SelfTests(unittest.TestCase): + def test_select_changed_targets(self): + with open(project_dir / "Makefile", "rt") as makefile: + lines = read_makefile_lines(makefile) + tree = extract_target_dependencies(lines) + leafs = compute_leafs_in_dependency_tree(tree) + + changed_files = ["jupyter/datascience/ubi9-python-3.9/Dockerfile"] + + leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files) + assert set(leafs) == {'cuda-jupyter-tensorflow-ubi9-python-3.9', + 'jupyter-trustyai-ubi9-python-3.9', + 'jupyter-pytorch-ubi9-python-3.9'} diff --git a/ci/cached-builds/gha_pr_changed_files.py b/ci/cached-builds/gha_pr_changed_files.py new file mode 100644 index 000000000..1f3ab56e1 --- /dev/null +++ b/ci/cached-builds/gha_pr_changed_files.py @@ -0,0 +1,126 @@ +import json +import logging +import os +import pathlib +import re +import subprocess +import unittest +import urllib.request + +PROJECT_ROOT = pathlib.Path(__file__).parent.parent.parent.resolve() + + +def get_github_token() -> str: + github_token = os.environ['GITHUB_TOKEN'] + return github_token + + +# https://docs.github.com/en/graphql/guides/forming-calls-with-graphql +def compose_gh_api_request(pull_number: int, owner="opendatahub-io", 
repo="notebooks", per_page=100, + cursor="") -> urllib.request.Request: + github_token = get_github_token() + + return urllib.request.Request( + url="https://api.github.com/graphql", + method="POST", + headers={ + "Authorization": f"bearer {github_token}", + }, + # https://docs.github.com/en/graphql/guides/using-the-explorer + data=json.dumps({"query": f""" +{{ + repository(owner:"{owner}", name:"{repo}") {{ + pullRequest(number:{pull_number}) {{ + files(first:{per_page}, after:"{cursor}") {{ + edges {{ + node {{ + path + }} + cursor + }} + }} + }} + }} +}} + """}).encode("utf-8"), + ) + + +def list_changed_files(owner: str, repo: str, pr_number: int, per_page=100) -> list[str]: + files = [] + + logging.debug("Getting list of changed files from GitHub API") + + CURSOR = "" + while CURSOR is not None: + request = compose_gh_api_request(pull_number=pr_number, owner=owner, repo=repo, per_page=per_page, + cursor=CURSOR) + response = urllib.request.urlopen(request) + data = json.loads(response.read().decode("utf-8")) + response.close() + edges = data["data"]["repository"]["pullRequest"]["files"]["edges"] + + CURSOR = None + for edge in edges: + files.append(edge["node"]["path"]) + CURSOR = edge["cursor"] + + logging.debug(f"Determined {len(files)} changed files: {files[:5]} (..., printing up to 5)") + return files + + +def analyze_build_directories(make_target) -> list[str]: + directories = [] + + pattern = re.compile(r"#\*# Image build directory: <(?P[^>]+)> #\(MACHINE-PARSED LINE\)#\*#\.\.\.") + try: + logging.debug(f"Running make in --just-print mode for target {make_target}") + for line in subprocess.check_output(["make", make_target, "--just-print"], encoding="utf-8", + cwd=PROJECT_ROOT).splitlines(): + if m := pattern.match(line): + directories.append(m["dir"]) + except subprocess.CalledProcessError as e: + print(e.stderr, e.stdout) + raise + + logging.debug(f"Target {make_target} depends on files in directories {directories}") + return directories + + +def should_build_target(changed_files: list[str], target_directories: list[str]) -> str: + """Returns truthy if there is at least one changed file necessitating a build. 
+    Falsy (empty) string is returned otherwise."""
+    for directory in target_directories:
+        for changed_file in changed_files:
+            if changed_file.startswith(directory):
+                return changed_file
+    return ""
+
+
+def filter_out_unchanged(targets: list[str], changed_files: list[str]) -> list[str]:
+    changed = []
+    for target in targets:
+        target_directories = analyze_build_directories(target)
+        if reason := should_build_target(changed_files, target_directories):
+            logging.info(f"✅ Will build {target} because file {reason} has been changed")
+            changed.append(target)
+        else:
+            logging.info(f"❌ Won't build {target}")
+    return changed
+
+
+class SelfTests(unittest.TestCase):
+    def test_compose_gh_api_request__call_without_asserting(self):
+        request = compose_gh_api_request(pull_number=556, per_page=100, cursor="")
+        print(request.data)
+
+    def test_list_changed_files__pagination_works(self):
+        changed_files = list_changed_files(owner="opendatahub-io", repo="notebooks", pr_number=556, per_page=1)
+        assert set(changed_files) == {'codeserver/ubi9-python-3.9/Dockerfile',
+                                      'codeserver/ubi9-python-3.9/run-code-server.sh'}
+
+    def test_analyze_build_directories(self):
+        directories = analyze_build_directories("jupyter-intel-pytorch-ubi9-python-3.9")
+        assert set(directories) == {"base/ubi9-python-3.9",
+                                    "intel/base/gpu/ubi9-python-3.9",
+                                    "jupyter/intel/pytorch/ubi9-python-3.9"}

From 2ee66e3895e2cc91128fc73da59d9d6c0314dd43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?=
Date: Wed, 26 Jun 2024 16:47:14 +0200
Subject: [PATCH 7/7] ci: increase available disk space for GHA container
 image builds (#577)

* ci: increase available disk space for GHA container image builds

This PR creates an LVM overlay, increasing the available disk space from
the previous 66GB to 82GB by default, and to 106GB when building any
amd/cuda/pytorch/tensorflow image.

* fixup from review, add intel

(cherry picked from commit 088ab3ce88cb1efd9e6842f5345f928f5e4a46ab)
---
 .../workflows/build-notebooks-TEMPLATE.yaml | 38 +++++++++-
 ci/cached-builds/gha_lvm_overlay.bash       | 74 +++++++++++++++++++
 ci/cached-builds/storage.conf               |  3 +-
 3 files changed, 109 insertions(+), 6 deletions(-)
 create mode 100755 ci/cached-builds/gha_lvm_overlay.bash

diff --git a/.github/workflows/build-notebooks-TEMPLATE.yaml b/.github/workflows/build-notebooks-TEMPLATE.yaml
index 808e428d0..c083b3ad0 100644
--- a/.github/workflows/build-notebooks-TEMPLATE.yaml
+++ b/.github/workflows/build-notebooks-TEMPLATE.yaml
@@ -34,6 +34,38 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
+      - name: Free up additional disk space
+        # https://docs.github.com/en/actions/learn-github-actions/expressions
+        if: "${{ contains(inputs.target, 'amd') || contains(inputs.target, 'cuda') || contains(inputs.target, 'intel') ||
+          contains(inputs.target, 'pytorch') || contains(inputs.target, 'tensorflow') }}"
+        run: |
+          set -x
+
+          df -h
+
+          sudo rm -rf /usr/local/lib/android &
+          sudo rm -rf /usr/local/share/boost &
+          sudo rm -rf /usr/local/lib/node_modules &
+          sudo rm -rf /usr/share/dotnet &
+          sudo rm -rf /opt/ghc &
+          sudo rm -rf /opt/hostedtoolcache/CodeQL &
+
+          sudo docker image prune --all --force &
+
+          wait
+
+          df -h
+
+      - name: Mount lvm overlay for podman builds
+        run: |
+          df -h
+          free -h
+
+          bash ./ci/cached-builds/gha_lvm_overlay.bash
+
+          df -h
+          free -h
+
       # https://github.com/containers/buildah/issues/2521#issuecomment-884779112
       - name: Workaround https://github.com/containers/podman/issues/22152#issuecomment-2027705598
         run: sudo apt-get -qq remove podman crun
@@ -58,12 +90,10 @@ jobs:
           mkdir -p $HOME/.config/containers/
           cp ci/cached-builds/containers.conf $HOME/.config/containers/containers.conf
           cp ci/cached-builds/storage.conf $HOME/.config/containers/storage.conf
+
           # should at least reset storage when touching storage.conf
-          sudo mkdir -p /mnt/containers/
-          sudo chown -R $USER:$USER /mnt/containers
           podman system reset --force
-          # podman bug? need to create this _after_ doing the reset
-          mkdir -p /mnt/containers/tmp
+          mkdir -p $HOME/.local/share/containers/storage/tmp
 
       # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#push
       - name: "push: make ${{ inputs.target }}"
diff --git a/ci/cached-builds/gha_lvm_overlay.bash b/ci/cached-builds/gha_lvm_overlay.bash
new file mode 100755
index 000000000..f890e80ba
--- /dev/null
+++ b/ci/cached-builds/gha_lvm_overlay.bash
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+set -Eeuo pipefail

+# GitHub Actions runners have two disks, /dev/root and /dev/sda1.
+# We would like to be able to combine available disk space on both and use it for podman container builds.
+#
+# This script creates file-backed volumes on /dev/root and /dev/sda1, then creates ext4 over both, and mounts it for our use
+# https://github.com/easimon/maximize-build-space/blob/master/action.yml
+
+root_reserve_mb=2048
+temp_reserve_mb=100
+swap_size_mb=4096
+
+build_mount_path="${HOME}/.local/share/containers"
+build_mount_path_ownership="runner:runner"
+
+pv_loop_path=/pv.img
+tmp_pv_loop_path=/mnt/tmp-pv.img
+overprovision_lvm=false
+
+VG_NAME=buildvg
+
+# github runners have an active swap file in /mnt/swapfile
+# we want to reuse the temp disk, so first unmount swap and clean the temp disk
+echo "Unmounting and removing swap file."
+sudo swapoff -a
+sudo rm -f /mnt/swapfile
+
+echo "Creating LVM Volume."
+echo "  Creating LVM PV on root fs."
+# create loop pv image on root fs +ROOT_RESERVE_KB=$(expr ${root_reserve_mb} \* 1024) +ROOT_FREE_KB=$(df --block-size=1024 --output=avail / | tail -1) +ROOT_LVM_SIZE_KB=$(expr $ROOT_FREE_KB - $ROOT_RESERVE_KB) +ROOT_LVM_SIZE_BYTES=$(expr $ROOT_LVM_SIZE_KB \* 1024) +sudo touch "${pv_loop_path}" && sudo fallocate -z -l "${ROOT_LVM_SIZE_BYTES}" "${pv_loop_path}" +export ROOT_LOOP_DEV=$(sudo losetup --find --show "${pv_loop_path}") +sudo pvcreate -f "${ROOT_LOOP_DEV}" + +# create pv on temp disk +echo " Creating LVM PV on temp fs." +TMP_RESERVE_KB=$(expr ${temp_reserve_mb} \* 1024) +TMP_FREE_KB=$(df --block-size=1024 --output=avail /mnt | tail -1) +TMP_LVM_SIZE_KB=$(expr $TMP_FREE_KB - $TMP_RESERVE_KB) +TMP_LVM_SIZE_BYTES=$(expr $TMP_LVM_SIZE_KB \* 1024) +sudo touch "${tmp_pv_loop_path}" && sudo fallocate -z -l "${TMP_LVM_SIZE_BYTES}" "${tmp_pv_loop_path}" +export TMP_LOOP_DEV=$(sudo losetup --find --show "${tmp_pv_loop_path}") +sudo pvcreate -f "${TMP_LOOP_DEV}" + +# create volume group from these pvs +sudo vgcreate "${VG_NAME}" "${TMP_LOOP_DEV}" "${ROOT_LOOP_DEV}" + +echo "Recreating swap" +# create and activate swap +sudo lvcreate -L "${swap_size_mb}M" -n swap "${VG_NAME}" +sudo mkswap "/dev/mapper/${VG_NAME}-swap" +sudo swapon "/dev/mapper/${VG_NAME}-swap" + +echo "Creating build volume" +# create and mount build volume +sudo lvcreate --type raid0 --stripes 2 --stripesize 4 --alloc anywhere --extents 100%FREE --name buildlv "${VG_NAME}" +if [[ ${overprovision_lvm} == 'true' ]]; then + sudo mkfs.ext4 -m0 "/dev/mapper/${VG_NAME}-buildlv" +else + sudo mkfs.ext4 -Enodiscard -m0 "/dev/mapper/${VG_NAME}-buildlv" +fi +sudo mount "/dev/mapper/${VG_NAME}-buildlv" "${build_mount_path}" +sudo chown -R "${build_mount_path_ownership}" "${build_mount_path}" + +# if build mount path is a parent of $GITHUB_WORKSPACE, and has been deleted, recreate it +if [[ ! -d "${GITHUB_WORKSPACE}" ]]; then + sudo mkdir -p "${GITHUB_WORKSPACE}" + sudo chown -R "${WORKSPACE_OWNER}" "${GITHUB_WORKSPACE}" +fi diff --git a/ci/cached-builds/storage.conf b/ci/cached-builds/storage.conf index 24a181ec0..eb9bba42a 100644 --- a/ci/cached-builds/storage.conf +++ b/ci/cached-builds/storage.conf @@ -2,10 +2,9 @@ [storage] driver="overlay" -rootless_storage_path="/mnt/containers" [storage.options] # https://www.redhat.com/sysadmin/faster-container-image-pulls -pull_options = {enable_partial_images = "true", use_hard_links = "true", ostree_repos=""} +pull_options = {enable_partial_images = "true", use_hard_links = "false", ostree_repos=""} [storage.options.overlay]
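
A note on the Makefile parsing that drives the generated workflow above: extract_target_dependencies in ci/cached-builds/gen_gha_matrix_jobs.py relies on every buildable image being declared on a single "name: dependency" line, and its repeated regex group retains at most one dependency per target, which matches how this project's Makefile is written. A minimal sketch of the idea on a toy Makefile (the sample input and helper below are illustrative, not the exact code from the script):

import re

# Toy input; the real script reads the repository Makefile.
MAKEFILE = (
    "base-ubi9-python-3.9:\n"
    "\t$(call image,$@,base/ubi9-python-3.9)\n"
    "\n"
    "jupyter-minimal-ubi9-python-3.9: base-ubi9-python-3.9\n"
    "\t$(call image,$@,jupyter/minimal/ubi9-python-3.9)\n"
    "\n"
    ".PHONY: all\n"
)

# Same shape as the script's regex: a target name, then optional dependencies;
# a repeated capture group only keeps the last dependency it matched.
TARGET = re.compile(r"^([-A-Za-z0-9.]+)\s*:(?:\s*([-A-Za-z0-9.]+))*\s*$")

tree: dict[str, list[str]] = {}
for line in MAKEFILE.splitlines():
    if line.startswith(("\t", ".")):  # skip recipe lines and .PHONY-style lines
        continue
    if m := TARGET.match(line):
        target, dep = m.groups()
        tree[target] = [dep] if dep else []

print(tree)
# {'base-ubi9-python-3.9': [], 'jupyter-minimal-ubi9-python-3.9': ['base-ubi9-python-3.9']}

# Leafs (targets nothing else depends on) become the PR build matrix.
depended_on = {d for deps in tree.values() for d in deps}
print([t for t in tree if t not in depended_on])
# ['jupyter-minimal-ubi9-python-3.9']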
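The black-hole registry deserves a second look, too: the make targets always push after building, so pull_request runs point IMAGE_REGISTRY at localhost:5000, where dev_null_container_registry.go answers 200 OK to every request and discards the bytes. For local experiments without a Go toolchain, a rough Python equivalent might look like the sketch below; it assumes the client sends Content-Length-framed bodies, whereas the Go net/http server also copes with chunked uploads transparently:

from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer

class BlackHoleRegistry(BaseHTTPRequestHandler):
    """Log each request, drain its body, answer 200 OK - nothing is stored."""

    def _swallow(self):
        length = int(self.headers.get("Content-Length") or 0)
        if length:
            self.rfile.read(length)  # drain the upload so the client is not stalled
        print(self.command, self.path)
        self.send_response(200)
        self.end_headers()
        # Default protocol is HTTP/1.0: the connection closes after each
        # response, which keeps this sketch simple.

    # a registry push exercises GET/HEAD/POST/PUT/PATCH/DELETE of the /v2/ API
    do_GET = do_HEAD = do_POST = do_PUT = do_PATCH = do_DELETE = _swallow

if __name__ == "__main__":
    ThreadingHTTPServer(("localhost", 5000), BlackHoleRegistry).serve_forever()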
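Finally, because the generator emits build-notebooks.yaml as indented JSON behind a "---" line and one comment (and every JSON document is also valid YAML), ad-hoc checks over the generated workflow need no YAML parser at all. For instance, a quick consistency check that every job's "needs" entry refers to a defined job — an illustrative add-on to the check-generated-code job, not part of the patches:

import json

with open(".github/workflows/build-notebooks.yaml") as f:
    f.readline()  # skip "---"
    f.readline()  # skip the "# This file is autogenerated by ..." comment
    workflow = json.load(f)  # the rest of the file is plain JSON

jobs = workflow["jobs"]
for name, job in jobs.items():
    for dep in job["needs"]:
        assert dep in jobs, f"job {name} needs undefined job {dep}"
print(f"OK: {len(jobs)} jobs, all 'needs' edges resolve")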