Skip to content

Commit

Permalink
Add GitHub Action for XLA CPU CI tests to test new self-hosted runners
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 719340680
  • Loading branch information
ddunl authored and Google-ML-Automation committed Jan 27, 2025
1 parent d3236dd commit 9bb2b57
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 12 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/xla_cpu_ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright 2025 The OpenXLA Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# Runs the XLA CPU test suite on self-hosted Linux x86 runners.
# The actual build/test commands come from build_tools/ci/build.py, which
# selects its configuration via the KOKORO_JOB_NAME env var below.
name: XLA CPU Self-hosted
permissions:
  # Least-privilege token: this workflow only needs to check out the repo.
  contents: read
on:
  pull_request:
  push:
    branches:
      - main

# TODO(ddunleavy): refactor build.py to not depend on this env var
env:
  # build.py looks this job name up in _KOKORO_JOB_NAME_TO_BUILD_MAP to pick
  # the _SELF_HOSTED_GHA_BUILD configuration.
  KOKORO_JOB_NAME: "tensorflow/xla/linux/cpu/test_self_hosted"

jobs:
  xla-cpu-tests:
    # Self-hosted runner label (n2-standard-16 class machine).
    runs-on: "linux-x86-n2-16"
    # Same ml-build image the Kokoro CPU jobs use, so toolchains match.
    container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest"
    defaults:
      run:
        shell: bash
    timeout-minutes: 30
    steps:
      - name: "Checking out repository"
        # Pinned to a full commit SHA for supply-chain safety.
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      - name: "Run build.py"
        run: build_tools/ci/build.py
22 changes: 18 additions & 4 deletions build_tools/ci/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def _write_to_sponge_config(key, value) -> None:
class BuildType(enum.Enum):
"""Enum representing all types of builds."""
CPU_X86 = enum.auto()
CPU_X86_SELF_HOSTED = enum.auto()
CPU_ARM64 = enum.auto()
GPU = enum.auto()
GPU_CONTINUOUS = enum.auto()
Expand Down Expand Up @@ -157,10 +158,12 @@ def docker_run_command(self, *, command: str, **kwargs: Any) -> List[str]:
def commands(self) -> List[List[str]]:
"""Returns list of commands for a build."""
cmds = []
cmds.append([
f"{_KOKORO_ARTIFACTS_DIR}/github/xla/.kokoro/generate_index_html.sh",
"index.html",
])

if "self_hosted" in self.type_.name.lower():
cmds.append([
f"{_KOKORO_ARTIFACTS_DIR}/github/xla/.kokoro/generate_index_html.sh",
"index.html",
])
if self.repo != "openxla/xla":
_, repo_name = self.repo.split("/")

Expand Down Expand Up @@ -292,6 +295,16 @@ def nvidia_gpu_build_with_compute_capability(
test_tag_filters=cpu_x86_tag_filter,
options=_DEFAULT_BAZEL_OPTIONS,
)
# Build configuration for the GitHub Actions self-hosted CPU x86 job
# ("tensorflow/xla/linux/cpu/test_self_hosted").  Mirrors _CPU_X86_BUILD's
# configs, targets, and tag filters, but with image_url=None: per the
# CPU_X86_SELF_HOSTED section of golden_commands.txt, commands run directly
# on the runner instead of being wrapped in docker pull/run/exec.
_SELF_HOSTED_GHA_BUILD = Build(
    type_=BuildType.CPU_X86_SELF_HOSTED,
    repo="openxla/xla",
    # No docker image: the GitHub Actions workflow already runs the job
    # inside the ml-build container.
    image_url=None,
    configs=("warnings", "nonccl", "rbe_linux_cpu"),
    target_patterns=_XLA_DEFAULT_TARGET_PATTERNS,
    build_tag_filters=cpu_x86_tag_filter,
    test_tag_filters=cpu_x86_tag_filter,
    options=_DEFAULT_BAZEL_OPTIONS,
)

cpu_arm_tag_filter = (
"-no_oss",
Expand Down Expand Up @@ -454,6 +467,7 @@ def nvidia_gpu_build_with_compute_capability(
_KOKORO_JOB_NAME_TO_BUILD_MAP = {
"tensorflow/xla/linux/arm64/build_cpu": _CPU_ARM64_BUILD,
"tensorflow/xla/linux/cpu/build_cpu": _CPU_X86_BUILD,
"tensorflow/xla/linux/cpu/test_self_hosted": _SELF_HOSTED_GHA_BUILD,
"tensorflow/xla/linux/gpu/build_gpu": _GPU_BUILD,
"tensorflow/xla/linux/github_continuous/arm64/build_cpu": _CPU_ARM64_BUILD,
"tensorflow/xla/linux/github_continuous/build_gpu": _GPU_BUILD,
Expand Down
14 changes: 6 additions & 8 deletions build_tools/ci/golden_commands.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# BEGIN BuildType.CPU_ARM64
$KOKORO_ARTIFACTS_DIR/github/xla/.kokoro/generate_index_html.sh index.html
docker pull us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build-arm64:latest
docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/xla us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build-arm64:latest bash
docker exec xla_ci parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd,-not_run:arm --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd,-not_run:arm --config=warnings --config=rbe_cross_compile_linux_arm64 --config=nonccl --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --build_tests_only --nobuild -- //xla/... //build_tools/... @tsl//tsl/...
Expand All @@ -8,16 +7,20 @@ docker exec xla_ci bazel analyze-profile profile.json.gz
docker stop xla_ci
# END BuildType.CPU_ARM64
# BEGIN BuildType.CPU_X86
$KOKORO_ARTIFACTS_DIR/github/xla/.kokoro/generate_index_html.sh index.html
parallel --ungroup --retries 3 --delay 15 --nonall -- docker pull us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest
docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/xla us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest bash
docker exec xla_ci parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --config=warnings --config=nonccl --config=rbe_linux_cpu --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/... //build_tools/... @tsl//tsl/...
docker exec xla_ci bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --config=warnings --config=nonccl --config=rbe_linux_cpu --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @tsl//tsl/...
docker exec xla_ci bazel analyze-profile profile.json.gz
docker stop xla_ci
# END BuildType.CPU_X86
# BEGIN BuildType.GPU
# BEGIN BuildType.CPU_X86_SELF_HOSTED
$KOKORO_ARTIFACTS_DIR/github/xla/.kokoro/generate_index_html.sh index.html
parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --config=warnings --config=nonccl --config=rbe_linux_cpu --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/... //build_tools/... @tsl//tsl/...
bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --config=warnings --config=nonccl --config=rbe_linux_cpu --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @tsl//tsl/...
bazel analyze-profile profile.json.gz
# END BuildType.CPU_X86_SELF_HOSTED
# BEGIN BuildType.GPU
nvidia-smi
parallel --ungroup --retries 3 --delay 15 --nonall -- docker pull us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest
docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/xla us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest bash
Expand All @@ -27,7 +30,6 @@ docker exec xla_ci bazel analyze-profile profile.json.gz
docker stop xla_ci
# END BuildType.GPU
# BEGIN BuildType.JAX_CPU
$KOKORO_ARTIFACTS_DIR/github/xla/.kokoro/generate_index_html.sh index.html
git clone --depth=1 https://github.com/google/jax ./github/jax
parallel --ungroup --retries 3 --delay 15 --nonall -- docker pull gcr.io/tensorflow-sigs/build:latest-python3.11
docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/jax gcr.io/tensorflow-sigs/build:latest-python3.11 bash
Expand All @@ -37,7 +39,6 @@ docker exec xla_ci bazel analyze-profile profile.json.gz
docker stop xla_ci
# END BuildType.JAX_CPU
# BEGIN BuildType.JAX_GPU
$KOKORO_ARTIFACTS_DIR/github/xla/.kokoro/generate_index_html.sh index.html
git clone --depth=1 https://github.com/google/jax ./github/jax
parallel --ungroup --retries 3 --delay 15 --nonall -- docker pull gcr.io/tensorflow-sigs/build:latest-python3.11
docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/jax gcr.io/tensorflow-sigs/build:latest-python3.11 bash
Expand All @@ -47,7 +48,6 @@ docker exec xla_ci bazel analyze-profile profile.json.gz
docker stop xla_ci
# END BuildType.JAX_GPU
# BEGIN BuildType.MACOS_CPU_X86
$KOKORO_ARTIFACTS_DIR/github/xla/.kokoro/generate_index_html.sh index.html
sudo wget --no-verbose -O /usr/local/bin/bazel https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-darwin-amd64
chmod +x /usr/local/bin/bazel
bazel --version
Expand All @@ -56,7 +56,6 @@ bazel test --build_tag_filters=-no_oss,-gpu,-no_mac,-mac_excluded,-requires-gpu-
bazel analyze-profile profile.json.gz
# END BuildType.MACOS_CPU_X86
# BEGIN BuildType.TENSORFLOW_CPU
$KOKORO_ARTIFACTS_DIR/github/xla/.kokoro/generate_index_html.sh index.html
git clone --depth=1 https://github.com/tensorflow/tensorflow ./github/tensorflow
parallel --ungroup --retries 3 --delay 15 --nonall -- docker pull us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest
docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/tensorflow us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest bash
Expand All @@ -65,7 +64,6 @@ docker exec xla_ci bazel analyze-profile profile.json.gz
docker stop xla_ci
# END BuildType.TENSORFLOW_CPU
# BEGIN BuildType.TENSORFLOW_GPU
$KOKORO_ARTIFACTS_DIR/github/xla/.kokoro/generate_index_html.sh index.html
git clone --depth=1 https://github.com/tensorflow/tensorflow ./github/tensorflow
parallel --ungroup --retries 3 --delay 15 --nonall -- docker pull us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest
docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/tensorflow us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest bash
Expand Down

0 comments on commit 9bb2b57

Please sign in to comment.